test_pdfminer_psparser.py 3.17 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108
#!/usr/bin/env python

# -*- coding: utf-8 -*-

from nose.tools import assert_equal, assert_true, assert_false
from nose import SkipTest
import nose

import logging

from pdfminer.psparser import *

##  Simplistic Test cases
##
class TestPSBaseParser:

    TESTDATA = br'''%!PS
begin end
 "  @ #
/a/BCD /Some_Name /foo#5f#xbaa
0 +1 -2 .5 1.234
(abc) () (abc ( def ) ghi)
(def\040\0\0404ghi) (bach\\slask) (foo\nbaa)
(this % is not a comment.)
(foo
baa)
(foo\
baa)
<> <20> < 40 4020 >
<abcd00
12345>
func/a/b{(c)do*}def
[ 1 (z) ! ]
<< /foo (bar) >>
'''

    TOKENS = [
      (5, KWD(b'begin')), (11, KWD(b'end')), (16, KWD(b'"')), (19, KWD(b'@')),
      (21, KWD(b'#')), (23, LIT('a')), (25, LIT('BCD')), (30, LIT('Some_Name')),
      (41, LIT('foo_xbaa')), (54, 0), (56, 1), (59, -2), (62, 0.5),
      (65, 1.234), (71, b'abc'), (77, b''), (80, b'abc ( def ) ghi'),
      (98, b'def \x00 4ghi'), (118, b'bach\\slask'), (132, b'foo\nbaa'),
      (143, b'this % is not a comment.'), (170, b'foo\nbaa'), (180, b'foobaa'),
      (191, b''), (194, b' '), (199, b'@@ '), (211, b'\xab\xcd\x00\x124\x05'),
      (226, KWD(b'func')), (230, LIT('a')), (232, LIT('b')),
      (234, KWD(b'{')), (235, b'c'), (238, KWD(b'do*')), (241, KWD(b'}')),
      (242, KWD(b'def')), (246, KWD(b'[')), (248, 1), (250, b'z'), (254, KWD(b'!')),
      (256, KWD(b']')), (258, KWD(b'<<')), (261, LIT('foo')), (266, b'bar'),
      (272, KWD(b'>>'))
    ]

    OBJS = [
      (23, LIT('a')), (25, LIT('BCD')), (30, LIT('Some_Name')),
      (41, LIT('foo_xbaa')), (54, 0), (56, 1), (59, -2), (62, 0.5),
      (65, 1.234), (71, b'abc'), (77, b''), (80, b'abc ( def ) ghi'),
      (98, b'def \x00 4ghi'), (118, b'bach\\slask'), (132, b'foo\nbaa'),
      (143, b'this % is not a comment.'), (170, b'foo\nbaa'), (180, b'foobaa'),
      (191, b''), (194, b' '), (199, b'@@ '), (211, b'\xab\xcd\x00\x124\x05'),
      (230, LIT('a')), (232, LIT('b')), (234, [b'c']), (246, [1, b'z']),
      (258, {'foo': b'bar'}),
    ]

    def get_tokens(self, s):
        from io import BytesIO

        class MyParser(PSBaseParser):
            def flush(self):
                self.add_results(*self.popall())
        parser = MyParser(BytesIO(s))
        r = []
        try:
            while True:
                r.append(parser.nexttoken())
        except PSEOF:
            pass
        return r

    def get_objects(self, s):
        from io import BytesIO

        class MyParser(PSStackParser):
            def flush(self):
                self.add_results(*self.popall())
        parser = MyParser(BytesIO(s))
        r = []
        try:
            while True:
                r.append(parser.nextobject())
        except PSEOF:
            pass
        return r

    def test_1(self):
        tokens = self.get_tokens(self.TESTDATA)
        logging.info(tokens)
        assert_equal(tokens, self.TOKENS)
        return

    def test_2(self):
        objs = self.get_objects(self.TESTDATA)
        logging.info(objs)
        assert_equal(objs, self.OBJS)
        return

if __name__ == '__main__':
    #import logging,sys,os,six
    #logging.basicConfig(level=logging.DEBUG, filename='%s_%d.%d.log'%(os.path.basename(__file__),sys.version_info[0],sys.version_info[1]))
    nose.runmodule()