Commit e61b1fef authored by SVN-Git Migration's avatar SVN-Git Migration

Imported Upstream version 20110227+dfsg

parent d2dc206f
......@@ -47,6 +47,6 @@ $(CMAPDST)/to-unicode-Adobe-Korea1.pickle.gz:
$(CONV_CMAP) $(CMAPDST) Adobe-Korea1 $(CMAPSRC)/cid2code_Adobe_Korea1.txt cp949 euc-kr
test: cmap
cd samples && $(MAKE) test CMP=cmp
cd samples && $(MAKE) test
test_clean:
-cd samples && $(MAKE) clean
Metadata-Version: 1.0
Name: pdfminer
Version: 20101226
Version: 20110227
Summary: PDF parser and analyzer
Home-page: http://www.unixuser.org/~euske/python/pdfminer/index.html
Author: Yusuke Shinyama
......
......@@ -9,7 +9,7 @@
<div align=right class=lastmod>
<!-- hhmts start -->
Last Modified: Sun Dec 26 08:31:09 UTC 2010
Last Modified: Sun Feb 27 10:51:18 UTC 2011
<!-- hhmts end -->
</div>
......@@ -32,7 +32,7 @@ Python PDF parser and analyzer
<ul>
<li> <a href="#cmap">CJK languages support</a>
</ul>
<li> <a href="#documentation">Documentation</a>
<li> <a href="#tools">Command Line Tools</a>
<ul>
<li> <a href="#pdf2txt">pdf2txt.py</a>
<li> <a href="#dumppdf">dumppdf.py</a>
......@@ -159,9 +159,9 @@ paste the following commands on a command line prompt:
<strong>python setup.py install</strong>
</pre></blockquote>
<h2><a name="documentation">Documentation</a></h2>
<h2><a name="tools">Command Line Tools</a></h2>
<p>
PDFMiner comes with two command line tools:
PDFMiner comes with two handy tools:
<code>pdf2txt.py</code> and <code>dumppdf.py</code>.
<h3><a name="pdf2txt">pdf2txt.py</a></h3>
......@@ -184,7 +184,7 @@ Not all characters in a PDF can be safely converted to Unicode.
$ <strong>pdf2txt.py -o output.html samples/naacl06-shinyama.pdf</strong>
(extract text as an HTML file whose filename is output.html)
$ <strong>pdf2txt.py -c euc-jp -o output.html samples/jo.pdf</strong>
$ <strong>pdf2txt.py -V -c euc-jp -o output.html samples/jo.pdf</strong>
(extract a Japanese HTML file in vertical writing, CMap is required)
$ <strong>pdf2txt.py -P mypassword -o output.txt secret.pdf</strong>
......@@ -270,6 +270,9 @@ are M = 1.0, L = 0.3, and W = 0.2, respectively.
<dd> Forces layout analysis to be performed on all text strings,
including text contained in figures.
<p>
<dt> <code>-V</code>
<dd> Allows vertical writing detection.
<p>
<dt> <code>-Y <em>layout_mode</em></code>
<dd> Specifies how the page layout should be preserved. (Currently only applies to HTML format.)
<ul>
......@@ -354,6 +357,7 @@ no stream header is displayed for the ease of saving it to a file.
<h2><a name="changes">Changes</a></h2>
<ul>
<li> 2011/02/27: Bugfixes and layout analysis improvements. Thanks to fujimoto.report.
<li> 2010/12/26: A couple of bugfixes and minor improvements. Thanks to Kevin Brubeck Unhammer and Daniel Gerber.
<li> 2010/10/17: A couple of bugfixes and minor improvements. Thanks to standardabweichung and Alastair Irving.
<li> 2010/09/07: A minor bugfix. Thanks to Alexander Garden.
......
#!/usr/bin/env python2
__version__ = '20101226'
__version__ = '20110227'
if __name__ == '__main__': print __version__
......@@ -240,7 +240,8 @@ class CMapDB(object):
filename = '%s.pickle.gz' % name
if klass.debug:
print >>sys.stderr, 'loading:', name
for directory in os.path.dirname(cmap.__file__), '/usr/share/pdfminer/':
default_path = os.environ.get('CMAP_PATH', '/usr/share/pdfminer/')
for directory in (os.path.dirname(cmap.__file__), default_path):
path = os.path.join(directory, filename)
if os.path.exists(path):
gzfile = gzip.open(path)
......
This diff is collapsed.
#!/usr/bin/env python2
import sys
from utils import apply_matrix_pt, get_bound, INF
from utils import bsearch, bbox2str, matrix2str, Plane
from pdffont import PDFUnicodeNotDefined
def uniq(objs):
done = set()
for obj in objs:
if obj in done: continue
done.add(obj)
yield obj
return
def csort(objs, key):
idxs = dict( (obj,i) for (i,obj) in enumerate(objs) )
return sorted(objs, key=lambda obj:(key(obj), idxs[obj]))
from utils import bbox2str, matrix2str, uniq, csort, Plane
## LAParams
......@@ -23,19 +9,19 @@ def csort(objs, key):
class LAParams(object):
def __init__(self,
writing_mode='lr-tb',
line_overlap=0.5,
char_margin=2.0,
line_margin=0.5,
word_margin=0.1,
boxes_flow=0,
boxes_flow=0.5,
detect_vertical=False,
all_texts=False):
self.writing_mode = writing_mode
self.line_overlap = line_overlap
self.char_margin = char_margin
self.line_margin = line_margin
self.word_margin = word_margin
self.boxes_flow = boxes_flow
self.detect_vertical = detect_vertical
self.all_texts = all_texts
return
......@@ -402,7 +388,7 @@ class LTTextGroupLRTB(LTTextGroup):
def analyze(self, laparams):
# reorder the objects from top-left to bottom-right.
self._objs = csort(self._objs, key=lambda obj:
(1-laparams.boxes_flow)*(obj.x0+obj.x1) -
(1-laparams.boxes_flow)*(obj.x0) -
(1+laparams.boxes_flow)*(obj.y0+obj.y1))
return LTTextGroup.analyze(self, laparams)
......@@ -412,7 +398,7 @@ class LTTextGroupTBRL(LTTextGroup):
# reorder the objects from top-right to bottom-left.
self._objs = csort(self._objs, key=lambda obj:
-(1+laparams.boxes_flow)*(obj.x0+obj.x1)
-(1-laparams.boxes_flow)*(obj.y0+obj.y1))
-(1-laparams.boxes_flow)*(obj.y1))
return LTTextGroup.analyze(self, laparams)
......@@ -480,7 +466,8 @@ class LTLayoutContainer(LTContainer):
# |<--->|
# (char_margin)
k |= 1
if (obj0.is_compatible(obj1) and obj0.is_hoverlap(obj1) and
if (laparams.detect_vertical and
obj0.is_compatible(obj1) and obj0.is_hoverlap(obj1) and
min(obj0.width, obj1.width) * laparams.line_overlap < obj0.hoverlap(obj1) and
obj0.vdistance(obj1) < max(obj0.height, obj1.height) * laparams.char_margin):
# obj0 and obj1 is vertically aligned:
......@@ -527,9 +514,6 @@ class LTLayoutContainer(LTContainer):
def get_textboxes(self, laparams, lines):
plane = Plane(lines)
for line in lines:
plane.add(line)
plane.finish()
boxes = {}
for line in lines:
neighbors = line.find_neighbors(plane, laparams.line_margin)
......@@ -555,35 +539,42 @@ class LTLayoutContainer(LTContainer):
return
def group_textboxes(self, laparams, boxes):
def dist(obj1, obj2):
def dist((x0,y0,x1,y1), obj1, obj2):
"""A distance function between two TextBoxes.
Consider the bounding rectangle for obj1 and obj2.
Return its area less the areas of obj1 and obj2,
shown as 'www' below. This value may be negative.
+------+..........+
| obj1 |wwwwwwwwww:
+------+www+------+
:wwwwwwwwww| obj2 |
+..........+------+
+------+..........+ (x1,y1)
| obj1 |wwwwwwwwww:
+------+www+------+
:wwwwwwwwww| obj2 |
(x0,y0) +..........+------+
"""
return ((max(obj1.x1,obj2.x1) - min(obj1.x0,obj2.x0)) *
(max(obj1.y1,obj2.y1) - min(obj1.y0,obj2.y0)) -
(obj1.width*obj1.height + obj2.width*obj2.height))
return ((x1-x0)*(y1-y0) - obj1.width*obj1.height - obj2.width*obj2.height)
boxes = boxes[:]
# XXX this is slow when there're many textboxes.
# XXX this is very slow when there're many textboxes.
while 2 <= len(boxes):
mindist = INF
mindist = (INF,0)
minpair = None
plane = Plane(boxes)
boxes = csort(boxes, key=lambda obj: obj.width*obj.height)
for i in xrange(len(boxes)):
for j in xrange(i+1, len(boxes)):
(obj1, obj2) = (boxes[i], boxes[j])
d = dist(obj1, obj2)
if d < mindist:
mindist = d
minpair = (obj1, obj2)
assert minpair
b = (min(obj1.x0,obj2.x0), min(obj1.y0,obj2.y0),
max(obj1.x1,obj2.x1), max(obj1.y1,obj2.y1))
others = set(plane.find(b)).difference((obj1,obj2))
d = dist(b, obj1, obj2)
# disregard if there's any other object in between.
if 0 < d and others:
d = (1,d)
else:
d = (0,d)
if mindist <= d: continue
mindist = d
minpair = (obj1, obj2)
assert minpair is not None, boxes
(obj1, obj2) = minpair
boxes.remove(obj1)
boxes.remove(obj2)
......
......@@ -119,12 +119,13 @@ class PDFTextDevice(PDFDevice):
##
class TagExtractor(PDFDevice):
def __init__(self, rsrcmgr, outfp, codec='utf-8'):
def __init__(self, rsrcmgr, outfp, codec='utf-8', debug=0):
PDFDevice.__init__(self, rsrcmgr)
self.outfp = outfp
self.codec = codec
self.debug = debug
self.pageno = 0
self.stack = []
self._stack = []
return
def render_string(self, textstate, seq):
......@@ -158,16 +159,16 @@ class TagExtractor(PDFDevice):
s = ''.join( ' %s="%s"' % (enc(k), enc(str(v))) for (k,v)
in sorted(props.iteritems()) )
self.outfp.write('<%s%s>' % (enc(tag.name), s))
self.stack.append(tag)
self._stack.append(tag)
return
def end_tag(self):
assert self.stack
tag = self.stack.pop(-1)
assert self._stack
tag = self._stack.pop(-1)
self.outfp.write('</%s>' % enc(tag.name))
return
def do_tag(self, tag, props=None):
self.begin_tag(tag, props)
self.stack.pop(-1)
self._stack.pop(-1)
return
......@@ -266,10 +266,11 @@ class CFFFont(object):
def __iter__(self):
return iter( self[i] for i in xrange(len(self)) )
def __init__(self, fp0):
self.fp = fp0
def __init__(self, name, fp):
self.name = name
self.fp = fp
# Header
(_major,_minor,hdrsize,self.offsize) = unpack('BBBB', self.fp.read(4))
(_major,_minor,hdrsize,offsize) = unpack('BBBB', self.fp.read(4))
self.fp.read(hdrsize-4)
# Name INDEX
self.name_index = self.INDEX(self.fp)
......@@ -281,9 +282,9 @@ class CFFFont(object):
self.subr_index = self.INDEX(self.fp)
# Top DICT DATA
self.top_dict = getdict(self.dict_index[0])
(charset_pos,) = self.top_dict.get(15, 0)
(encoding_pos,) = self.top_dict.get(16, 0)
(charstring_pos,) = self.top_dict.get(17, 0)
(charset_pos,) = self.top_dict.get(15, [0])
(encoding_pos,) = self.top_dict.get(16, [0])
(charstring_pos,) = self.top_dict.get(17, [0])
# CharStrings
self.fp.seek(charstring_pos)
self.charstring = self.INDEX(self.fp)
......@@ -299,9 +300,18 @@ class CFFFont(object):
for (code,gid) in enumerate(unpack('B'*n, self.fp.read(n))):
self.code2gid[code] = gid
self.gid2code[gid] = code
else:
elif format == '\x01':
# Format 1
assert 0
(n,) = unpack('B', self.fp.read(1))
code = 0
for i in xrange(n):
(first,nleft) = unpack('BB', self.fp.read(2))
for gid in xrange(first,first+nleft+1):
self.code2gid[code] = gid
self.gid2code[gid] = code
code += 1
else:
raise ValueError('unsupported encoding format: %r' % format)
# Charsets
self.name2gid = {}
self.gid2name = {}
......@@ -315,9 +325,22 @@ class CFFFont(object):
name = self.getstr(sid)
self.name2gid[name] = gid
self.gid2name[gid] = name
else:
elif format == '\x01':
# Format 1
(n,) = unpack('B', self.fp.read(1))
sid = 0
for i in xrange(n):
(first,nleft) = unpack('BB', self.fp.read(2))
for gid in xrange(first,first+nleft+1):
name = self.getstr(sid)
self.name2gid[name] = gid
self.gid2name[gid] = name
sid += 1
elif format == '\x02':
# Format 2
assert 0
else:
raise ValueError('unsupported charset format: %r' % format)
#print self.code2gid
#print self.name2gid
#assert 0
......@@ -339,7 +362,7 @@ class TrueTypeFont(object):
self.name = name
self.fp = fp
self.tables = {}
fonttype = fp.read(4)
self.fonttype = fp.read(4)
(ntables, _1, _2, _3) = unpack('>HHHH', fp.read(8))
for _ in xrange(ntables):
(name, tsum, offset, length) = unpack('>4sLLL', fp.read(16))
......@@ -670,7 +693,9 @@ class PDFCIDFont(PDFFont):
def main(argv):
for fname in argv[1:]:
fp = file(fname, 'rb')
font = TrueTypeFont(fname, fp)
#font = TrueTypeFont(fname, fp)
font = CFFFont(fname, fp)
print font
fp.close()
return
......
......@@ -124,11 +124,12 @@ class PDFGraphicState(object):
##
class PDFResourceManager(object):
'''
"""Repository of shared resources.
ResourceManager facilitates reuse of shared resources
such as fonts and images so that large objects are not
allocated multiple times.
'''
"""
debug = 0
def __init__(self):
......
......@@ -51,20 +51,21 @@ class PDFObjRef(PDFObject):
# resolve
def resolve1(x):
'''
Resolve an object. If this is an array or dictionary,
it may still contains some indirect objects inside.
'''
"""Resolves an object.
If this is an array or dictionary, it may still contain
some indirect objects inside.
"""
while isinstance(x, PDFObjRef):
x = x.resolve()
return x
def resolve_all(x):
'''
Recursively resolve X and all the internals.
"""Recursively resolves the given object and all the internals.
Make sure there is no indirect reference within the nested object.
This procedure might be slow.
'''
"""
while isinstance(x, PDFObjRef):
x = x.resolve()
if isinstance(x, list):
......@@ -75,9 +76,8 @@ def resolve_all(x):
return x
def decipher_all(decipher, objid, genno, x):
'''
Recursively decipher X.
'''
"""Recursively deciphers the given object.
"""
if isinstance(x, str):
return decipher(objid, genno, x)
if isinstance(x, list):
......
......@@ -140,9 +140,8 @@ OCT_STRING = re.compile(r'[0-7]')
ESC_STRING = { 'b':8, 't':9, 'n':10, 'f':12, 'r':13, '(':40, ')':41, '\\':92 }
class PSBaseParser(object):
'''
Most basic PostScript parser that performs only tokenization.
'''
"""Most basic PostScript parser that performs only tokenization.
"""
BUFSIZ = 4096
debug = 0
......@@ -175,9 +174,8 @@ class PSBaseParser(object):
return
def seek(self, pos):
'''
Seeks the parser to the given position.
'''
"""Seeks the parser to the given position.
"""
if 2 <= self.debug:
print >>stderr, 'seek: %r' % pos
self.fp.seek(pos)
......@@ -203,9 +201,8 @@ class PSBaseParser(object):
return
def nextline(self):
'''
Fetches a next line that ends either with \\r or \\n.
'''
"""Fetches a next line that ends either with \\r or \\n.
"""
linebuf = ''
linepos = self.bufpos + self.charpos
eol = False
......@@ -234,10 +231,10 @@ class PSBaseParser(object):
return (linepos, linebuf)
def revreadlines(self):
'''
Fetches a next line backword. This is used to locate
the trailers at the end of a file.
'''
"""Fetches a next line backword.
This is used to locate the trailers at the end of a file.
"""
self.fp.seek(0, 2)
pos = self.fp.tell()
buf = ''
......@@ -534,11 +531,11 @@ class PSStackParser(PSBaseParser):
return
def nextobject(self):
'''
Yields a list of objects: keywords, literals, strings,
numbers, arrays and dictionaries. Arrays and dictionaries
are represented as Python sequence and dictionaries.
'''
"""Yields a list of objects.
Returns keywords, literals, strings, numbers, arrays and dictionaries.
Arrays and dictionaries are represented as Python lists and dictionaries.
"""
while not self.results:
(pos, token) = self.nexttoken()
#print (pos,token), (self.curtype, self.curstack)
......
......@@ -11,30 +11,52 @@ from struct import pack, unpack
MATRIX_IDENTITY = (1, 0, 0, 1, 0, 0)
def mult_matrix((a1,b1,c1,d1,e1,f1), (a0,b0,c0,d0,e0,f0)):
'''Returns the multiplication of two matrices.'''
"""Returns the multiplication of two matrices."""
return (a0*a1+c0*b1, b0*a1+d0*b1,
a0*c1+c0*d1, b0*c1+d0*d1,
a0*e1+c0*f1+e0, b0*e1+d0*f1+f0)
def translate_matrix((a,b,c,d,e,f), (x,y)):
'''Translates a matrix by (x,y).'''
"""Translates a matrix by (x,y)."""
return (a,b,c,d,x*a+y*c+e,x*b+y*d+f)
def apply_matrix_pt((a,b,c,d,e,f), (x,y)):
'''Applies a matrix to a point.'''
"""Applies a matrix to a point."""
return (a*x+c*y+e, b*x+d*y+f)
def apply_matrix_norm((a,b,c,d,e,f), (p,q)):
'''Equivalent to apply_matrix_pt(M, (p,q)) - apply_matrix_pt(M, (0,0))'''
"""Equivalent to apply_matrix_pt(M, (p,q)) - apply_matrix_pt(M, (0,0))"""
return (a*p+c*q, b*p+d*q)
## Utility functions
##
# uniq
def uniq(objs):
    """Lazily yields the items of objs, dropping repeated ones.

    Only the first occurrence of each item is produced, in the order
    it was encountered. Items must be hashable because they are
    remembered in a set.
    """
    seen = set()
    for item in objs:
        if item not in seen:
            seen.add(item)
            yield item
# csort
def csort(objs, key):
    """Sorts objs by key(obj), breaking ties by original position.

    Items must be hashable, since each one is mapped to its index in
    objs. If an item occurs more than once, the index of its last
    occurrence is the one used as the tie-breaker.
    """
    position = {}
    for (index, obj) in enumerate(objs):
        position[obj] = index

    def rank(obj):
        # Secondary component keeps equal-key items in input order.
        return (key(obj), position[obj])

    return sorted(objs, key=rank)
# drange
def drange(v0, v1, d):
    """Returns the indices of the grid cells of size d touched by [v0, v1).

    v0, v1: lower and upper coordinate of the extent (v0 < v1 is required).
    d: the cell size.

    Cell k covers the half-open interval [k*d, (k+1)*d). The result runs
    from the cell containing v0 up to, but not including, the first cell
    that starts at or beyond v1, so it always contains at least one index.
    """
    assert v0 < v1
    # Floor the lower bound and ceil the upper bound with true floor
    # division. Truncating with int() first (as in int(v1+d-1)/d) drops
    # the fractional part of v1, so an extent ending at e.g. 50.5 with
    # d=50 never reached cell 1, and negative coordinates were rounded
    # toward zero into the wrong cell.
    first = int(v0 // d)
    last = -int((-v1) // d)   # ceil(v1/d) without importing math
    return xrange(first, last)
# get_bound
def get_bound(pts):
'''Compute a minimal rectangle that covers all the points.'''
"""Compute a minimal rectangle that covers all the points."""
(x0, y0, x1, y1) = (INF, INF, -INF, -INF)
for (x,y) in pts:
x0 = min(x0, x)
......@@ -45,7 +67,7 @@ def get_bound(pts):
# pick
def pick(seq, func, maxobj=None):
'''Picks the object obj where func(obj) has the highest value.'''
"""Picks the object obj where func(obj) has the highest value."""
maxscore = None
for obj in seq:
score = func(obj)
......@@ -53,31 +75,9 @@ def pick(seq, func, maxobj=None):
(maxscore,maxobj) = (score,obj)
return maxobj
# bsearch
def bsearch(objs, v0):
'''Tries to find the closest value to v0.'''
nb_objs = len(objs)
i0 = 0
i1 = nb_objs
while i0 < i1:
i = (i0+i1)/2
(v, obj) = objs[i]
if v0 == v:
(i0,i1) = (i,i+1)
while 0 < i0 and objs[i0-1][0] == v0:
i0 -= 1
while i1 < nb_objs-1 and objs[i1][0] == v0:
i1 += 1
break
elif v0 < v:
i1 = i
else:
i0 = i+1
return (i0,i1)
# choplist
def choplist(n, seq):
'''Groups every n elements of the list.'''
"""Groups every n elements of the list."""
r = []
for x in seq:
r.append(x)
......@@ -88,7 +88,7 @@ def choplist(n, seq):
# nunpack
def nunpack(s, default=0):
'''Unpacks 1 to 4 byte integers (big endian).'''
"""Unpacks 1 to 4 byte integers (big endian)."""
l = len(s)
if not l:
return default
......@@ -139,7 +139,7 @@ PDFDocEncoding = ''.join( unichr(x) for x in (
0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x00ff,
))
def decode_text(s):
'''Decodes a PDFDocEncoding string to Unicode.'''
"""Decodes a PDFDocEncoding string to Unicode."""
if s.startswith('\xfe\xff'):
return unicode(s[2:], 'utf-16be', 'ignore')
else:
......@@ -147,7 +147,7 @@ def decode_text(s):
# enc
def enc(x, codec='ascii'):
'''Encodes a string for SGML/XML/HTML'''
"""Encodes a string for SGML/XML/HTML"""
x = x.replace('&','&amp;').replace('>','&gt;').replace('<','&lt;').replace('"','&quot;')
return x.encode(codec, 'xmlcharrefreplace')
......@@ -191,43 +191,46 @@ class ObjIdRange(object):
##
class Plane(object):
def __init__(self, objs):
self._idxs = {}
self._xobjs = []
self._yobjs = []
def __init__(self, objs=None, gridsize=50):
self._objs = {}
self.gridsize = gridsize
if objs is not None:
for obj in objs:
self.add(obj)
return
def __repr__(self):
return ('<Plane objs=%r>' % list(self))
def __iter__(self):
return self._idxs.iterkeys()
def _getrange(self, (x0,y0,x1,y1)):
for y in drange(y0, y1, self.gridsize):
for x in drange(x0, x1, self.gridsize):
yield (x,y)
return
# add(obj): place an object in a certain area.
def add(self, obj):
self._idxs[obj] = len(self._idxs)
self._xobjs.append((obj.x0, obj))
self._xobjs.append((obj.x1, obj))
self._yobjs.append((obj.y0, obj))
self._yobjs.append((obj.y1, obj))
return
# finish()
def finish(self):
self._xobjs.sort()
self._yobjs.sort()
for k in self._getrange((obj.x0, obj.y0, obj.x1, obj.y1)):
if k not in self._objs:
r = []
self._objs[k] = r
else:
r = self._objs[k]
r.append(obj)
return
# find(): finds objects that are in a certain area.
def find(self, (x0,y0,x1,y1)):
i0 = bsearch(self._xobjs, x0)[0]
i1 = bsearch(self._xobjs, x1)[1]
xobjs = set( obj for (_,obj) in self._xobjs[i0:i1] )
i0 = bsearch(self._yobjs, y0)[0]
i1 = bsearch(self._yobjs, y1)[1]
yobjs = set( obj for (_,obj) in self._yobjs[i0:i1] )
xobjs.intersection_update(yobjs)
return sorted(xobjs, key=lambda obj: self._idxs[obj])
r = set()
for k in self._getrange((x0,y0,x1,y1)):
if k not in self._objs: continue
for obj in self._objs[k]:
if obj in r: continue
r.add(obj)
if (obj.x1 <= x0 or x1 <= obj.x0 or
obj.y1 <= y0 or y1 <= obj.y0): continue
yield obj
return
# create_bmp
......@@ -235,5 +238,6 @@ def create_bmp(data, bits, width, height):
info = pack('<IiiHHIIIIII', 40, width, height, 1, bits, 0, len(data), 0, 0, 0, 0)
assert len(info) == 40, len(info)
header = pack('<ccIHHI', 'B', 'M', 14+40+len(data), 0, 0, 14+40)
assert len(header) == 14, len(header)
# XXX re-rasterize every line
return header+info+data
# GNUMakefile for test
RM=rm -f
#CMP=cmp
CMP=:
PYTHON=python2
PDF2TXT=PYTHONPATH=.. $(PYTHON) ../tools/pdf2txt.py -p1
PDF2TXT=PYTHONPATH=.. $(PYTHON) ../tools/pdf2txt.py -p1 -V
HTMLS=$(HTMLS_FREE) $(HTMLS_NONFREE)
HTMLS_FREE= \
......@@ -49,9 +48,10 @@ XMLS_NONFREE= \
nonfree/naacl06-shinyama.xml \
nonfree/nlp2004slides.xml
all: test
all: htmls texts xmls
test: htmls texts xmls
test:
$(MAKE) all CMP=cmp
clean:
-$(RM) $(HTMLS)
......
......@@ -1169,20 +1169,20 @@
</text>
</textline>
</textbox>
</page>
<layout>
<textgroup bbox="74.836,46.684,727.149,556.917">
<textgroup bbox="168.846,277.813,727.149,556.917">
<textgroup bbox="715.703,277.813,727.149,547.331">
<textbox id="0" bbox="715.703,518.573,727.149,547.331" />
<textbox id="1" bbox="715.703,277.813,727.149,287.399" />
</textgroup>
<textgroup bbox="74.836,46.684,671.450,556.917">
<textbox id="2" bbox="168.846,346.025,671.450,556.917" />
</textgroup>