layout.py 22 KB
Newer Older
1
from sortedcontainers import SortedListWithKey
2

3 4 5 6 7 8 9 10 11 12
from .utils import INF
from .utils import Plane
from .utils import get_bound
from .utils import uniq
from .utils import fsplit
from .utils import bbox2str
from .utils import matrix2str
from .utils import apply_matrix_pt

import six # Python 2+3 compatibility
13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29

##  IndexAssigner
##
class IndexAssigner(object):
    """Hands out consecutive ``index`` values to text boxes.

    Text groups are walked recursively; each text box met on the way
    receives the current counter value, after which the counter is
    advanced by one.
    """

    def __init__(self, index=0):
        # Next value that will be stored on a text box.
        self.index = index

    def run(self, obj):
        """Number ``obj`` if it is a text box, or descend into a text group.

        Objects that are neither an LTTextBox nor an LTTextGroup are
        left untouched.
        """
        if isinstance(obj, LTTextBox):
            obj.index = self.index
            self.index = self.index + 1
            return
        if isinstance(obj, LTTextGroup):
            for child in obj:
                self.run(child)
30 31 32 33 34 35 36 37


##  LAParams
##
class LAParams(object):
    """Tunable parameters for the layout analysis.

    The attributes are plain values read by the layout classes in this
    module:

    line_overlap -- fraction of the smaller character extent that two
        characters must overlap by to be treated as aligned
        (see LTLayoutContainer.group_objects).
    char_margin -- multiplier applied to the larger character extent; two
        characters further apart than this are not put on the same line
        (see LTLayoutContainer.group_objects).
    line_margin -- ratio handed to LTTextLine.find_neighbors when lines are
        grouped into text boxes.
    word_margin -- handed to each LTTextLine; controls when a space
        annotation is inserted between two characters.
    boxes_flow -- weight used by the LTTextGroup sort keys; hierarchical
        grouping of text boxes is only done when it lies in [-1, +1].
    detect_vertical -- when true, vertically aligned characters are also
        considered while building lines.
    all_texts -- when true, LTFigure objects are analyzed as well.
    """

    def __init__(self,
                 line_overlap=0.5,
                 char_margin=2.0,
                 line_margin=0.5,
                 word_margin=0.1,
                 boxes_flow=0.5,
                 detect_vertical=False,
                 all_texts=False):
        self.line_overlap = line_overlap
        self.char_margin = char_margin
        self.line_margin = line_margin
        self.word_margin = word_margin
        self.boxes_flow = boxes_flow
        self.detect_vertical = detect_vertical
        self.all_texts = all_texts
        return

    def __repr__(self):
        # Only a subset of the parameters is shown; the format is kept
        # unchanged for backward compatibility.
        return ('<LAParams: char_margin=%.1f, line_margin=%.1f, word_margin=%.1f all_texts=%r>' %
                (self.char_margin, self.line_margin, self.word_margin, self.all_texts))
56 57 58 59 60 61


##  LTItem
##
class LTItem(object):
    """Base class of layout objects; provides a no-op analyze() hook."""

    def analyze(self, laparams):
        """Perform the layout analysis."""
        return


##  LTText
##
class LTText(object):
    """Mixin for objects that carry text.

    Subclasses must override get_text(); the representation shows the
    class name followed by the repr of that text.
    """

    def __repr__(self):
        name = self.__class__.__name__
        return '<%s %r>' % (name, self.get_text())

    def get_text(self):
        """Return the text of this object (abstract here)."""
        raise NotImplementedError


##  LTComponent
##
class LTComponent(LTItem):
    """A layout item with a rectangular bounding box.

    The box is given as (x0, y0, x1, y1) and is exposed both as the
    ``bbox`` tuple and as the individual ``x0``/``y0``/``x1``/``y1``
    attributes, together with the derived ``width`` and ``height``.
    """

    def __init__(self, bbox):
        LTItem.__init__(self)
        self.set_bbox(bbox)
        return

    def __repr__(self):
        return ('<%s %s>' %
                (self.__class__.__name__, bbox2str(self.bbox)))

    # Disable comparison.
    # Ordering two components is treated as an error, so any attempt to
    # sort them without an explicit key raises ValueError.
    def __lt__(self, _):
        raise ValueError

    def __le__(self, _):
        raise ValueError

    def __gt__(self, _):
        raise ValueError

    def __ge__(self, _):
        raise ValueError

    def set_bbox(self, bbox):
        """Store ``bbox`` and refresh the coordinates derived from it."""
        (x0, y0, x1, y1) = bbox
        self.x0 = x0
        self.y0 = y0
        self.x1 = x1
        self.y1 = y1
        self.width = x1-x0
        self.height = y1-y0
        self.bbox = bbox
        return

    def is_empty(self):
        """Return True when the box has no positive width or height."""
        return self.width <= 0 or self.height <= 0

    def is_hoverlap(self, obj):
        """Return True when the x-ranges of self and ``obj`` intersect."""
        assert isinstance(obj, LTComponent), str(type(obj))
        return obj.x0 <= self.x1 and self.x0 <= obj.x1

    def hdistance(self, obj):
        """Return the horizontal gap to ``obj``, or 0 if the x-ranges intersect."""
        assert isinstance(obj, LTComponent), str(type(obj))
        if self.is_hoverlap(obj):
            return 0
        else:
            return min(abs(self.x0-obj.x1), abs(self.x1-obj.x0))

    def hoverlap(self, obj):
        """Return a measure of horizontal overlap with ``obj`` (0 if none).

        NOTE(review): the value is min(|x0 - obj.x1|, |x1 - obj.x0|); when
        one x-range lies strictly inside the other this is larger than the
        length of the intersection. Kept as is because the grouping
        thresholds are tuned against it.
        """
        assert isinstance(obj, LTComponent), str(type(obj))
        if self.is_hoverlap(obj):
            return min(abs(self.x0-obj.x1), abs(self.x1-obj.x0))
        else:
            return 0

    def is_voverlap(self, obj):
        """Return True when the y-ranges of self and ``obj`` intersect."""
        assert isinstance(obj, LTComponent), str(type(obj))
        return obj.y0 <= self.y1 and self.y0 <= obj.y1

    def vdistance(self, obj):
        """Return the vertical gap to ``obj``, or 0 if the y-ranges intersect."""
        assert isinstance(obj, LTComponent), str(type(obj))
        if self.is_voverlap(obj):
            return 0
        else:
            return min(abs(self.y0-obj.y1), abs(self.y1-obj.y0))

    def voverlap(self, obj):
        """Return a measure of vertical overlap with ``obj`` (0 if none).

        Computed like hoverlap(), using the y coordinates.
        """
        assert isinstance(obj, LTComponent), str(type(obj))
        if self.is_voverlap(obj):
            return min(abs(self.y0-obj.y1), abs(self.y1-obj.y0))
        else:
            return 0


153
##  LTCurve
154
##
155
class LTCurve(LTComponent):
    """A path given as a sequence of points.

    The bounding box is computed from ``pts`` with get_bound(); the
    remaining arguments are stored unchanged as attributes.
    """

    def __init__(self, linewidth, pts, stroke=False, fill=False,
                 evenodd=False, stroking_color=None,
                 non_stroking_color=None):
        LTComponent.__init__(self, get_bound(pts))
        self.pts = pts
        self.linewidth = linewidth
        self.stroke = stroke
        self.fill = fill
        self.evenodd = evenodd
        self.stroking_color = stroking_color
        self.non_stroking_color = non_stroking_color
        return

    def get_pts(self):
        """Return the points as one 'x,y,x,y,...' string with 3 decimals."""
        return ','.join('%.3f,%.3f' % p for p in self.pts)
170 171 172 173


##  LTLine
##
174
class LTLine(LTCurve):
    """A curve made of exactly two points, ``p0`` and ``p1``."""

    def __init__(self, linewidth, p0, p1, stroke=False, fill=False,
                 evenodd=False, stroking_color=None,
                 non_stroking_color=None):
        LTCurve.__init__(self, linewidth, [p0, p1], stroke, fill, evenodd,
                         stroking_color, non_stroking_color)
        return


##  LTRect
##
183
class LTRect(LTCurve):
    """A curve whose points are the four corners of ``bbox``."""

    def __init__(self, linewidth, bbox, stroke=False, fill=False,
                 evenodd=False, stroking_color=None,
                 non_stroking_color=None):
        (x0, y0, x1, y1) = bbox
        # Corners are listed starting at (x0, y0) and going around the box.
        corners = [(x0, y0), (x1, y0), (x1, y1), (x0, y1)]
        LTCurve.__init__(self, linewidth, corners, stroke, fill, evenodd,
                         stroking_color, non_stroking_color)
        return


##  LTImage
##
193
class LTImage(LTComponent):
    """An image object placed at ``bbox``.

    Several properties are read from ``stream`` via ``stream.get_any``,
    which is given a tuple of alternative key names (short and long form).
    """

    def __init__(self, name, stream, bbox):
        LTComponent.__init__(self, bbox)
        self.name = name
        self.stream = stream
        self.srcsize = (stream.get_any(('W', 'Width')),
                        stream.get_any(('H', 'Height')))
        self.imagemask = stream.get_any(('IM', 'ImageMask'))
        self.bits = stream.get_any(('BPC', 'BitsPerComponent'), 1)
        self.colorspace = stream.get_any(('CS', 'ColorSpace'))
        # Always expose the colorspace as a list, even for a single value.
        if not isinstance(self.colorspace, list):
            self.colorspace = [self.colorspace]
        return

    def __repr__(self):
        return ('<%s(%s) %s %r>' %
                (self.__class__.__name__, self.name,
                 bbox2str(self.bbox), self.srcsize))
212 213


214
##  LTAnno
215
##
216
class LTAnno(LTItem, LTText):
    """A piece of text without a bounding box.

    In this module it is used for the space and newline markers that
    the text line classes insert between characters and after lines.
    """

    def __init__(self, text):
        self._text = text
        return

    def get_text(self):
        """Return the text given at construction time."""
        return self._text
224 225 226 227


##  LTChar
##
228
class LTChar(LTComponent, LTText):
    """A single character with its bounding box.

    The box is computed in the constructor from the font metrics and the
    given matrix. Besides the LTComponent attributes the object exposes
    ``matrix``, ``fontname``, ``ncs``, ``graphicstate``, ``adv`` (the
    advance: textwidth * fontsize * scaling), ``upright`` and ``size``.
    """

    def __init__(self, matrix, font, fontsize, scaling, rise,
                 text, textwidth, textdisp, ncs, graphicstate):
        LTText.__init__(self)
        self._text = text
        self.matrix = matrix
        self.fontname = font.fontname
        self.ncs = ncs
        self.graphicstate = graphicstate
        self.adv = textwidth * fontsize * scaling
        # compute the boundary rectangle.
        if font.is_vertical():
            # vertical
            width = font.get_width() * fontsize
            (vx, vy) = textdisp
            if vx is None:
                # No horizontal displacement given: center on the glyph.
                vx = width * 0.5
            else:
                vx = vx * fontsize * .001
            vy = (1000 - vy) * fontsize * .001
            tx = -vx
            ty = vy + rise
            bll = (tx, ty+self.adv)
            bur = (tx+width, ty)
        else:
            # horizontal
            height = font.get_height() * fontsize
            descent = font.get_descent() * fontsize
            ty = descent + rise
            bll = (0, ty)
            bur = (self.adv, ty+height)
        (a, b, c, d, e, f) = self.matrix
        self.upright = (0 < a*d*scaling and b*c <= 0)
        (x0, y0) = apply_matrix_pt(self.matrix, bll)
        (x1, y1) = apply_matrix_pt(self.matrix, bur)
        # The matrix may flip the box; normalize so that x0<=x1 and y0<=y1.
        if x1 < x0:
            (x0, x1) = (x1, x0)
        if y1 < y0:
            (y0, y1) = (y1, y0)
        LTComponent.__init__(self, (x0, y0, x1, y1))
        # Size is the extent across the writing direction.
        if font.is_vertical():
            self.size = self.width
        else:
            self.size = self.height
        return

    def __repr__(self):
        return ('<%s %s matrix=%s font=%r adv=%s text=%r>' %
                (self.__class__.__name__, bbox2str(self.bbox),
                 matrix2str(self.matrix), self.fontname, self.adv,
                 self.get_text()))

    def get_text(self):
        """Return the text given at construction time."""
        return self._text

    def is_compatible(self, obj):
        """Returns True if two characters can coexist in the same line."""
        return True
287

288

289 290
##  LTContainer
##
291
class LTContainer(LTComponent):
    """A component with a fixed bounding box that holds child objects.

    Children are kept in insertion order in ``_objs``; the container
    supports iteration and len().
    """

    def __init__(self, bbox):
        LTComponent.__init__(self, bbox)
        self._objs = []
        return

    def __iter__(self):
        return iter(self._objs)

    def __len__(self):
        return len(self._objs)

    def add(self, obj):
        """Append ``obj`` to the children."""
        self._objs.append(obj)
        return

    def extend(self, objs):
        """Add every object of ``objs`` through add().

        Going through add() lets subclasses that override add() see
        each object.
        """
        for obj in objs:
            self.add(obj)
        return

    def analyze(self, laparams):
        """Run analyze() on every child."""
        for obj in self._objs:
            obj.analyze(laparams)
        return
317

318 319 320 321 322 323

##  LTExpandableContainer
##
class LTExpandableContainer(LTContainer):
    """A container whose bounding box grows to cover what is added to it."""

    def __init__(self):
        # Start from an inverted infinite box so that the first min/max
        # in add() takes the coordinates of the first object.
        LTContainer.__init__(self, (+INF, +INF, -INF, -INF))
        return

    def add(self, obj):
        """Append ``obj`` and enlarge the bounding box to include it."""
        LTContainer.add(self, obj)
        self.set_bbox((min(self.x0, obj.x0), min(self.y0, obj.y0),
                       max(self.x1, obj.x1), max(self.y1, obj.y1)))
        return


334 335 336 337 338 339 340 341 342 343
##  LTTextContainer
##
class LTTextContainer(LTExpandableContainer, LTText):
    """An expandable container whose text is the text of its children."""

    def __init__(self):
        LTText.__init__(self)
        LTExpandableContainer.__init__(self)
        return

    def get_text(self):
        """Return the concatenated text of all LTText children, in order."""
        return ''.join(obj.get_text() for obj in self if isinstance(obj, LTText))

346 347 348

##  LTTextLine
##
349
class LTTextLine(LTTextContainer):
    """Base class of a single line of text.

    ``word_margin`` is stored for the subclasses, which use it to decide
    when to insert a space between two characters.
    """

    def __init__(self, word_margin):
        LTTextContainer.__init__(self)
        self.word_margin = word_margin
        return

    def __repr__(self):
        return ('<%s %s %r>' %
                (self.__class__.__name__, bbox2str(self.bbox),
                 self.get_text()))

    def analyze(self, laparams):
        """Analyze the children, then terminate the line with a newline."""
        LTTextContainer.analyze(self, laparams)
        # Added through LTContainer.add so the bounding box is not touched
        # (LTAnno has no coordinates).
        LTContainer.add(self, LTAnno('\n'))
        return

    def find_neighbors(self, plane, ratio):
        """Return the neighboring lines found in ``plane`` (abstract here)."""
        raise NotImplementedError

369

370 371
class LTTextLineHorizontal(LTTextLine):
    """A text line whose characters are laid out left to right."""

    def __init__(self, word_margin):
        LTTextLine.__init__(self, word_margin)
        # Right edge of the most recently added object; +INF means
        # "nothing added yet", so no space precedes the first character.
        self._x1 = +INF
        return

    def add(self, obj):
        """Add ``obj``, preceded by a space if it starts far enough right.

        A space annotation is inserted when ``obj`` is an LTChar,
        ``word_margin`` is non-zero and the gap between the previous right
        edge and ``obj.x0`` exceeds word_margin * max(width, height).
        """
        if isinstance(obj, LTChar) and self.word_margin:
            margin = self.word_margin * max(obj.width, obj.height)
            if self._x1 < obj.x0-margin:
                LTContainer.add(self, LTAnno(' '))
        self._x1 = obj.x1
        LTTextLine.add(self, obj)
        return

    def find_neighbors(self, plane, ratio):
        """Return horizontal lines near this one that look like siblings.

        The search area is this line's box extended vertically by
        d = ratio * height. A candidate qualifies when its height differs
        by less than d and its left or right edge is within d of ours.
        """
        d = ratio*self.height
        objs = plane.find((self.x0, self.y0-d, self.x1, self.y1+d))
        return [obj for obj in objs
                if (isinstance(obj, LTTextLineHorizontal) and
                    abs(obj.height-self.height) < d and
                    (abs(obj.x0-self.x0) < d or
                     abs(obj.x1-self.x1) < d))]


396 397
class LTTextLineVertical(LTTextLine):
    """A text line whose characters are laid out top to bottom."""

    def __init__(self, word_margin):
        LTTextLine.__init__(self, word_margin)
        # Bottom edge of the most recently added object; -INF means
        # "nothing added yet", so no space precedes the first character.
        self._y0 = -INF
        return

    def add(self, obj):
        """Add ``obj``, preceded by a space if it starts far enough below.

        A space annotation is inserted when ``obj`` is an LTChar,
        ``word_margin`` is non-zero and ``obj.y1`` plus
        word_margin * max(width, height) is still below the previous
        bottom edge.
        """
        if isinstance(obj, LTChar) and self.word_margin:
            margin = self.word_margin * max(obj.width, obj.height)
            if obj.y1+margin < self._y0:
                LTContainer.add(self, LTAnno(' '))
        self._y0 = obj.y0
        LTTextLine.add(self, obj)
        return

    def find_neighbors(self, plane, ratio):
        """Return vertical lines near this one that look like siblings.

        The search area is this line's box extended horizontally by
        d = ratio * width. A candidate qualifies when its width differs
        by less than d and its bottom or top edge is within d of ours.
        """
        d = ratio*self.width
        objs = plane.find((self.x0-d, self.y0, self.x1+d, self.y1))
        return [obj for obj in objs
                if (isinstance(obj, LTTextLineVertical) and
                    abs(obj.width-self.width) < d and
                    (abs(obj.y0-self.y0) < d or
                     abs(obj.y1-self.y1) < d))]

421 422 423 424 425 426

##  LTTextBox
##
##  A set of text objects that are grouped within
##  a certain rectangular area.
##
427
class LTTextBox(LTTextContainer):
    """A set of text objects grouped within a rectangular area.

    ``index`` is -1 until an IndexAssigner numbers the box.
    """

    def __init__(self):
        LTTextContainer.__init__(self)
        self.index = -1
        return

    def __repr__(self):
        return ('<%s(%s) %s %r>' %
                (self.__class__.__name__,
                 self.index, bbox2str(self.bbox), self.get_text()))
438

439

440
class LTTextBoxHorizontal(LTTextBox):
    """A text box whose lines are ordered from top to bottom."""

    def analyze(self, laparams):
        """Analyze the children, then sort them by descending ``y1``."""
        LTTextBox.analyze(self, laparams)
        self._objs.sort(key=lambda obj: -obj.y1)
        return

    def get_writing_mode(self):
        """Return the writing-mode tag of this box, 'lr-tb'."""
        return 'lr-tb'
449

450

451 452
class LTTextBoxVertical(LTTextBox):
    """A text box whose lines are ordered from right to left."""

    def analyze(self, laparams):
        """Analyze the children, then sort them by descending ``x1``."""
        LTTextBox.analyze(self, laparams)
        self._objs.sort(key=lambda obj: -obj.x1)
        return

    def get_writing_mode(self):
        """Return the writing-mode tag of this box, 'tb-rl'."""
        return 'tb-rl'
460 461 462 463


##  LTTextGroup
##
464
class LTTextGroup(LTTextContainer):
    """A text container created from an initial collection of objects."""

    def __init__(self, objs):
        LTTextContainer.__init__(self)
        # extend() goes through add(), so the bounding box grows to
        # cover every member.
        self.extend(objs)
        return

471

472
class LTTextGroupLRTB(LTTextGroup):
    """A text group ordered for left-to-right, top-to-bottom reading."""

    def analyze(self, laparams):
        """Analyze the members, then sort them into reading order.

        The sort key mixes the left edge and the vertical position,
        weighted by ``laparams.boxes_flow``.
        """
        LTTextGroup.analyze(self, laparams)
        # reorder the objects from top-left to bottom-right.
        self._objs.sort(key=lambda obj:
                        (1-laparams.boxes_flow)*(obj.x0) -
                        (1+laparams.boxes_flow)*(obj.y0+obj.y1))
        return
481

482

483
class LTTextGroupTBRL(LTTextGroup):
    """A text group ordered for top-to-bottom, right-to-left reading."""

    def analyze(self, laparams):
        """Analyze the members, then sort them into reading order.

        The sort key mixes the horizontal position and the top edge,
        weighted by ``laparams.boxes_flow``.
        """
        LTTextGroup.analyze(self, laparams)
        # reorder the objects from top-right to bottom-left.
        self._objs.sort(key=lambda obj:
                        -(1+laparams.boxes_flow)*(obj.x0+obj.x1)
                        - (1-laparams.boxes_flow)*(obj.y1))
        return
492 493


494
##  LTLayoutContainer
495
##
496
class LTLayoutContainer(LTContainer):
    """A container that runs the layout analysis on its characters.

    analyze() turns the LTChar children into text lines, the lines into
    text boxes and, depending on ``laparams.boxes_flow``, the boxes into
    a hierarchy of text groups stored in ``groups``.
    """

    def __init__(self, bbox):
        LTContainer.__init__(self, bbox)
        # Set by analyze() when hierarchical grouping is performed.
        self.groups = None
        return

    # group_objects: group text object to textlines.
    def group_objects(self, laparams, objs):
        """Yield text lines built from consecutive, aligned objects.

        ``objs`` is consumed in order; each object is compared only with
        its predecessor. An empty ``objs`` yields nothing.
        """
        obj0 = None
        line = None
        for obj1 in objs:
            if obj0 is not None:
                # halign: obj0 and obj1 is horizontally aligned.
                #
                #   +------+ - - -
                #   | obj0 | - - +------+   -
                #   |      |     | obj1 |   | (line_overlap)
                #   +------+ - - |      |   -
                #          - - - +------+
                #
                #          |<--->|
                #        (char_margin)
                halign = (obj0.is_compatible(obj1) and
                          obj0.is_voverlap(obj1) and
                          (min(obj0.height, obj1.height) * laparams.line_overlap <
                           obj0.voverlap(obj1)) and
                          (obj0.hdistance(obj1) <
                           max(obj0.width, obj1.width) * laparams.char_margin))

                # valign: obj0 and obj1 is vertically aligned.
                #
                #   +------+
                #   | obj0 |
                #   |      |
                #   +------+ - - -
                #     |    |     | (char_margin)
                #     +------+ - -
                #     | obj1 |
                #     |      |
                #     +------+
                #
                #     |<-->|
                #   (line_overlap)
                valign = (laparams.detect_vertical and
                          obj0.is_compatible(obj1) and
                          obj0.is_hoverlap(obj1) and
                          (min(obj0.width, obj1.width) * laparams.line_overlap <
                           obj0.hoverlap(obj1)) and
                          (obj0.vdistance(obj1) <
                           max(obj0.height, obj1.height) * laparams.char_margin))

                if ((halign and isinstance(line, LTTextLineHorizontal)) or
                    (valign and isinstance(line, LTTextLineVertical))):
                    # obj1 continues the line that is currently open.
                    line.add(obj1)
                elif line is not None:
                    # obj1 does not fit the open line: finish that line.
                    yield line
                    line = None
                else:
                    # No line is open; decide how to start one from obj0.
                    if valign and not halign:
                        line = LTTextLineVertical(laparams.word_margin)
                        line.add(obj0)
                        line.add(obj1)
                    elif halign and not valign:
                        line = LTTextLineHorizontal(laparams.word_margin)
                        line.add(obj0)
                        line.add(obj1)
                    else:
                        # Ambiguous or unaligned: obj0 forms a line alone.
                        line = LTTextLineHorizontal(laparams.word_margin)
                        line.add(obj0)
                        yield line
                        line = None
            obj0 = obj1
        if obj0 is None:
            # Empty input: there is no last object to flush.
            return
        if line is None:
            line = LTTextLineHorizontal(laparams.word_margin)
            line.add(obj0)
        yield line
        return

    # group_textlines: group neighboring lines to textboxes.
    def group_textlines(self, laparams, lines):
        """Yield the non-empty text boxes formed from neighboring ``lines``.

        ``lines`` is iterated more than once, so it must be a re-iterable
        collection rather than a one-shot iterator.
        """
        plane = Plane(self.bbox)
        plane.extend(lines)
        # Maps each line to the box that currently contains it.
        boxes = {}
        for line in lines:
            neighbors = line.find_neighbors(plane, laparams.line_margin)
            if line not in neighbors:
                continue
            members = []
            for obj1 in neighbors:
                members.append(obj1)
                if obj1 in boxes:
                    # Merge the box a neighbor already belongs to.
                    members.extend(boxes.pop(obj1))
            if isinstance(line, LTTextLineHorizontal):
                box = LTTextBoxHorizontal()
            else:
                box = LTTextBoxVertical()
            for obj in uniq(members):
                box.add(obj)
                boxes[obj] = box
        # Emit each box once, in the order of its first line.
        done = set()
        for line in lines:
            if line not in boxes:
                continue
            box = boxes[line]
            if box in done:
                continue
            done.add(box)
            if not box.is_empty():
                yield box
        return

    # group_textboxes: group textboxes hierarchically.
    def group_textboxes(self, laparams, boxes):
        """Merge ``boxes`` pairwise into one hierarchy of text groups.

        Returns a list holding the single remaining top-level object.
        ``boxes`` must be a non-empty, indexable sequence.
        """
        assert boxes, str((laparams, boxes))

        def dist(obj1, obj2):
            """A distance function between two TextBoxes.

            Consider the bounding rectangle for obj1 and obj2.
            Return its area less the areas of obj1 and obj2,
            shown as 'www' below. This value may be negative.
                    +------+..........+ (x1, y1)
                    | obj1 |wwwwwwwwww:
                    +------+www+------+
                    :wwwwwwwwww| obj2 |
            (x0, y0) +..........+------+
            """
            x0 = min(obj1.x0, obj2.x0)
            y0 = min(obj1.y0, obj2.y0)
            x1 = max(obj1.x1, obj2.x1)
            y1 = max(obj1.y1, obj2.y1)
            return ((x1-x0)*(y1-y0) - obj1.width*obj1.height - obj2.width*obj2.height)

        def isany(obj1, obj2):
            """Check if there's any other object between obj1 and obj2.
            """
            x0 = min(obj1.x0, obj2.x0)
            y0 = min(obj1.y0, obj2.y0)
            x1 = max(obj1.x1, obj2.x1)
            y1 = max(obj1.y1, obj2.y1)
            objs = set(plane.find((x0, y0, x1, y1)))
            return objs.difference((obj1, obj2))

        def key_obj(t):
            # Order candidate pairs by (deferred flag, distance) only; the
            # objects themselves must never be compared.
            (c, d, _, _) = t
            return (c, d)

        # Entries are (c, d, obj1, obj2): c is 0 for a fresh pair and 1 for
        # a pair that was deferred because something lies between them.
        dists = SortedListWithKey(key=key_obj)
        for i in range(len(boxes)):
            obj1 = boxes[i]
            for j in range(i+1, len(boxes)):
                obj2 = boxes[j]
                dists.add((0, dist(obj1, obj2), obj1, obj2))
        plane = Plane(self.bbox)
        plane.extend(boxes)
        while dists:
            (c, d, obj1, obj2) = dists.pop(0)
            if c == 0 and isany(obj1, obj2):
                dists.add((1, d, obj1, obj2))
                continue
            if (isinstance(obj1, (LTTextBoxVertical, LTTextGroupTBRL)) or
                isinstance(obj2, (LTTextBoxVertical, LTTextGroupTBRL))):
                group = LTTextGroupTBRL([obj1, obj2])
            else:
                group = LTTextGroupLRTB([obj1, obj2])
            plane.remove(obj1)
            plane.remove(obj2)
            removed = [obj1, obj2]
            # Drop every remaining pair that mentions a merged object.
            # Distinct loop names are used so that the outer c/d/obj1/obj2
            # are not overwritten on Python 2, where list-comprehension
            # variables leak into the enclosing scope.
            to_remove = [entry for entry in dists
                         if (entry[2] in removed or entry[3] in removed)]
            for r in to_remove:
                dists.remove(r)
            for other in plane:
                dists.add((0, dist(group, other), group, other))
            plane.add(group)
        assert len(plane) == 1, str(len(plane))
        return list(plane)

    def analyze(self, laparams):
        """Group the characters of this container and reorder its children.

        Afterwards ``_objs`` holds the text boxes, followed by the
        non-character objects, followed by the empty text lines. When there
        are no characters, the non-character objects are analyzed and the
        children are left as they are.
        """
        # textobjs is a list of LTChar objects, i.e.
        # it has all the individual characters in the page.
        (textobjs, otherobjs) = fsplit(lambda obj: isinstance(obj, LTChar), self)
        for obj in otherobjs:
            obj.analyze(laparams)
        if not textobjs:
            return
        textlines = list(self.group_objects(laparams, textobjs))
        (empties, textlines) = fsplit(lambda obj: obj.is_empty(), textlines)
        for obj in empties:
            obj.analyze(laparams)
        textboxes = list(self.group_textlines(laparams, textlines))
        if -1 <= laparams.boxes_flow <= +1 and textboxes:
            self.groups = self.group_textboxes(laparams, textboxes)
            assigner = IndexAssigner()
            for group in self.groups:
                group.analyze(laparams)
                assigner.run(group)
            textboxes.sort(key=lambda box: box.index)
        else:
            def getkey(box):
                # Vertical boxes first, then horizontal ones.
                if isinstance(box, LTTextBoxVertical):
                    return (0, -box.x1, box.y0)
                else:
                    return (1, box.y0, box.x0)
            textboxes.sort(key=getkey)
        self._objs = textboxes + otherobjs + empties
        return

703 704 705

##  LTFigure
##
706
class LTFigure(LTLayoutContainer):
    """A layout container for a named figure placed through a matrix.

    ``bbox`` is given as (x, y, w, h); its four corners are transformed
    by ``matrix`` and the bounding box of the result becomes the box of
    the figure.
    """

    def __init__(self, name, bbox, matrix):
        self.name = name
        self.matrix = matrix
        (x, y, w, h) = bbox
        bbox = get_bound(apply_matrix_pt(matrix, (p, q))
                         for (p, q) in ((x, y), (x+w, y), (x, y+h), (x+w, y+h)))
        LTLayoutContainer.__init__(self, bbox)
        return

    def __repr__(self):
        return ('<%s(%s) %s matrix=%s>' %
                (self.__class__.__name__, self.name,
                 bbox2str(self.bbox), matrix2str(self.matrix)))

    def analyze(self, laparams):
        """Run the layout analysis only when ``laparams.all_texts`` is set."""
        if not laparams.all_texts:
            return
        LTLayoutContainer.analyze(self, laparams)
        return
727 728 729 730


##  LTPage
##
731
class LTPage(LTLayoutContainer):
    """A layout container for a whole page.

    ``pageid`` and ``rotate`` are stored unchanged as attributes.
    """

    def __init__(self, pageid, bbox, rotate=0):
        LTLayoutContainer.__init__(self, bbox)
        self.pageid = pageid
        self.rotate = rotate
        return

    def __repr__(self):
        return ('<%s(%r) %s rotate=%r>' %
                (self.__class__.__name__, self.pageid,
                 bbox2str(self.bbox), self.rotate))