diff --git a/pom.xml b/pom.xml index 2c70a3a818350075e788b530d7496696d4fd6d4d..21fa9df4c7cca9a6f27eda23ff9f2c80d8d3c816 100644 --- a/pom.xml +++ b/pom.xml @@ -5,7 +5,7 @@ <artifactId>sambox</artifactId> <packaging>jar</packaging> <name>sambox</name> - <version>1.1.19</version> + <version>1.1.41</version> <description>An Apache PDFBox fork intended to be used as PDF processor for Sejda and PDFsam related projects</description> <url>http://www.sejda.org</url> @@ -33,7 +33,7 @@ <connection>scm:git:git@github.com:torakiki/sambox.git</connection> <developerConnection>scm:git:git@github.com:torakiki/sambox.git</developerConnection> <url>scm:git:git@github.com:torakiki/sambox.git</url> - <tag>v1.1.19</tag> + <tag>v1.1.41</tag> </scm> <developers> @@ -138,6 +138,47 @@ </plugins> </build> </profile> + <profile> + <id>private-release</id> + <distributionManagement> + <snapshotRepository> + <id>sejda-pro-snapshot</id> + <url>http://mvn.sejda.com/artifactory/libs-snapshot</url> + </snapshotRepository> + <repository> + <id>sejda-pro</id> + <url>http://mvn.sejda.com/artifactory/libs-release</url> + </repository> + </distributionManagement> + <build> + <plugins> + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-release-plugin</artifactId> + <version>2.5.2</version> + <configuration> + <tagNameFormat>v@{project.version}</tagNameFormat> + <preparationGoals>clean install</preparationGoals> + <localCheckout>true</localCheckout> + </configuration> + </plugin> + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-gpg-plugin</artifactId> + <version>1.6</version> + <executions> + <execution> + <id>sign-artifacts</id> + <phase>verify</phase> + <goals> + <goal>sign</goal> + </goals> + </execution> + </executions> + </plugin> + </plugins> + </build> + </profile> </profiles> <build> @@ -192,7 +233,7 @@ <artifactId>maven-surefire-plugin</artifactId> <version>2.19.1</version> <configuration> - <argLine>-Xmx768m</argLine> + <argLine>-Xmx768m 
-Dsun.java2d.cmm=sun.java2d.cmm.kcms.KcmsServiceProvider</argLine> <excludes> <exclude>org/sejda/sambox/rendering/TestPDFToImage.java</exclude> </excludes> @@ -200,6 +241,187 @@ <reuseForks>false</reuseForks> </configuration> </plugin> + <!-- PDFBOX-3974: download test files from JIRA and keep them in repository cache --> + <plugin> + <groupId>com.googlecode.maven-download-plugin</groupId> + <artifactId>download-maven-plugin</artifactId> + <version>1.3.0</version> + <executions> + <execution> + <id>PDFBOX-3703</id> + <phase>generate-test-resources</phase> + <goals> + <goal>wget</goal> + </goals> + <configuration> + <url>https://issues.apache.org/jira/secure/attachment/12854913/966635-p12.pdf</url> + <outputDirectory>${project.build.directory}/pdfs</outputDirectory> + <outputFileName>PDFBOX-3703-966635-p12.pdf</outputFileName> + <sha512>28fcb3be0bd3aa983a05107912b7c75ec8203b1ab14e7e76fa2b542d9d2dec9c96921d4220610dff96a299d935d9fffb3be2b552421b516a93344b14aed0ce0d</sha512> + </configuration> + </execution> + <execution> + <id>PDFBOX-3747</id> + <phase>generate-test-resources</phase> + <goals> + <goal>wget</goal> + </goals> + <configuration> + <url>https://github.com/jondot/dotfiles/blob/master/.fonts/calibri.ttf?raw=true</url> + <outputDirectory>${project.build.directory}/fonts</outputDirectory> + <outputFileName>PDFBOX-3747-calibri.ttf</outputFileName> + <sha512>b7eb8e6f2a4549eb68280d0d8834b2a14f711f2d15ffe1420fde654f05dd939181c617bf51e11c44aededaa729966b49288b0a07a35b79aa73a08b8c48b72de0</sha512> + </configuration> + </execution> + <execution> + <id>PDFBOX-3948</id> + <phase>generate-test-resources</phase> + <goals> + <goal>wget</goal> + </goals> + <configuration> + <url>https://issues.apache.org/jira/secure/attachment/12890034/EUWO6SQS5TM4VGOMRD3FLXZHU35V2CP2.pdf</url> + <outputDirectory>${project.build.directory}/pdfs</outputDirectory> + <outputFileName>PDFBOX-3948-EUWO6SQS5TM4VGOMRD3FLXZHU35V2CP2.pdf</outputFileName> + 
<sha512>f8a9b0b9ea6132f24e54136a40ad99d67df2402f3849a5cb0b7d80cd72298737fe4701e0e77ddd602a06e3ea0a7e107ca40d8d29389eea5834ff37245829c2d2</sha512> + </configuration> + </execution> + <execution> + <id>PDFBOX-3949</id> + <phase>generate-test-resources</phase> + <goals> + <goal>wget</goal> + </goals> + <configuration> + <url>https://issues.apache.org/jira/secure/attachment/12890037/MKFYUGZWS3OPXLLVU2Z4LWCTVA5WNOGF.pdf</url> + <outputDirectory>${project.build.directory}/pdfs</outputDirectory> + <outputFileName>PDFBOX-3949-MKFYUGZWS3OPXLLVU2Z4LWCTVA5WNOGF.pdf</outputFileName> + <sha512>f450fb40ed5589ce0f390eb110d78bc721b766c34b753770b0cb00b2e40ffe15878f54df2423ab99d7df80dd91512858bf56a7cdc392d5c179b4440176fdd2fb</sha512> + </configuration> + </execution> + <execution> + <id>PDFBOX-3950</id> + <phase>generate-test-resources</phase> + <goals> + <goal>wget</goal> + </goals> + <configuration> + <url>https://issues.apache.org/jira/secure/attachment/12890042/23EGDHXSBBYQLKYOKGZUOVYVNE675PRD.pdf</url> + <outputDirectory>${project.build.directory}/pdfs</outputDirectory> + <outputFileName>PDFBOX-3950-23EGDHXSBBYQLKYOKGZUOVYVNE675PRD.pdf</outputFileName> + <sha512>ee1d464c3ed2ad91a4cafbc474b38e5c961282f53ef599d6d10e02058da5a67064550ddc54774dfa843a8b45f34b7e6e8ab4f9a445ba459fdcd858e8dce65b25</sha512> + </configuration> + </execution> + <execution> + <id>PDFBOX-3951</id> + <phase>generate-test-resources</phase> + <goals> + <goal>wget</goal> + </goals> + <configuration> + <url>https://issues.apache.org/jira/secure/attachment/12890047/FIHUZWDDL2VGPOE34N6YHWSIGSH5LVGZ.pdf</url> + <outputDirectory>${project.build.directory}/pdfs</outputDirectory> + <outputFileName>PDFBOX-3951-FIHUZWDDL2VGPOE34N6YHWSIGSH5LVGZ.pdf</outputFileName> + <sha512>2c0b91beb4a2b098738512fefdd40135bf66286cd350ac4e155a5a0150d649acb1da819c817ee9822e8686f526af6b7862fc63a0dae6dc7f1407c7f8b271c65e</sha512> + </configuration> + </execution> + <execution> + <id>PDFBOX-3964</id> + <phase>generate-test-resources</phase> + 
<goals> + <goal>wget</goal> + </goals> + <configuration> + <url>https://issues.apache.org/jira/secure/attachment/12892097/c687766d68ac766be3f02aaec5e0d713_2.pdf</url> + <outputDirectory>${project.build.directory}/pdfs</outputDirectory> + <outputFileName>PDFBOX-3964-c687766d68ac766be3f02aaec5e0d713_2.pdf</outputFileName> + <sha512>0457fd291a7f83f531fef205128929c8fa8147dd781ea7b7cd49d4d1287941989e72739329a7b172c6f53df0b54d991b514b9baa6145effa8ec7705ef273877b</sha512> + </configuration> + </execution> + <execution> + <id>PDFBOX-4022</id> + <phase>generate-test-resources</phase> + <goals> + <goal>wget</goal> + </goals> + <configuration> + <url>https://issues.apache.org/jira/secure/attachment/12899008/selection.pdf</url> + <outputDirectory>${project.build.directory}/pdfs</outputDirectory> + <outputFileName>PDFBOX-4022-selection.pdf</outputFileName> + <sha512>d08af71bc8e3911ee3ed7c9ce9d4acc0562488981bc83a9c612de9d5f0640fd2d9805f600810f1cad5293fa4acda12444a0dcefa2543125c95d06059feb2c4f0</sha512> + </configuration> + </execution> + <execution> + <id>PDFBOX-4106</id> + <phase>generate-test-resources</phase> + <goals> + <goal>wget</goal> + </goals> + <configuration> + <url>https://ipafont.ipa.go.jp/old/ipafont/ipag00303.php</url> + <outputDirectory>${project.build.directory}/fonts</outputDirectory> + <outputFileName>ipag00303.zip</outputFileName> + <unpack>true</unpack> + <sha512>59535137c649a2f8bdbb463cd716426811a6003a65883ca6e45bb0af1d526b3889af0fba3a353e90bc8d373cd32b90a27ff9ff6916ecbccb42e922c09e9b046a</sha512> + </configuration> + </execution> + <execution> + <id>PDFBOX-4106b</id> + <phase>generate-test-resources</phase> + <goals> + <goal>wget</goal> + </goals> + <configuration> + <url>https://ipafont.ipa.go.jp/old/ipafont/ipagp00303.php</url> + <outputDirectory>${project.build.directory}/fonts</outputDirectory> + <outputFileName>ipagp00303.zip</outputFileName> + <unpack>true</unpack> + 
<sha512>26d0a9bfba7f5457a98b0bf45a4a6b081bca4140047a0886625691231459f8c81a6cdbe523e9abcbd45fd7caed21d78f1baf3a2cf9167320f6b79be3d697cb5b</sha512> + </configuration> + </execution> + <execution> + <id>PDFBOX-4115</id> + <phase>generate-test-resources</phase> + <goals> + <goal>wget</goal> + </goals> + <configuration> + <url>https://issues.apache.org/jira/secure/attachment/12911053/n019003l.pfb</url> + <outputDirectory>${project.build.directory}/fonts</outputDirectory> + <outputFileName>n019003l.pfb</outputFileName> + <sha512>8eafe21ffa6f3d7d0a50e9f4e5bcdeb727e804b552d74e65b709e778c9ed4605e5aa63743be285f0bc17ad162768583fec4196e1d1146d98f8703359247f22d0</sha512> + </configuration> + </execution> + <execution> + <id>PDFBOX-4197</id> + <phase>generate-test-resources</phase> + <goals> + <goal>wget</goal> + </goals> + <configuration> + <url>https://issues.apache.org/jira/secure/attachment/12919726/sample.pdf</url> + <outputDirectory>${project.build.directory}/pdfs</outputDirectory> + <outputFileName>PDFBOX-4197.pdf</outputFileName> + <sha512>6fefc869dff9db8cd539db177d35beeacc62304173245742eaee8882dab330860a31cbbd4c4ec6cc724603cc453afc07ec61361fbc1e80a47f44b04ccfbaf40d</sha512> + </configuration> + </execution> + <execution> + <id>PDFBOX-4184</id> + <phase>generate-test-resources</phase> + <goals> + <goal>wget</goal> + </goals> + <configuration> + <url>http://www.crh.noaa.gov/Image/gjt/images/ImageGallery/Uncompahgre_small.jpg</url> + <!-- file is also 032163.jpg + from http://downloads.digitalcorpora.org/corpora/files/govdocs1/zipfiles/032.zip --> + <outputDirectory>${project.build.directory}/imgs</outputDirectory> + <outputFileName>PDFBOX-4184-032163.jpg</outputFileName> + <sha512>35241c979d3808ca9d2641b5ec5e40637132b313f75070faca8b8f6d00ddce394070414236db3993f1092fe3bc16995750d528b6d803a7851423c14c308ccdde</sha512> + </configuration> + </execution> + </executions> + </plugin> </plugins> </build> @@ -222,12 +444,12 @@ <dependency> <groupId>commons-io</groupId> 
<artifactId>commons-io</artifactId> - <version>2.5</version> + <version>2.6</version> </dependency> <dependency> <groupId>org.apache.pdfbox</groupId> <artifactId>fontbox</artifactId> - <version>2.0.8</version> + <version>2.0.11</version> <exclusions> <exclusion> <artifactId>commons-logging</artifactId> @@ -239,13 +461,19 @@ <groupId>org.bouncycastle</groupId> <artifactId>bcmail-jdk15on</artifactId> <optional>true</optional> - <version>1.56</version> + <version>1.60</version> </dependency> <dependency> <groupId>org.bouncycastle</groupId> <artifactId>bcprov-jdk15on</artifactId> <optional>true</optional> - <version>1.56</version> + <version>1.60</version> + </dependency> + <dependency> + <groupId>org.apache.pdfbox</groupId> + <artifactId>jbig2-imageio</artifactId> + <version>3.0.1</version> + <scope>test</scope> </dependency> <dependency> <groupId>ch.qos.logback</groupId> diff --git a/src/main/java/org/sejda/sambox/contentstream/PDFStreamEngine.java b/src/main/java/org/sejda/sambox/contentstream/PDFStreamEngine.java index 6675f3ac63dbc628994c57b93dba83409e6c4f89..09242d3d863e17512674ced99a5b3d21dd18324d 100644 --- a/src/main/java/org/sejda/sambox/contentstream/PDFStreamEngine.java +++ b/src/main/java/org/sejda/sambox/contentstream/PDFStreamEngine.java @@ -16,6 +16,7 @@ */ package org.sejda.sambox.contentstream; +import static java.util.Objects.isNull; import static java.util.Optional.ofNullable; import java.awt.geom.GeneralPath; @@ -104,6 +105,22 @@ public abstract class PDFStreamEngine operators.put(op.getName(), op); } + /** + * Adds an operator processor to the engine if there isn't an operator already associated with the PDF operator. 
+ * + * @param op operator processor + * @return true if the operator is added, false if not (there's already an operator associated) + */ + public final boolean addOperatorIfAbsent(OperatorProcessor op) + { + if (isNull(operators.putIfAbsent(op.getName(), op))) + { + op.setContext(this); + return true; + } + return false; + } + /** * Initialises the stream engine for the given page. */ @@ -163,7 +180,10 @@ public abstract class PDFStreamEngine throw new IllegalStateException("No current page, call " + "#processChildStream(PDContentStream, PDPage) instead"); } - processStream(form); + if (!form.getCOSObject().isEmpty()) + { + processStream(form); + } } /** @@ -431,7 +451,7 @@ public abstract class PDFStreamEngine * @param contentStream the content stream * @throws IOException if there is an exception while processing the stream */ - protected void processStream(PDContentStream contentStream) throws IOException + public void processStream(PDContentStream contentStream) throws IOException { PDResources parent = pushResources(contentStream); Stack<PDGraphicsState> savedStack = saveGraphicsStack(); @@ -879,7 +899,7 @@ public abstract class PDFStreamEngine protected final Stack<PDGraphicsState> saveGraphicsStack() { Stack<PDGraphicsState> savedStack = graphicsStack; - graphicsStack = new Stack<PDGraphicsState>(); + graphicsStack = new Stack<>(); graphicsStack.add(savedStack.peek().clone()); return savedStack; } @@ -956,7 +976,7 @@ public abstract class PDFStreamEngine } /** - * Returns the stream' resources. + * @return the stream's resources.
This is mainly to be used by the {@link OperatorProcessor} classes */ public PDResources getResources() { diff --git a/src/main/java/org/sejda/sambox/cos/COSArray.java b/src/main/java/org/sejda/sambox/cos/COSArray.java index bbd0bfa6ac87fc94ced7713ca05bb0f411c868fd..91d6e8b0fa20c367a31eb18eca8c7f5be7313fe4 100644 --- a/src/main/java/org/sejda/sambox/cos/COSArray.java +++ b/src/main/java/org/sejda/sambox/cos/COSArray.java @@ -22,7 +22,6 @@ import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; -import java.util.Collections; import java.util.Iterator; import java.util.List; import java.util.ListIterator; @@ -480,7 +479,7 @@ public class COSArray extends COSBase implements List<COSBase> public List<? extends COSBase> toList() { ArrayList<COSBase> retList = new ArrayList<>(size()); - Collections.copy(retList, objects); + retList.addAll(objects); return retList; } diff --git a/src/main/java/org/sejda/sambox/cos/COSDictionary.java b/src/main/java/org/sejda/sambox/cos/COSDictionary.java index af70ebda8cc26925ce2d420141dbf01adff27e5f..0b6a1ef54cbf45dc10fa8e622268fafbdc0aec20 100644 --- a/src/main/java/org/sejda/sambox/cos/COSDictionary.java +++ b/src/main/java/org/sejda/sambox/cos/COSDictionary.java @@ -592,7 +592,6 @@ public class COSDictionary extends COSBase /** * Convenience method that will get the dictionary object that is expected to be a name and convert it to a string. - * Null is returned if the entry does not exist in the dictionary. * * @param key The key to the item in the dictionary. * @param defaultValue The value to return if the dictionary item is null. @@ -605,7 +604,6 @@ public class COSDictionary extends COSBase /** * Convenience method that will get the dictionary object that is expected to be a name and convert it to a string. - * Null is returned if the entry does not exist in the dictionary. * * @param key The key to the item in the dictionary. 
* @param defaultValue The value to return if the dictionary item is null. @@ -643,7 +641,6 @@ public class COSDictionary extends COSBase /** * Convenience method that will get the dictionary object that is expected to be a name and convert it to a string. - * Null is returned if the entry does not exist in the dictionary. * * @param key The key to the item in the dictionary. * @param defaultValue The default value to return. @@ -656,7 +653,6 @@ public class COSDictionary extends COSBase /** * Convenience method that will get the dictionary object that is expected to be a name and convert it to a string. - * Null is returned if the entry does not exist in the dictionary. * * @param key The key to the item in the dictionary. * @param defaultValue The default value to return. @@ -709,7 +705,6 @@ public class COSDictionary extends COSBase /** * Convenience method that will get the dictionary object that is expected to be a name and convert it to a string. - * Null is returned if the entry does not exist in the dictionary. * * @param embedded The embedded dictionary. * @param key The key to the item in the dictionary. @@ -1167,6 +1162,26 @@ public class COSDictionary extends COSBase return getItem(COSName.getPDFName(key)); } + /** + * This is a special case of getItem that takes multiple keys, it will handle the situation + * where multiple keys could get the same value, ie if either CS or ColorSpace is used to get + * the colorspace. This will get an object from this dictionary. + * + * @param firstKey The first key to try. + * @param secondKey The second key to try. + * + * @return The object that matches the key. + */ + public COSBase getItem(COSName firstKey, COSName secondKey) + { + COSBase retval = getItem(firstKey); + if (retval == null && secondKey != null) + { + retval = getItem(secondKey); + } + return retval; + } + /** * @return names of the entries in this dictionary. The returned set is in the order the entries were added to the * dictionary. 
diff --git a/src/main/java/org/sejda/sambox/cos/COSDocument.java b/src/main/java/org/sejda/sambox/cos/COSDocument.java index 08aa2dd4bc6b95559cf64458ea472d4dd09787db..aa735a2d02815af6bd74746826e3199e1945397a 100644 --- a/src/main/java/org/sejda/sambox/cos/COSDocument.java +++ b/src/main/java/org/sejda/sambox/cos/COSDocument.java @@ -16,6 +16,7 @@ */ package org.sejda.sambox.cos; +import static java.util.Objects.nonNull; import static java.util.Optional.ofNullable; import static org.sejda.util.RequireUtils.requireNotBlank; import static org.sejda.util.RequireUtils.requireNotNullArg; @@ -83,7 +84,8 @@ public class COSDocument extends COSBase */ public boolean isEncrypted() { - return trailer.getCOSObject().getDictionaryObject(COSName.ENCRYPT) != null; + return nonNull( + trailer.getCOSObject().getDictionaryObject(COSName.ENCRYPT, COSDictionary.class)); } /** diff --git a/src/main/java/org/sejda/sambox/cos/COSName.java b/src/main/java/org/sejda/sambox/cos/COSName.java index fc09c596f9dceec65ff6758900ad3a1051cb6dea..a5025650b36c09f9d597eaf4d2716978e117de1a 100644 --- a/src/main/java/org/sejda/sambox/cos/COSName.java +++ b/src/main/java/org/sejda/sambox/cos/COSName.java @@ -34,6 +34,7 @@ public final class COSName extends COSBase implements Comparable<COSName> // A public static final COSName A = newCommonInstance("A"); public static final COSName AA = newCommonInstance("AA"); + public static final COSName AC = newCommonInstance("AC"); public static final COSName ACRO_FORM = newCommonInstance("AcroForm"); public static final COSName ACTUAL_TEXT = newCommonInstance("ActualText"); public static final COSName ADBE_PKCS7_DETACHED = newCommonInstance("adbe.pkcs7.detached"); @@ -124,6 +125,7 @@ public final class COSName extends COSBase implements Comparable<COSName> public static final COSName CMAPNAME = newCommonInstance("CMapName"); public static final COSName CMYK = newCommonInstance("CMYK"); public static final COSName CO = newCommonInstance("CO"); + public static final 
COSName COLOR = new COSName("Color"); public static final COSName COLOR_BURN = newCommonInstance("ColorBurn"); public static final COSName COLOR_DODGE = newCommonInstance("ColorDodge"); public static final COSName COLORANTS = newCommonInstance("Colorants"); @@ -184,6 +186,7 @@ public final class COSName extends COSBase implements Comparable<COSName> public static final COSName DOC_OPEN = newCommonInstance("DocOpen"); public static final COSName DOC_TIME_STAMP = newCommonInstance("DocTimeStamp"); public static final COSName DOCMDP = newCommonInstance("DocMDP"); + public static final COSName DOCUMENT = new COSName("Document"); public static final COSName DOMAIN = newCommonInstance("Domain"); public static final COSName DOS = newCommonInstance("DOS"); public static final COSName DP = newCommonInstance("DP"); @@ -264,6 +267,7 @@ public final class COSName extends COSBase implements Comparable<COSName> public static final COSName HIDE_MENUBAR = newCommonInstance("HideMenubar"); public static final COSName HIDE_TOOLBAR = newCommonInstance("HideToolbar"); public static final COSName HIDE_WINDOWUI = newCommonInstance("HideWindowUI"); + public static final COSName HUE = new COSName("Hue"); // I public static final COSName I = newCommonInstance("I"); public static final COSName IC = newCommonInstance("IC"); @@ -284,6 +288,9 @@ public final class COSName extends COSBase implements Comparable<COSName> public static final COSName INTERPOLATE = newCommonInstance("Interpolate"); public static final COSName IT = newCommonInstance("IT"); public static final COSName ITALIC_ANGLE = newCommonInstance("ItalicAngle"); + public static final COSName ISSUER = newCommonInstance("Issuer"); + public static final COSName IX = newCommonInstance("IX"); + // J public static final COSName JAVA_SCRIPT = newCommonInstance("JavaScript"); public static final COSName JBIG2_DECODE = newCommonInstance("JBIG2Decode"); @@ -334,6 +341,7 @@ public final class COSName extends COSBase implements 
Comparable<COSName> public static final COSName MCID = newCommonInstance("MCID"); public static final COSName MDP = newCommonInstance("MDP"); public static final COSName MEDIA_BOX = newCommonInstance("MediaBox"); + public static final COSName MEASURE = new COSName("Measure"); public static final COSName METADATA = newCommonInstance("Metadata"); public static final COSName MISSING_WIDTH = newCommonInstance("MissingWidth"); public static final COSName MK = newCommonInstance("MK"); @@ -346,6 +354,7 @@ public final class COSName extends COSBase implements Comparable<COSName> public static final COSName NAME = newCommonInstance("Name"); public static final COSName NAMES = newCommonInstance("Names"); public static final COSName NEED_APPEARANCES = newCommonInstance("NeedAppearances"); + public static final COSName NEW_WINDOW = new COSName("NewWindow"); public static final COSName NEXT = newCommonInstance("Next"); public static final COSName NM = newCommonInstance("NM"); public static final COSName NON_EFONT_NO_WARN = newCommonInstance("NonEFontNoWarn"); @@ -444,6 +453,7 @@ public final class COSName extends COSBase implements Comparable<COSName> // S public static final COSName S = newCommonInstance("S"); public static final COSName SA = newCommonInstance("SA"); + public static final COSName SATURATION = new COSName("Saturation"); public static final COSName SCREEN = newCommonInstance("Screen"); public static final COSName SE = newCommonInstance("SE"); public static final COSName SEPARATION = newCommonInstance("Separation"); @@ -527,6 +537,8 @@ public final class COSName extends COSBase implements Comparable<COSName> public static final COSName VIEW_AREA = newCommonInstance("ViewArea"); public static final COSName VIEW_CLIP = newCommonInstance("ViewClip"); public static final COSName VIEWER_PREFERENCES = newCommonInstance("ViewerPreferences"); + public static final COSName VOLUME = new COSName("Volume"); + public static final COSName VP = new COSName("VP"); // W public 
static final COSName W = newCommonInstance("W"); public static final COSName W2 = newCommonInstance("W2"); diff --git a/src/main/java/org/sejda/sambox/cos/COSStream.java b/src/main/java/org/sejda/sambox/cos/COSStream.java index 6c41ba09c77d2c0f4d412ba0be17306f7eef17d3..60a523c29d2e3574a2c2703fed8b30e34c932f63 100644 --- a/src/main/java/org/sejda/sambox/cos/COSStream.java +++ b/src/main/java/org/sejda/sambox/cos/COSStream.java @@ -401,7 +401,7 @@ public class COSStream extends COSDictionary implements Closeable, Encryptable /** * Sets the function to be used to encrypt this stream. * - * @param encrypted + * @param encryptor */ public void setEncryptor(Function<InputStream, InputStream> encryptor) { @@ -471,28 +471,39 @@ public class COSStream extends COSDictionary implements Closeable, Encryptable /** * Adds Flate decode filter to the current filters list if possible + * + * @return true if the FlateDecode filter has been added */ - public void addCompression() throws IOException + public boolean addCompression() { if (canCompress()) { - COSArray newFilters = new COSArray(COSName.FLATE_DECODE); - COSBase filters = getFilters(); - if (filters instanceof COSName) + try { - newFilters.add(filters); - setFilters(newFilters); - } - else if (filters instanceof COSArray) - { - newFilters.addAll((COSArray) filters); - setFilters(newFilters); + COSArray newFilters = new COSArray(COSName.FLATE_DECODE); + COSBase filters = getFilters(); + if (filters instanceof COSName) + { + newFilters.add(filters); + setFilters(newFilters); + } + else if (filters instanceof COSArray) + { + newFilters.addAll((COSArray) filters); + setFilters(newFilters); + } + else + { + setFilters(COSName.FLATE_DECODE); + } + return true; } - else + catch (IOException e) { - setFilters(COSName.FLATE_DECODE); + LOG.warn("Unable to add FlateDecode filter to the stream", e); } } + return false; } /** diff --git a/src/main/java/org/sejda/sambox/filter/ASCIIHexFilter.java
b/src/main/java/org/sejda/sambox/filter/ASCIIHexFilter.java index 90e131f019e1d8299c68d57814416c9e07d5dede..2bf1c89f07b1299903ba91416a43e44349a16913 100644 --- a/src/main/java/org/sejda/sambox/filter/ASCIIHexFilter.java +++ b/src/main/java/org/sejda/sambox/filter/ASCIIHexFilter.java @@ -49,47 +49,57 @@ final class ASCIIHexFilter extends Filter }; @Override - public DecodeResult decode(InputStream encoded, OutputStream decoded, - COSDictionary parameters, int index) throws IOException + public DecodeResult decode(InputStream encoded, OutputStream decoded, COSDictionary parameters, + int index) throws IOException { + // TODO iText and pdfjs both have similar impl which is different from what we have. Maybe we can replace this + // with the algorithm in pdfjs int value, firstByte, secondByte; - while ((firstByte = encoded.read()) != -1) + try { - // always after first char - while (isWhitespace(firstByte)) + while ((firstByte = encoded.read()) != -1) { - firstByte = encoded.read(); - } - if (firstByte == -1 || isEOD(firstByte)) - { - break; - } - - if (REVERSE_HEX[firstByte] == -1) - { - LOG.error("Invalid hex, int: " + firstByte + " char: " + (char)firstByte); - } - value = REVERSE_HEX[firstByte] * 16; - secondByte = encoded.read(); - - if (secondByte == -1 || isEOD(secondByte)) - { - // second value behaves like 0 in case of EOD - decoded.write(value); - break; - } - if (secondByte >= 0) - { - if (REVERSE_HEX[secondByte] == -1) + // always after first char + while (isWhitespace(firstByte)) + { + firstByte = encoded.read(); + } + if (firstByte == -1 || isEOD(firstByte)) { - LOG.error("Invalid hex, int: " + secondByte + " char: " + (char)secondByte); + break; } - value += REVERSE_HEX[secondByte]; + + if (REVERSE_HEX[firstByte] == -1) + { + LOG.error("Invalid hex, int: " + firstByte + " char: " + (char) firstByte); + } + value = REVERSE_HEX[firstByte] * 16; + secondByte = encoded.read(); + + if (secondByte == -1 || isEOD(secondByte)) + { + // second value behaves like 
0 in case of EOD + decoded.write(value); + break; + } + if (secondByte >= 0) + { + if (REVERSE_HEX[secondByte] == -1) + { + LOG.error( + "Invalid hex, int: " + secondByte + " char: " + (char) secondByte); + } + value += REVERSE_HEX[secondByte]; + } + decoded.write(value); } - decoded.write(value); + decoded.flush(); + return new DecodeResult(parameters); + } + catch (ArrayIndexOutOfBoundsException e) + { + throw new IOException("Illegal character in ASCIIHexFilter", e); } - decoded.flush(); - return new DecodeResult(parameters); } // whitespace diff --git a/src/main/java/org/sejda/sambox/filter/CCITTFaxDecoderStream.java b/src/main/java/org/sejda/sambox/filter/CCITTFaxDecoderStream.java index 8b80cba8cc2f456fc4638bbb2aea0b3e46b0e6d0..b833ec52ad3b2c48377ff8064a6d0c725451d04d 100644 --- a/src/main/java/org/sejda/sambox/filter/CCITTFaxDecoderStream.java +++ b/src/main/java/org/sejda/sambox/filter/CCITTFaxDecoderStream.java @@ -279,7 +279,7 @@ final class CCITTFaxDecoderStream extends FilterInputStream { if (optionByteAligned) { - bufferPos = -1; // Skip remaining bits and fetch the next byte at row start + resetBuffer(); } eof: while (true) { @@ -316,7 +316,7 @@ final class CCITTFaxDecoderStream extends FilterInputStream { if (optionByteAligned) { - bufferPos = -1; // Skip remaining bits and fetch the next byte at row start + resetBuffer(); } decode2D(); } @@ -421,30 +421,14 @@ final class CCITTFaxDecoderStream extends FilterInputStream { return total; } - else - { - n = tree.root; - } + n = tree.root; } } } - private void resetBuffer() throws IOException + private void resetBuffer() { - for (int i = 0; i < decodedRow.length; i++) - { - decodedRow[i] = 0; - } - - while (true) - { - if (bufferPos == -1) - { - return; - } - - readBit(); - } + bufferPos = -1; } int buffer = -1; @@ -688,9 +672,8 @@ final class CCITTFaxDecoderStream extends FilterInputStream } } - static final short[][] BLACK_CODES = { - { // 2 bits - 0x2, 0x3, }, + static final short[][] BLACK_CODES = { { 
// 2 bits + 0x2, 0x3, }, { // 3 bits 0x2, 0x3, }, { // 4 bits @@ -718,9 +701,8 @@ final class CCITTFaxDecoderStream extends FilterInputStream { // 13 bits 0x4a, 0x4b, 0x4c, 0x4d, 0x52, 0x53, 0x54, 0x55, 0x5a, 0x5b, 0x64, 0x65, 0x6c, 0x6d, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, } }; - static final short[][] BLACK_RUN_LENGTHS = { - { // 2 bits - 3, 2, }, + static final short[][] BLACK_RUN_LENGTHS = { { // 2 bits + 3, 2, }, { // 3 bits 1, 4, }, { // 4 bits @@ -748,9 +730,8 @@ final class CCITTFaxDecoderStream extends FilterInputStream 640, 704, 768, 832, 1280, 1344, 1408, 1472, 1536, 1600, 1664, 1728, 512, 576, 896, 960, 1024, 1088, 1152, 1216, } }; - public static final short[][] WHITE_CODES = { - { // 4 bits - 0x7, 0x8, 0xb, 0xc, 0xe, 0xf, }, + public static final short[][] WHITE_CODES = { { // 4 bits + 0x7, 0x8, 0xb, 0xc, 0xe, 0xf, }, { // 5 bits 0x12, 0x13, 0x14, 0x1b, 0x7, 0x8, }, { // 6 bits @@ -771,9 +752,8 @@ final class CCITTFaxDecoderStream extends FilterInputStream { // 12 bits 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x1c, 0x1d, 0x1e, 0x1f, } }; - public static final short[][] WHITE_RUN_LENGTHS = { - { // 4 bits - 2, 3, 4, 5, 6, 7, }, + public static final short[][] WHITE_RUN_LENGTHS = { { // 4 bits + 2, 3, 4, 5, 6, 7, }, { // 5 bits 128, 8, 9, 64, 10, 11, }, { // 6 bits diff --git a/src/main/java/org/sejda/sambox/filter/CCITTFaxFilter.java b/src/main/java/org/sejda/sambox/filter/CCITTFaxFilter.java index 2a6e77d79b4a8ce7e09f8b6d7937c1cc459a8078..d62b01d0646288b70af659f7d117844acd3fe248 100644 --- a/src/main/java/org/sejda/sambox/filter/CCITTFaxFilter.java +++ b/src/main/java/org/sejda/sambox/filter/CCITTFaxFilter.java @@ -38,9 +38,6 @@ final class CCITTFaxFilter extends Filter public DecodeResult decode(InputStream encoded, OutputStream decoded, COSDictionary parameters, int index) throws IOException { - DecodeResult result = new DecodeResult(new COSDictionary()); - result.getParameters().addAll(parameters); - // get decode parameters COSDictionary decodeParms = 
getDecodeParams(parameters, index); @@ -103,12 +100,6 @@ final class CCITTFaxFilter extends Filter invertBitmap(decompressed); } - // repair missing color space - if (!parameters.containsKey(COSName.COLORSPACE)) - { - result.getParameters().setItem(COSName.COLORSPACE, COSName.DEVICEGRAY); - } - decoded.write(decompressed); return new DecodeResult(parameters); } diff --git a/src/main/java/org/sejda/sambox/filter/DCTFilter.java b/src/main/java/org/sejda/sambox/filter/DCTFilter.java index 48575e9cdf78b65d1532c13d88793f6260270a25..ecc91ff83130ed8645e442a85be171da29b8e9ba 100644 --- a/src/main/java/org/sejda/sambox/filter/DCTFilter.java +++ b/src/main/java/org/sejda/sambox/filter/DCTFilter.java @@ -134,8 +134,7 @@ final class DCTFilter extends Filter // already CMYK break; case 1: - // TODO YCbCr - LOG.warn("YCbCr JPEGs not implemented"); + raster = fromYCbCrtoCMYK(raster); break; case 2: raster = fromYCCKtoCMYK(raster); @@ -268,6 +267,44 @@ final class DCTFilter extends Filter return writableRaster; } + private WritableRaster fromYCbCrtoCMYK(Raster raster) + { + WritableRaster writableRaster = raster.createCompatibleWritableRaster(); + + int[] value = new int[4]; + for (int y = 0, height = raster.getHeight(); y < height; y++) + { + for (int x = 0, width = raster.getWidth(); x < width; x++) + { + raster.getPixel(x, y, value); + + // 4-channels 0..255 + float Y = value[0]; + float Cb = value[1]; + float Cr = value[2]; + float K = value[3]; + + // YCbCr to RGB, see http://www.equasys.de/colorconversion.html + int r = clamp( (1.164f * (Y-16)) + (1.596f * (Cr - 128)) ); + int g = clamp( (1.164f * (Y-16)) + (-0.392f * (Cb-128)) + (-0.813f * (Cr-128))); + int b = clamp( (1.164f * (Y-16)) + (2.017f * (Cb-128))); + + // naive RGB to CMYK + int cyan = 255 - r; + int magenta = 255 - g; + int yellow = 255 - b; + + // update new raster + value[0] = cyan; + value[1] = magenta; + value[2] = yellow; + value[3] = (int)K; + writableRaster.setPixel(x, y, value); + } + } + return 
writableRaster; + } + // converts from BGR to RGB private WritableRaster fromBGRtoRGB(Raster raster) { diff --git a/src/main/java/org/sejda/sambox/filter/Filter.java b/src/main/java/org/sejda/sambox/filter/Filter.java index 561e1e563e50af8aeb8c3c3ef198a1fc02a622a3..3664924651592c8c00040b4133dd4cceaeecbcba 100644 --- a/src/main/java/org/sejda/sambox/filter/Filter.java +++ b/src/main/java/org/sejda/sambox/filter/Filter.java @@ -112,7 +112,7 @@ public abstract class Filter return new COSDictionary(); } } - if (!(dp instanceof COSArray || dp instanceof COSArray)) + if (!(filter instanceof COSArray || dp instanceof COSArray)) { LOG.error("Ignoring invalid DecodeParams. Expected array or dictionary but found {}", dp.getClass().getName()); diff --git a/src/main/java/org/sejda/sambox/filter/FlateFilter.java b/src/main/java/org/sejda/sambox/filter/FlateFilter.java index bdb56f2142d174bc6fe862ef9aba73b85d0f1ed8..e33fe3a20215d9e242225c6313e7d9fad4c09dae 100644 --- a/src/main/java/org/sejda/sambox/filter/FlateFilter.java +++ b/src/main/java/org/sejda/sambox/filter/FlateFilter.java @@ -16,7 +16,6 @@ */ package org.sejda.sambox.filter; -import java.io.ByteArrayInputStream; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; @@ -25,9 +24,7 @@ import java.util.zip.Deflater; import java.util.zip.DeflaterOutputStream; import java.util.zip.Inflater; -import org.sejda.io.FastByteArrayOutputStream; import org.sejda.sambox.cos.COSDictionary; -import org.sejda.sambox.cos.COSName; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -40,39 +37,17 @@ import org.slf4j.LoggerFactory; final class FlateFilter extends Filter { private static final Logger LOG = LoggerFactory.getLogger(FlateFilter.class); - private static final int BUFFER_SIZE = 16348; + private static final int BUFFER_SIZE = 0x4000; @Override public DecodeResult decode(InputStream encoded, OutputStream decoded, COSDictionary parameters, int index) throws IOException { - int predictor = -1; 
- final COSDictionary decodeParams = getDecodeParams(parameters, index); - if (decodeParams != null) - { - predictor = decodeParams.getInt(COSName.PREDICTOR); - } try { - if (predictor > 1) - { - int colors = Math.min(decodeParams.getInt(COSName.COLORS, 1), 32); - int bitsPerPixel = decodeParams.getInt(COSName.BITS_PER_COMPONENT, 8); - int columns = decodeParams.getInt(COSName.COLUMNS, 1); - FastByteArrayOutputStream baos = new FastByteArrayOutputStream(); - decompress(encoded, baos); - ByteArrayInputStream bais = new ByteArrayInputStream(baos.toByteArray()); - Predictor.decodePredictor(predictor, colors, bitsPerPixel, columns, bais, decoded); - decoded.flush(); - baos.reset(); - bais.reset(); - } - else - { - decompress(encoded, decoded); - } + decompress(encoded, Predictor.wrapPredictor(decoded, decodeParams)); } catch (DataFormatException e) { @@ -154,18 +129,19 @@ final class FlateFilter extends Filter } compressionLevel = Math.max(-1, Math.min(Deflater.BEST_COMPRESSION, compressionLevel)); Deflater deflater = new Deflater(compressionLevel); - DeflaterOutputStream out = new DeflaterOutputStream(encoded, deflater); - int amountRead; - int mayRead = input.available(); - if (mayRead > 0) + try (DeflaterOutputStream out = new DeflaterOutputStream(encoded, deflater)) { - byte[] buffer = new byte[Math.min(mayRead, BUFFER_SIZE)]; - while ((amountRead = input.read(buffer, 0, Math.min(mayRead, BUFFER_SIZE))) != -1) + int amountRead; + int mayRead = input.available(); + if (mayRead > 0) { - out.write(buffer, 0, amountRead); + byte[] buffer = new byte[Math.min(mayRead, BUFFER_SIZE)]; + while ((amountRead = input.read(buffer, 0, Math.min(mayRead, BUFFER_SIZE))) != -1) + { + out.write(buffer, 0, amountRead); + } } } - out.close(); encoded.flush(); } } diff --git a/src/main/java/org/sejda/sambox/filter/IdentityFilter.java b/src/main/java/org/sejda/sambox/filter/IdentityFilter.java index f5937199142070ca64d1d9338a6d536572e46561..afc888b5edc49e957c4e2cc468614751afb13b60 100644 
--- a/src/main/java/org/sejda/sambox/filter/IdentityFilter.java +++ b/src/main/java/org/sejda/sambox/filter/IdentityFilter.java @@ -20,6 +20,7 @@ import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; +import org.apache.commons.io.IOUtils; import org.sejda.sambox.cos.COSDictionary; /** @@ -30,19 +31,12 @@ import org.sejda.sambox.cos.COSDictionary; */ final class IdentityFilter extends Filter { - private static final int BUFFER_SIZE = 1024; - @Override public DecodeResult decode(InputStream encoded, OutputStream decoded, COSDictionary parameters, int index) throws IOException { - byte[] buffer = new byte[BUFFER_SIZE]; - int amountRead; - while((amountRead = encoded.read(buffer, 0, BUFFER_SIZE)) != -1) - { - decoded.write(buffer, 0, amountRead); - } + IOUtils.copy(encoded, decoded); decoded.flush(); return new DecodeResult(parameters); } @@ -51,12 +45,7 @@ final class IdentityFilter extends Filter public void encode(InputStream input, OutputStream encoded, COSDictionary parameters) throws IOException { - byte[] buffer = new byte[BUFFER_SIZE]; - int amountRead; - while((amountRead = input.read(buffer, 0, BUFFER_SIZE)) != -1) - { - encoded.write(buffer, 0, amountRead); - } + IOUtils.copy(input, encoded); encoded.flush(); } } diff --git a/src/main/java/org/sejda/sambox/filter/JBIG2Filter.java b/src/main/java/org/sejda/sambox/filter/JBIG2Filter.java index 57f161639675fc742977ffc0181ae0276f7c0a02..836401aa399f67aa8548288dc9c9b66469a4d426 100644 --- a/src/main/java/org/sejda/sambox/filter/JBIG2Filter.java +++ b/src/main/java/org/sejda/sambox/filter/JBIG2Filter.java @@ -40,8 +40,7 @@ import org.slf4j.LoggerFactory; * monochrome (1 bit per pixel) image data (or an approximation of that data). * * Requires a JBIG2 plugin for Java Image I/O to be installed. A known working - * plug-in is <a href="http://code.google.com/p/jbig2-imageio/">jbig2-imageio</a> - * which is available under the GPL v3 license. 
+ * plug-in is the Apache PDFBox JBIG2 plugin. * * @author Timo Boehme */ @@ -49,11 +48,29 @@ final class JBIG2Filter extends Filter { private static final Logger LOG = LoggerFactory.getLogger(JBIG2Filter.class); + private static boolean levigoLogged = false; + + private static synchronized void logLevigoDonated() + { + if (!levigoLogged) + { + LOG.info("The Levigo JBIG2 plugin has been donated to the Apache Foundation"); + LOG.info("and an improved version is available for download at " + + "https://pdfbox.apache.org/download.cgi"); + levigoLogged = true; + } + } + @Override - public DecodeResult decode(InputStream encoded, OutputStream decoded, - COSDictionary parameters, int index) throws IOException + public DecodeResult decode(InputStream encoded, OutputStream decoded, COSDictionary + parameters, int index) throws IOException { ImageReader reader = findImageReader("JBIG2", "jbig2-imageio is not installed"); + if (reader.getClass().getName().contains("levigo")) + { + logLevigoDonated(); + } + DecodeResult result = new DecodeResult(new COSDictionary()); result.getParameters().addAll(parameters); @@ -127,12 +144,6 @@ final class JBIG2Filter extends Filter reader.dispose(); } - // repair missing color space - if (!parameters.containsKey(COSName.COLORSPACE)) - { - result.getParameters().setName(COSName.COLORSPACE, COSName.DEVICEGRAY.getName()); - } - return new DecodeResult(parameters); } diff --git a/src/main/java/org/sejda/sambox/filter/JPXFilter.java b/src/main/java/org/sejda/sambox/filter/JPXFilter.java index 506d44ff75e163400a733194a8cfbf3817b7833e..de1da16f9f147d8524062b9032af457f97ec254e 100644 --- a/src/main/java/org/sejda/sambox/filter/JPXFilter.java +++ b/src/main/java/org/sejda/sambox/filter/JPXFilter.java @@ -25,9 +25,9 @@ import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; -import javax.imageio.ImageIO; import javax.imageio.ImageReader; import javax.imageio.stream.ImageInputStream; +import 
javax.imageio.stream.MemoryCacheImageInputStream; import org.sejda.sambox.cos.COSDictionary; import org.sejda.sambox.cos.COSName; @@ -86,7 +86,9 @@ public final class JPXFilter extends Filter ImageInputStream iis = null; try { - iis = ImageIO.createImageInputStream(input); + // PDFBOX-4121: ImageIO.createImageInputStream() is much slower + iis = new MemoryCacheImageInputStream(input); + reader.setInput(iis, true, true); BufferedImage image; @@ -116,8 +118,8 @@ public final class JPXFilter extends Filter } // override dimensions, see PDFBOX-1735 - parameters.setInt(COSName.WIDTH, image.getWidth()); - parameters.setInt(COSName.HEIGHT, image.getHeight()); + parameters.setInt(COSName.WIDTH, reader.getWidth(0)); + parameters.setInt(COSName.HEIGHT, reader.getHeight(0)); // extract embedded color space if (!parameters.containsKey(COSName.COLORSPACE)) diff --git a/src/main/java/org/sejda/sambox/filter/LZWFilter.java b/src/main/java/org/sejda/sambox/filter/LZWFilter.java index 7c2f8ed48b729c89f497cb540fcce5f115e6e213..6945eb5625df62ec6511ee34f3fc35dafae9570d 100644 --- a/src/main/java/org/sejda/sambox/filter/LZWFilter.java +++ b/src/main/java/org/sejda/sambox/filter/LZWFilter.java @@ -15,8 +15,6 @@ */ package org.sejda.sambox.filter; -import java.io.ByteArrayInputStream; -import java.io.ByteArrayOutputStream; import java.io.EOFException; import java.io.IOException; import java.io.InputStream; @@ -53,60 +51,39 @@ public class LZWFilter extends Filter * The LZW end of data code. 
*/ public static final long EOD = 257; - - //BEWARE: codeTable must be local to each method, because there is only + + // BEWARE: codeTable must be local to each method, because there is only // one instance of each filter /** * {@inheritDoc} */ @Override - public DecodeResult decode(InputStream encoded, OutputStream decoded, - COSDictionary parameters, int index) throws IOException + public DecodeResult decode(InputStream encoded, OutputStream decoded, COSDictionary parameters, + int index) throws IOException { - int predictor = -1; - int earlyChange = 1; COSDictionary decodeParams = getDecodeParams(parameters, index); - if (decodeParams != null) - { - predictor = decodeParams.getInt(COSName.PREDICTOR); - earlyChange = decodeParams.getInt(COSName.EARLY_CHANGE, 1); - if (earlyChange != 0 && earlyChange != 1) - { - earlyChange = 1; - } - } - if (predictor > 1) - { - @SuppressWarnings("null") - int colors = Math.min(decodeParams.getInt(COSName.COLORS, 1), 32); - int bitsPerPixel = decodeParams.getInt(COSName.BITS_PER_COMPONENT, 8); - int columns = decodeParams.getInt(COSName.COLUMNS, 1); - ByteArrayOutputStream baos = new ByteArrayOutputStream(); - doLZWDecode(encoded, baos, earlyChange); - ByteArrayInputStream bais = new ByteArrayInputStream(baos.toByteArray()); - Predictor.decodePredictor(predictor, colors, bitsPerPixel, columns, bais, decoded); - decoded.flush(); - baos.reset(); - bais.reset(); - } - else + int earlyChange = decodeParams.getInt(COSName.EARLY_CHANGE, 1); + + if (earlyChange != 0 && earlyChange != 1) { doLZWDecode(encoded, decoded, earlyChange); } + + doLZWDecode(encoded, Predictor.wrapPredictor(decoded, decodeParams), earlyChange); return new DecodeResult(parameters); } - private void doLZWDecode(InputStream encoded, OutputStream decoded, int earlyChange) throws IOException + private void doLZWDecode(InputStream encoded, OutputStream decoded, int earlyChange) + throws IOException { - List<byte[]> codeTable = new ArrayList<byte[]>(); + List<byte[]> 
codeTable = new ArrayList<>(); int chunk = 9; - final MemoryCacheImageInputStream in = new MemoryCacheImageInputStream(encoded); long nextCommand; long prevCommand = -1; - try + try (MemoryCacheImageInputStream in = new MemoryCacheImageInputStream(encoded)) { while ((nextCommand = in.readBits(chunk)) != EOD) { @@ -141,7 +118,7 @@ public class LZWFilter extends Filter decoded.write(newData); codeTable.add(newData); } - + chunk = calculateChunk(codeTable.size(), earlyChange); prevCommand = nextCommand; } @@ -154,8 +131,8 @@ public class LZWFilter extends Filter decoded.flush(); } - private void checkIndexBounds(List<byte[]> codeTable, long index, MemoryCacheImageInputStream in) - throws IOException + private static void checkIndexBounds(List<byte[]> codeTable, long index, + MemoryCacheImageInputStream in) throws IOException { if (index < 0) { @@ -177,68 +154,69 @@ public class LZWFilter extends Filter int chunk = 9; byte[] inputPattern = null; - final MemoryCacheImageOutputStream out = new MemoryCacheImageOutputStream(encoded); - out.writeBits(CLEAR_TABLE, chunk); - int foundCode = -1; - int r; - while ((r = rawData.read()) != -1) + try (MemoryCacheImageOutputStream out = new MemoryCacheImageOutputStream(encoded)) { - byte by = (byte) r; - if (inputPattern == null) + out.writeBits(CLEAR_TABLE, chunk); + int foundCode = -1; + int r; + while ((r = rawData.read()) != -1) { - inputPattern = new byte[] { by }; - foundCode = by & 0xff; - } - else - { - inputPattern = Arrays.copyOf(inputPattern, inputPattern.length + 1); - inputPattern[inputPattern.length - 1] = by; - int newFoundCode = findPatternCode(codeTable, inputPattern); - if (newFoundCode == -1) + byte by = (byte) r; + if (inputPattern == null) { - // use previous - chunk = calculateChunk(codeTable.size() - 1, 1); - out.writeBits(foundCode, chunk); - // create new table entry - codeTable.add(inputPattern); - - if (codeTable.size() == 4096) - { - // code table is full - out.writeBits(CLEAR_TABLE, chunk); - codeTable 
= createCodeTable(); - } - inputPattern = new byte[] { by }; foundCode = by & 0xff; } else { - foundCode = newFoundCode; + inputPattern = Arrays.copyOf(inputPattern, inputPattern.length + 1); + inputPattern[inputPattern.length - 1] = by; + int newFoundCode = findPatternCode(codeTable, inputPattern); + if (newFoundCode == -1) + { + // use previous + chunk = calculateChunk(codeTable.size() - 1, 1); + out.writeBits(foundCode, chunk); + // create new table entry + codeTable.add(inputPattern); + + if (codeTable.size() == 4096) + { + // code table is full + out.writeBits(CLEAR_TABLE, chunk); + codeTable = createCodeTable(); + } + + inputPattern = new byte[] { by }; + foundCode = by & 0xff; + } + else + { + foundCode = newFoundCode; + } } } - } - if (foundCode != -1) - { - chunk = calculateChunk(codeTable.size() - 1, 1); - out.writeBits(foundCode, chunk); - } + if (foundCode != -1) + { + chunk = calculateChunk(codeTable.size() - 1, 1); + out.writeBits(foundCode, chunk); + } + + // PPDFBOX-1977: the decoder wouldn't know that the encoder would output + // an EOD as code, so he would have increased his own code table and + // possibly adjusted the chunk. Therefore, the encoder must behave as + // if the code table had just grown and thus it must be checked it is + // needed to adjust the chunk, based on an increased table size parameter + chunk = calculateChunk(codeTable.size(), 1); - // PPDFBOX-1977: the decoder wouldn't know that the encoder would output - // an EOD as code, so he would have increased his own code table and - // possibly adjusted the chunk. 
Therefore, the encoder must behave as - // if the code table had just grown and thus it must be checked it is - // needed to adjust the chunk, based on an increased table size parameter - chunk = calculateChunk(codeTable.size(), 1); + out.writeBits(EOD, chunk); - out.writeBits(EOD, chunk); - - // pad with 0 - out.writeBits(0, 7); - - // must do or file will be empty :-( - out.flush(); - out.close(); + // pad with 0 + out.writeBits(0, 7); + + // must do or file will be empty :-( + out.flush(); + } } /** @@ -246,8 +224,7 @@ public class LZWFilter extends Filter * * @param codeTable The LZW code table. * @param pattern The pattern to be searched for. - * @return The index of the longest matching pattern or -1 if nothing is - * found. + * @return The index of the longest matching pattern or -1 if nothing is found. */ private int findPatternCode(List<byte[]> codeTable, byte[] pattern) { @@ -261,7 +238,7 @@ public class LZWFilter extends Filter if (foundCode != -1) { // we already found pattern with size > 1 - return foundCode; + return foundCode; } else if (pattern.length > 1) { @@ -270,7 +247,8 @@ public class LZWFilter extends Filter } } byte[] tryPattern = codeTable.get(i); - if ((foundCode != -1 || tryPattern.length > foundLen) && Arrays.equals(tryPattern, pattern)) + if ((foundCode != -1 || tryPattern.length > foundLen) + && Arrays.equals(tryPattern, pattern)) { foundCode = i; foundLen = tryPattern.length; @@ -280,12 +258,11 @@ public class LZWFilter extends Filter } /** - * Init the code table with 1 byte entries and the EOD and CLEAR_TABLE - * markers. + * Init the code table with 1 byte entries and the EOD and CLEAR_TABLE markers. 
*/ private List<byte[]> createCodeTable() { - List<byte[]> codeTable = new ArrayList<byte[]>(4096); + List<byte[]> codeTable = new ArrayList<>(4096); for (int i = 0; i < 256; ++i) { codeTable.add(new byte[] { (byte) (i & 0xFF) }); diff --git a/src/main/java/org/sejda/sambox/filter/Predictor.java b/src/main/java/org/sejda/sambox/filter/Predictor.java index d084f39679e3efd68965395203f9fc188f3ea74a..cb89b29df200891bb4dd78733fc627a47b3236b6 100644 --- a/src/main/java/org/sejda/sambox/filter/Predictor.java +++ b/src/main/java/org/sejda/sambox/filter/Predictor.java @@ -15,15 +15,19 @@ */ package org.sejda.sambox.filter; +import java.io.FilterOutputStream; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; +import java.util.Arrays; import org.apache.commons.io.IOUtils; +import org.sejda.sambox.cos.COSDictionary; +import org.sejda.sambox.cos.COSName; /** - * Helper class to contain predictor decoding used by Flate and LZW filter. - * To see the history, look at the FlateFilter class. + * Helper class to contain predictor decoding used by Flate and LZW filter. To see the history, look at the FlateFilter + * class. */ public final class Predictor { @@ -31,9 +35,178 @@ public final class Predictor private Predictor() { } - - static void decodePredictor(int predictor, int colors, int bitsPerComponent, int columns, InputStream in, OutputStream out) - throws IOException + + /** + * Decodes a single line of data in-place. + * + * @param predictor Predictor value for the current line + * @param colors Number of color components, from decode parameters. + * @param bitsPerComponent Number of bits per components, from decode parameters. + * @param columns Number samples in a row, from decode parameters. + * @param actline Current (active) line to decode. Data will be decoded in-place, i.e. - the contents of this buffer + * will be modified. + * @param lastline The previous decoded line. 
When decoding the first line, this parameter should be an empty byte + * array of the same length as <code>actline</code>. + */ + static void decodePredictorRow(int predictor, int colors, int bitsPerComponent, int columns, + byte[] actline, byte[] lastline) + { + if (predictor == 1) + { + // no prediction + return; + } + final int bitsPerPixel = colors * bitsPerComponent; + final int bytesPerPixel = (bitsPerPixel + 7) / 8; + final int rowlength = actline.length; + switch (predictor) + { + case 2: + // PRED TIFF SUB + if (bitsPerComponent == 8) + { + // for 8 bits per component it is the same algorithm as PRED SUB of PNG format + for (int p = bytesPerPixel; p < rowlength; p++) + { + int sub = actline[p] & 0xff; + int left = actline[p - bytesPerPixel] & 0xff; + actline[p] = (byte) (sub + left); + } + break; + } + if (bitsPerComponent == 16) + { + for (int p = bytesPerPixel; p < rowlength; p += 2) + { + int sub = ((actline[p] & 0xff) << 8) + (actline[p + 1] & 0xff); + int left = (((actline[p - bytesPerPixel] & 0xff) << 8) + + (actline[p - bytesPerPixel + 1] & 0xff)); + actline[p] = (byte) (((sub + left) >> 8) & 0xff); + actline[p + 1] = (byte) ((sub + left) & 0xff); + } + break; + } + if (bitsPerComponent == 1 && colors == 1) + { + // bytesPerPixel cannot be used: + // "A row shall occupy a whole number of bytes, rounded up if necessary. + // Samples and their components shall be packed into bytes + // from high-order to low-order bits." 
+ for (int p = 0; p < rowlength; p++) + { + for (int bit = 7; bit >= 0; --bit) + { + int sub = (actline[p] >> bit) & 1; + if (p == 0 && bit == 7) + { + continue; + } + int left; + if (bit == 7) + { + // use bit #0 from previous byte + left = actline[p - 1] & 1; + } + else + { + // use "previous" bit + left = (actline[p] >> (bit + 1)) & 1; + } + if (((sub + left) & 1) == 0) + { + // reset bit + actline[p] = (byte) (actline[p] & ~(1 << bit)); + } + else + { + // set bit + actline[p] = (byte) (actline[p] | (1 << bit)); + } + } + } + break; + } + // everything else, i.e. bpc 2 and 4, but has been tested for bpc 1 and 8 too + int elements = columns * colors; + for (int p = colors; p < elements; ++p) + { + int bytePosSub = p * bitsPerComponent / 8; + int bitPosSub = 8 - p * bitsPerComponent % 8 - bitsPerComponent; + int bytePosLeft = (p - colors) * bitsPerComponent / 8; + int bitPosLeft = 8 - (p - colors) * bitsPerComponent % 8 - bitsPerComponent; + + int sub = getBitSeq(actline[bytePosSub], bitPosSub, bitsPerComponent); + int left = getBitSeq(actline[bytePosLeft], bitPosLeft, bitsPerComponent); + actline[bytePosSub] = (byte) calcSetBitSeq(actline[bytePosSub], bitPosSub, + bitsPerComponent, sub + left); + } + break; + case 10: + // PRED NONE + // do nothing + break; + case 11: + // PRED SUB + for (int p = bytesPerPixel; p < rowlength; p++) + { + int sub = actline[p]; + int left = actline[p - bytesPerPixel]; + actline[p] = (byte) (sub + left); + } + break; + case 12: + // PRED UP + for (int p = 0; p < rowlength; p++) + { + int up = actline[p] & 0xff; + int prior = lastline[p] & 0xff; + actline[p] = (byte) ((up + prior) & 0xff); + } + break; + case 13: + // PRED AVG + for (int p = 0; p < rowlength; p++) + { + int avg = actline[p] & 0xff; + int left = p - bytesPerPixel >= 0 ? 
actline[p - bytesPerPixel] & 0xff : 0; + int up = lastline[p] & 0xff; + actline[p] = (byte) ((avg + (left + up) / 2) & 0xff); + } + break; + case 14: + // PRED PAETH + for (int p = 0; p < rowlength; p++) + { + int paeth = actline[p] & 0xff; + int a = p - bytesPerPixel >= 0 ? actline[p - bytesPerPixel] & 0xff : 0;// left + int b = lastline[p] & 0xff;// upper + int c = p - bytesPerPixel >= 0 ? lastline[p - bytesPerPixel] & 0xff : 0;// upperleft + int value = a + b - c; + int absa = Math.abs(value - a); + int absb = Math.abs(value - b); + int absc = Math.abs(value - c); + + if (absa <= absb && absa <= absc) + { + actline[p] = (byte) ((paeth + a) & 0xff); + } + else if (absb <= absc) + { + actline[p] = (byte) ((paeth + b) & 0xff); + } + else + { + actline[p] = (byte) ((paeth + c) & 0xff); + } + } + break; + default: + break; + } + } + + static void decodePredictor(int predictor, int colors, int bitsPerComponent, int columns, + InputStream in, OutputStream out) throws IOException { if (predictor == 1) { @@ -43,9 +216,7 @@ public final class Predictor else { // calculate sizes - final int bitsPerPixel = colors * bitsPerComponent; - final int bytesPerPixel = (bitsPerPixel + 7) / 8; - final int rowlength = (columns * bitsPerPixel + 7) / 8; + final int rowlength = calculateRowLength(colors, bitsPerComponent, columns); byte[] actline = new byte[rowlength]; byte[] lastline = new byte[rowlength]; @@ -69,162 +240,26 @@ public final class Predictor // read line int i, offset = 0; - while (offset < rowlength && ((i = in.read(actline, offset, rowlength - offset)) != -1)) + while (offset < rowlength + && ((i = in.read(actline, offset, rowlength - offset)) != -1)) { offset += i; } - // do prediction as specified in PNG-Specification 1.2 - switch (linepredictor) - { - case 2: - // PRED TIFF SUB - if (bitsPerComponent == 8) - { - // for 8 bits per component it is the same algorithm as PRED SUB of PNG format - for (int p = bytesPerPixel; p < rowlength; p++) - { - int sub = actline[p] & 
0xff; - int left = actline[p - bytesPerPixel] & 0xff; - actline[p] = (byte) (sub + left); - } - break; - } - if (bitsPerComponent == 16) - { - for (int p = bytesPerPixel; p < rowlength; p += 2) - { - int sub = ((actline[p] & 0xff) << 8) + (actline[p + 1] & 0xff); - int left = (((actline[p - bytesPerPixel] & 0xff) << 8) - + (actline[p - bytesPerPixel + 1] & 0xff)); - actline[p] = (byte) (((sub + left) >> 8) & 0xff); - actline[p + 1] = (byte) ((sub + left) & 0xff); - } - break; - } - if (bitsPerComponent == 1 && colors == 1) - { - // bytesPerPixel cannot be used: - // "A row shall occupy a whole number of bytes, rounded up if necessary. - // Samples and their components shall be packed into bytes - // from high-order to low-order bits." - for (int p = 0; p < rowlength; p++) - { - for (int bit = 7; bit >= 0; --bit) - { - int sub = (actline[p] >> bit) & 1; - if (p == 0 && bit == 7) - { - continue; - } - int left; - if (bit == 7) - { - // use bit #0 from previous byte - left = actline[p - 1] & 1; - } - else - { - // use "previous" bit - left = (actline[p] >> (bit + 1)) & 1; - } - if (((sub + left) & 1) == 0) - { - // reset bit - actline[p] = (byte) (actline[p] & ~(1 << bit)); - } - else - { - // set bit - actline[p] = (byte) (actline[p] | (1 << bit)); - } - } - } - break; - } - // everything else, i.e. 
bpc 2 and 4, but has been tested for bpc 1 and 8 too - int elements = columns * colors; - for (int p = colors; p < elements; ++p) - { - int bytePosSub = p * bitsPerComponent / 8; - int bitPosSub = 8 - p * bitsPerComponent % 8 - bitsPerComponent; - int bytePosLeft = (p - colors) * bitsPerComponent / 8; - int bitPosLeft = 8 - (p - colors) * bitsPerComponent % 8 - bitsPerComponent; - - int sub = getBitSeq(actline[bytePosSub], bitPosSub, bitsPerComponent); - int left = getBitSeq(actline[bytePosLeft], bitPosLeft, bitsPerComponent); - actline[bytePosSub] = (byte) calcSetBitSeq(actline[bytePosSub], bitPosSub, - bitsPerComponent, sub + left); - } - break; - case 10: - // PRED NONE - // do nothing - break; - case 11: - // PRED SUB - for (int p = bytesPerPixel; p < rowlength; p++) - { - int sub = actline[p]; - int left = actline[p - bytesPerPixel]; - actline[p] = (byte) (sub + left); - } - break; - case 12: - // PRED UP - for (int p = 0; p < rowlength; p++) - { - int up = actline[p] & 0xff; - int prior = lastline[p] & 0xff; - actline[p] = (byte) ((up + prior) & 0xff); - } - break; - case 13: - // PRED AVG - for (int p = 0; p < rowlength; p++) - { - int avg = actline[p] & 0xff; - int left = p - bytesPerPixel >= 0 ? actline[p - bytesPerPixel] & 0xff : 0; - int up = lastline[p] & 0xff; - actline[p] = (byte) ((avg + (left + up) / 2) & 0xff); - } - break; - case 14: - // PRED PAETH - for (int p = 0; p < rowlength; p++) - { - int paeth = actline[p] & 0xff; - int a = p - bytesPerPixel >= 0 ? actline[p - bytesPerPixel] & 0xff : 0;// left - int b = lastline[p] & 0xff;// upper - int c = p - bytesPerPixel >= 0 ? 
lastline[p - bytesPerPixel] & 0xff : 0;// upperleft - int value = a + b - c; - int absa = Math.abs(value - a); - int absb = Math.abs(value - b); - int absc = Math.abs(value - c); - - if (absa <= absb && absa <= absc) - { - actline[p] = (byte) ((paeth + a) & 0xff); - } - else if (absb <= absc) - { - actline[p] = (byte) ((paeth + b) & 0xff); - } - else - { - actline[p] = (byte) ((paeth + c) & 0xff); - } - } - break; - default: - break; - } + decodePredictorRow(linepredictor, colors, bitsPerComponent, columns, actline, + lastline); System.arraycopy(actline, 0, lastline, 0, rowlength); out.write(actline); } } } + static int calculateRowLength(int colors, int bitsPerComponent, int columns) + { + final int bitsPerPixel = colors * bitsPerComponent; + return (columns * bitsPerPixel + 7) / 8; + } + // get value from bit interval from a byte static int getBitSeq(int by, int startBit, int bitSize) { @@ -241,4 +276,141 @@ public final class Predictor return (by & mask) | (truncatedVal << startBit); } + /** + * Wraps and <code>OutputStream</code> in a predictor decoding stream as necessary. If no predictor is specified by + * the parameters, the original stream is returned as is. + * + * @param out The stream to which decoded data should be written + * @param decodeParams Decode parameters for the stream + * @return An <code>OutputStream</code> is returned, which will write decoded data into the given stream. If no + * predictor is specified, the original stream is returned. 
+ */ + static OutputStream wrapPredictor(OutputStream out, COSDictionary decodeParams) + { + int predictor = decodeParams.getInt(COSName.PREDICTOR); + if (predictor > 1) + { + int colors = Math.min(decodeParams.getInt(COSName.COLORS, 1), 32); + int bitsPerPixel = decodeParams.getInt(COSName.BITS_PER_COMPONENT, 8); + int columns = decodeParams.getInt(COSName.COLUMNS, 1); + + return new PredictorOutputStream(out, predictor, colors, bitsPerPixel, columns); + } + return out; + } + + /** + * Output stream that implements predictor decoding. Data is buffered until a complete row is available, which is + * then decoded and written to the underlying stream. The previous row is retained for decoding the next row. + */ + private static final class PredictorOutputStream extends FilterOutputStream + { + // current predictor type + private int predictor; + // image decode parameters + private final int colors; + private final int bitsPerComponent; + private final int columns; + private final int rowLength; + // PNG predictor (predictor>=10) means every row has a (potentially different) + // predictor value + private final boolean predictorPerRow; + + // data buffers + private byte[] currentRow, lastRow; + // amount of data in the current row + private int currentRowData = 0; + // was the per-row predictor value read for the current row being processed + private boolean predictorRead = false; + + PredictorOutputStream(OutputStream out, int predictor, int colors, int bitsPerComponent, + int columns) + { + super(out); + this.predictor = predictor; + this.colors = colors; + this.bitsPerComponent = bitsPerComponent; + this.columns = columns; + this.rowLength = calculateRowLength(colors, bitsPerComponent, columns); + this.predictorPerRow = predictor >= 10; + currentRow = new byte[rowLength]; + lastRow = new byte[rowLength]; + } + + @Override + public void write(byte[] bytes) throws IOException + { + write(bytes, 0, bytes.length); + } + + @Override + public void write(byte[] bytes, 
int off, int len) throws IOException + { + int currentOffset = off; + int maxOffset = currentOffset + len; + while (currentOffset < maxOffset) + { + if (predictorPerRow && currentRowData == 0 && !predictorRead) + { + // PNG predictor; each row starts with predictor type (0, 1, 2, 3, 4) + // read per line predictor, add 10 to treat value 0 as 10, 1 as 11, ... + predictor = bytes[currentOffset] + 10; + currentOffset++; + predictorRead = true; + } + else + { + int toRead = Math.min(rowLength - currentRowData, maxOffset - currentOffset); + System.arraycopy(bytes, currentOffset, currentRow, currentRowData, toRead); + currentRowData += toRead; + currentOffset += toRead; + + // current row is filled, decode it, write it to underlying stream, + // and reset the state. + if (currentRowData == currentRow.length) + { + decodeAndWriteRow(); + } + } + } + } + + private void decodeAndWriteRow() throws IOException + { + decodePredictorRow(predictor, colors, bitsPerComponent, columns, currentRow, lastRow); + out.write(currentRow); + flipRows(); + } + + /** + * Flips the row buffers (to avoid copying), and resets the current-row index and predictorRead flag + */ + private void flipRows() + { + byte[] temp = lastRow; + lastRow = currentRow; + currentRow = temp; + currentRowData = 0; + predictorRead = false; + } + + @Override + public void flush() throws IOException + { + // The last row is allowed to be incomplete, and should be completed with zeros. 
+ if (currentRowData > 0) + { + Arrays.fill(currentRow, currentRowData, rowLength, (byte) 0); + decodeAndWriteRow(); + } + super.flush(); + } + + @Override + public void write(int i) throws IOException + { + throw new UnsupportedOperationException("Not supported"); + } + } + } diff --git a/src/main/java/org/sejda/sambox/filter/RunLengthDecodeFilter.java b/src/main/java/org/sejda/sambox/filter/RunLengthDecodeFilter.java index e93bbbda948eb96e314ce4243b76aaeafe44ec3d..ff8c2754bf75ce497afc41501d2152538fe23d47 100644 --- a/src/main/java/org/sejda/sambox/filter/RunLengthDecodeFilter.java +++ b/src/main/java/org/sejda/sambox/filter/RunLengthDecodeFilter.java @@ -47,9 +47,14 @@ final class RunLengthDecodeFilter extends Filter { int amountToCopy = dupAmount + 1; int compressedRead; - while(amountToCopy > 0) + while (amountToCopy > 0) { compressedRead = encoded.read(buffer, 0, amountToCopy); + // EOF reached? + if (compressedRead == -1) + { + break; + } decoded.write(buffer, 0, compressedRead); amountToCopy -= compressedRead; } @@ -57,6 +62,11 @@ final class RunLengthDecodeFilter extends Filter else { int dupByte = encoded.read(); + // EOF reached? 
+ if (dupByte == -1) + { + break; + } for (int i = 0; i < 257 - dupAmount; i++) { decoded.write(dupByte); diff --git a/src/main/java/org/sejda/sambox/input/AbstractXrefTableParser.java b/src/main/java/org/sejda/sambox/input/AbstractXrefTableParser.java index a0778eb7a0c361bb23c556bdad7918f113ef4b5e..ea9939027d37121465ff4322b2e613bbed4cae9d 100644 --- a/src/main/java/org/sejda/sambox/input/AbstractXrefTableParser.java +++ b/src/main/java/org/sejda/sambox/input/AbstractXrefTableParser.java @@ -120,10 +120,12 @@ abstract class AbstractXrefTableParser onEntryFound(inUseEntry(currentObjectNumber, Long.parseLong(splitString[0]), Integer.parseInt(splitString[1]))); } - catch (NumberFormatException e) + catch (IllegalArgumentException e) { - throw new IOException("Corrupted xref table entry.", e); + throw new IOException( + "Corrupted xref table entry. Invalid xref line: " + currentLine, e); } + } else if (!"f".equals(entryType)) { diff --git a/src/main/java/org/sejda/sambox/input/LazyIndirectObjectsProvider.java b/src/main/java/org/sejda/sambox/input/LazyIndirectObjectsProvider.java index e76519fae4353acaadcc37f7406af26edb065571..a566cf641c66ac9846ccf271b13e53d9e4d279bc 100644 --- a/src/main/java/org/sejda/sambox/input/LazyIndirectObjectsProvider.java +++ b/src/main/java/org/sejda/sambox/input/LazyIndirectObjectsProvider.java @@ -207,6 +207,20 @@ class LazyIndirectObjectsProvider implements IndirectObjectsProvider { LOG.warn("Missing 'endobj' token for {}", xrefEntry); } + + if(found instanceof ExistingIndirectCOSObject) + { + ExistingIndirectCOSObject existingIndirectCOSObject = (ExistingIndirectCOSObject)found; + // does this point to itself? it would cause a StackOverflowError. 
Example: + // 9 0 obj + // 9 0 R + // endobj + if(existingIndirectCOSObject.id().objectIdentifier.equals(xrefEntry.key())) + { + LOG.warn("Found indirect object definition pointing to itself, for {}", xrefEntry); + found = COSNull.NULL; + } + } store.put(xrefEntry.key(), ofNullable(found).orElse(COSNull.NULL)); } diff --git a/src/main/java/org/sejda/sambox/input/SourceReader.java b/src/main/java/org/sejda/sambox/input/SourceReader.java index c85dfa520081ad904369238ba6856b9844200e5c..bac4f68bac9316aee4506658b83d1dd312a50fac 100644 --- a/src/main/java/org/sejda/sambox/input/SourceReader.java +++ b/src/main/java/org/sejda/sambox/input/SourceReader.java @@ -476,16 +476,34 @@ class SourceReader implements Closeable public final String readNumber() throws IOException { StringBuilder builder = pool.borrow(); + int lastAppended = -1; try { int c = source.read(); if (c != -1 && (isDigit(c) || c == '+' || c == '-' || c == '.')) { builder.append((char) c); + lastAppended = c; + + // Ignore double negative (this is consistent with Adobe Reader) + if (c == '-' && source.peek() == c) + { + source.read(); + } + while ((c = source.read()) != -1 && (isDigit(c) || c == '.' 
|| c == 'E' || c == 'e' || c == '+' || c == '-')) { - builder.append((char) c); + if (c == '-' && !(lastAppended == 'e' || lastAppended == 'E')) + { + // PDFBOX-4064: ignore "-" in the middle of a number + // but not if its a negative exponent 1e-23 + } + else + { + builder.append((char) c); + lastAppended = c; + } } } unreadIfValid(c); diff --git a/src/main/java/org/sejda/sambox/output/ContentStreamWriter.java b/src/main/java/org/sejda/sambox/output/ContentStreamWriter.java index f8b21aa5d42966762136e1379afcef8c969c46b5..d14045690e562d0526cbbb2afd6ffb6ed18a79d2 100644 --- a/src/main/java/org/sejda/sambox/output/ContentStreamWriter.java +++ b/src/main/java/org/sejda/sambox/output/ContentStreamWriter.java @@ -123,7 +123,10 @@ public class ContentStreamWriter extends DefaultCOSWriter { key.accept(this); writeSpace(); - imageParams.getDictionaryObject(key).accept(this); + COSBase imageParamsDictionaryObject = imageParams.getDictionaryObject(key); + if(imageParamsDictionaryObject != null) { + imageParamsDictionaryObject.accept(this); + } writeEOL(); } writer().write(ID_OPERATOR.getBytes(StandardCharsets.US_ASCII)); diff --git a/src/main/java/org/sejda/sambox/output/DefaultCOSWriter.java b/src/main/java/org/sejda/sambox/output/DefaultCOSWriter.java index 4fbbd1bfab1a3b0a2bb982706ee72a93688d9624..45b0e069f01ad8ef1f8c6113ddab99ca6e9e1c02 100644 --- a/src/main/java/org/sejda/sambox/output/DefaultCOSWriter.java +++ b/src/main/java/org/sejda/sambox/output/DefaultCOSWriter.java @@ -139,7 +139,7 @@ class DefaultCOSWriter implements COSWriter public void visit(COSName value) throws IOException { writer.write(SOLIDUS); - byte[] bytes = value.getName().getBytes(StandardCharsets.US_ASCII); + byte[] bytes = value.getName().getBytes(StandardCharsets.UTF_8); for (int i = 0; i < bytes.length; i++) { int current = bytes[i] & 0xFF; diff --git a/src/main/java/org/sejda/sambox/output/DefaultPDFWriter.java b/src/main/java/org/sejda/sambox/output/DefaultPDFWriter.java index 
417345bd13f1880bb5fb32d7df6fc7f95dd35133..05eefa116b2932aedaa797ad4bf2c55d1ca9d720 100644 --- a/src/main/java/org/sejda/sambox/output/DefaultPDFWriter.java +++ b/src/main/java/org/sejda/sambox/output/DefaultPDFWriter.java @@ -132,6 +132,10 @@ class DefaultPDFWriter implements Closeable */ public void writeXrefStream(COSDictionary trailer) throws IOException { + if (nonNull(writer.context().addWritten(XrefEntry.DEFAULT_FREE_ENTRY))) + { + LOG.warn("Reserved object number 0 has been overwritten with the expected free entry"); + } writeXrefStream(trailer, -1); } diff --git a/src/main/java/org/sejda/sambox/pdmodel/PDDocument.java b/src/main/java/org/sejda/sambox/pdmodel/PDDocument.java index 81a8f13309c0d7493dae138c69faa19a541f1662..160f18b4b9a5470ea2d2a7797af77efec4e47be7 100644 --- a/src/main/java/org/sejda/sambox/pdmodel/PDDocument.java +++ b/src/main/java/org/sejda/sambox/pdmodel/PDDocument.java @@ -40,6 +40,7 @@ import java.util.Optional; import java.util.Set; import org.sejda.io.CountingWritableByteChannel; +import org.sejda.io.SeekableSources; import org.sejda.sambox.cos.COSArray; import org.sejda.sambox.cos.COSBase; import org.sejda.sambox.cos.COSDictionary; @@ -52,13 +53,14 @@ import org.sejda.sambox.cos.DirectCOSObject; import org.sejda.sambox.encryption.EncryptionContext; import org.sejda.sambox.encryption.MessageDigests; import org.sejda.sambox.encryption.StandardSecurity; +import org.sejda.sambox.input.PDFParser; import org.sejda.sambox.output.PDDocumentWriter; import org.sejda.sambox.output.WriteOption; import org.sejda.sambox.pdmodel.common.PDStream; import org.sejda.sambox.pdmodel.encryption.AccessPermission; import org.sejda.sambox.pdmodel.encryption.PDEncryption; import org.sejda.sambox.pdmodel.encryption.SecurityHandler; -import org.sejda.sambox.pdmodel.font.PDFont; +import org.sejda.sambox.pdmodel.font.Subsettable; import org.sejda.sambox.pdmodel.graphics.color.PDDeviceRGB; import org.sejda.sambox.util.Version; import org.sejda.util.IOUtils; @@ 
-108,7 +110,7 @@ public class PDDocument implements Closeable private ResourceCache resourceCache = new DefaultResourceCache(); // fonts to subset before saving - private final Set<PDFont> fontsToSubset = new HashSet<>(); + private final Set<Subsettable> fontsToSubset = new HashSet<>(); public PDDocument() { @@ -286,7 +288,7 @@ public class PDDocument implements Closeable /** * @return the list of fonts which will be subset before the document is saved. */ - Set<PDFont> getFontsToSubset() + public Set<Subsettable> getFontsToSubset() { return fontsToSubset; } @@ -569,7 +571,7 @@ public class PDDocument implements Closeable requireOpen(); getDocumentInformation().setProducer("SAMBox " + Version.getVersion() + " (www.sejda.org)"); getDocumentInformation().setModificationDate(Calendar.getInstance()); - for (PDFont font : fontsToSubset) + for (Subsettable font : fontsToSubset) { font.subset(); } @@ -637,4 +639,9 @@ public class PDDocument implements Closeable return resourceCache; } + // bridge to pdfbox style api, used in tests + public static PDDocument load(File file) throws IOException { + return PDFParser.parse(SeekableSources.seekableSourceFrom(file)); + } + } diff --git a/src/main/java/org/sejda/sambox/pdmodel/PDDocumentCatalog.java b/src/main/java/org/sejda/sambox/pdmodel/PDDocumentCatalog.java index fa8057369b2fb3b924100b1e651b808a8bfce2eb..7aa002be8ad4f8bd5a70bf2a486475736eb21b57 100644 --- a/src/main/java/org/sejda/sambox/pdmodel/PDDocumentCatalog.java +++ b/src/main/java/org/sejda/sambox/pdmodel/PDDocumentCatalog.java @@ -16,6 +16,7 @@ */ package org.sejda.sambox.pdmodel; +import static java.util.Objects.nonNull; import static java.util.Optional.ofNullable; import static org.sejda.sambox.util.SpecVersionUtils.V1_5; @@ -241,27 +242,23 @@ public class PDDocumentCatalog implements COSObjectable * Get the Document Open Action for this object. * * @return The action to perform when the document is opened. 
- * @throws IOException If there is an error creating the destination or action. */ public PDDestinationOrAction getOpenAction() throws IOException { COSBase openAction = root.getDictionaryObject(COSName.OPEN_ACTION); - if (openAction == null) + if (nonNull(openAction)) { - return null; - } - else if (openAction instanceof COSDictionary) - { - return PDActionFactory.createAction((COSDictionary) openAction); - } - else if (openAction instanceof COSArray) - { - return PDDestination.create(openAction); - } - else - { - throw new IOException("Unknown OpenAction " + openAction); + if (openAction instanceof COSDictionary) + { + return PDActionFactory.createAction((COSDictionary) openAction); + } + else if (openAction instanceof COSArray) + { + return PDDestination.create(openAction); + } + LOG.warn("Invalid OpenAction {}", openAction); } + return null; } /** diff --git a/src/main/java/org/sejda/sambox/pdmodel/PDPage.java b/src/main/java/org/sejda/sambox/pdmodel/PDPage.java index 4c56e5b06730b91c939376168c9b7a266819d741..16cd350ca5f4dd88b5922a638f367d27fcb27ed9 100644 --- a/src/main/java/org/sejda/sambox/pdmodel/PDPage.java +++ b/src/main/java/org/sejda/sambox/pdmodel/PDPage.java @@ -44,7 +44,9 @@ import org.sejda.sambox.pdmodel.common.PDMetadata; import org.sejda.sambox.pdmodel.common.PDRectangle; import org.sejda.sambox.pdmodel.common.PDStream; import org.sejda.sambox.pdmodel.interactive.action.PDPageAdditionalActions; +import org.sejda.sambox.pdmodel.interactive.annotation.AnnotationFilter; import org.sejda.sambox.pdmodel.interactive.annotation.PDAnnotation; +import org.sejda.sambox.pdmodel.interactive.measurement.PDViewportDictionary; import org.sejda.sambox.pdmodel.interactive.pagenavigation.PDThreadBead; import org.sejda.sambox.pdmodel.interactive.pagenavigation.PDTransition; import org.sejda.sambox.util.Matrix; @@ -272,10 +274,10 @@ public class PDPage implements COSObjectable, PDContentStream { if (mediaBox == null) { - COSArray array = (COSArray) 
PDPageTree.getInheritableAttribute(page, COSName.MEDIA_BOX); - if (array != null) + COSBase base = PDPageTree.getInheritableAttribute(page, COSName.MEDIA_BOX); + if (base instanceof COSArray) { - mediaBox = new PDRectangle(array); + mediaBox = new PDRectangle((COSArray) base); } } if (mediaBox == null) @@ -316,11 +318,19 @@ public class PDPage implements COSObjectable, PDContentStream */ public PDRectangle getCropBox() { - COSArray array = (COSArray) PDPageTree.getInheritableAttribute(page, COSName.CROP_BOX); - if (array != null) + try { - return clipToMediaBox(new PDRectangle(array)); + COSBase base = PDPageTree.getInheritableAttribute(page, COSName.CROP_BOX); + if (base instanceof COSArray) + { + return clipToMediaBox(new PDRectangle((COSArray) base)); + } + } + catch (Exception ex) + { + LOG.debug("An error occurred parsing the crop box", ex); } + return getMediaBox(); } @@ -355,10 +365,20 @@ public class PDPage implements COSObjectable, PDContentStream */ public PDRectangle getBleedBox() { - COSArray array = page.getDictionaryObject(COSName.BLEED_BOX, COSArray.class); - if (nonNull(array) && inMediaBoxBounds(new PDRectangle(array))) + try { - return new PDRectangle(array); + COSBase base = page.getDictionaryObject(COSName.BLEED_BOX); + if (base instanceof COSArray) + { + COSArray array = (COSArray) base; + if(inMediaBoxBounds(new PDRectangle(array))) { + return new PDRectangle((COSArray) base); + } + } + } + catch (Exception ex) + { + LOG.debug("An error occurred parsing page bleed box", ex); } return getCropBox(); } @@ -394,10 +414,21 @@ public class PDPage implements COSObjectable, PDContentStream */ public PDRectangle getTrimBox() { - COSArray array = page.getDictionaryObject(COSName.TRIM_BOX, COSArray.class); - if (nonNull(array) && inMediaBoxBounds(new PDRectangle(array))) + try + { + COSBase base = page.getDictionaryObject(COSName.TRIM_BOX); + if (base instanceof COSArray) + { + COSArray array = (COSArray) base; + if(inMediaBoxBounds(new 
PDRectangle(array))) + { + return new PDRectangle(array); + } + } + } + catch (Exception ex) { - return new PDRectangle(array); + LOG.debug("An error occurred parsing page trim box", ex); } return getCropBox(); } @@ -433,10 +464,21 @@ public class PDPage implements COSObjectable, PDContentStream */ public PDRectangle getArtBox() { - COSArray array = page.getDictionaryObject(COSName.ART_BOX, COSArray.class); - if (nonNull(array) && inMediaBoxBounds(new PDRectangle(array))) + try + { + COSBase base = page.getDictionaryObject(COSName.ART_BOX); + if (base instanceof COSArray) + { + COSArray array = (COSArray) base; + if(inMediaBoxBounds(new PDRectangle(array))) + { + return new PDRectangle(array); + } + } + } + catch (Exception ex) { - return new PDRectangle(array); + LOG.debug("An error occurred parsing page art box", ex); } return getCropBox(); } @@ -665,11 +707,33 @@ public class PDPage implements COSObjectable, PDContentStream } /** - * This will return a list of the Annotations for this page. - * - * @return List of the PDAnnotation objects, never null. + * This will return a list of the annotations for this page. + * + * @return List of the PDAnnotation objects, never null. The returned list is backed by the + * annotations COSArray, so any adding or deleting in this list will change the document too. + * */ public List<PDAnnotation> getAnnotations() + { + return getAnnotations(new AnnotationFilter() + { + @Override + public boolean accept(PDAnnotation annotation) + { + return true; + } + }); + } + + /** + * This will return a list of the annotations for this page. + * + * @param annotationFilter the annotation filter provided allowing to filter out specific annotations + * @return List of the PDAnnotation objects, never null. The returned list is backed by the + * annotations COSArray, so any adding or deleting in this list will change the document too. 
+ * + */ + public List<PDAnnotation> getAnnotations(AnnotationFilter annotationFilter) { COSArray annots = page.getDictionaryObject(COSName.ANNOTS, COSArray.class); if (annots == null) @@ -686,7 +750,7 @@ public class PDPage implements COSObjectable, PDContentStream LOG.warn("Ignored annotation expected to be a dictionary but was {}", item); return null; }); - if (nonNull(annotation)) + if (nonNull(annotation) && annotationFilter.accept(annotation)) { actuals.add(annotation); } @@ -755,4 +819,53 @@ public class PDPage implements COSObjectable, PDContentStream { return resourceCache; } + + /** + * Get the viewports. + * + * @return a list of viewports or null if there is no /VP entry. + */ + public List<PDViewportDictionary> getViewports() + { + COSBase base = page.getDictionaryObject(COSName.VP); + if (!(base instanceof COSArray)) + { + return null; + } + COSArray array = (COSArray) base; + List<PDViewportDictionary> viewports = new ArrayList<PDViewportDictionary>(); + for (int i = 0; i < array.size(); ++i) + { + COSBase base2 = array.getObject(i); + if (base2 instanceof COSDictionary) + { + viewports.add(new PDViewportDictionary((COSDictionary) base2)); + } + else + { + LOG.warn("Array element {} is skipped, must be a (viewport) dictionary", base2); + } + } + return viewports; + } + + /** + * Set the viewports. + * + * @param viewports A list of viewports, or null if the entry is to be deleted. 
+ */ + public void setViewports(List<PDViewportDictionary> viewports) + { + if (viewports == null) + { + page.removeItem(COSName.VP); + return; + } + COSArray array = new COSArray(); + for (PDViewportDictionary viewport : viewports) + { + array.add(viewport); + } + page.setItem(COSName.VP, array); + } } diff --git a/src/main/java/org/sejda/sambox/pdmodel/PDPageContentStream.java b/src/main/java/org/sejda/sambox/pdmodel/PDPageContentStream.java index 68b9ff4b505ec1996f96b0bb02335f3cdc8a36c9..3f4248c2d1a0dccc05f307dc1172f9d87ef4c355 100644 --- a/src/main/java/org/sejda/sambox/pdmodel/PDPageContentStream.java +++ b/src/main/java/org/sejda/sambox/pdmodel/PDPageContentStream.java @@ -52,6 +52,7 @@ import org.sejda.sambox.pdmodel.graphics.color.PDSeparation; import org.sejda.sambox.pdmodel.graphics.form.PDFormXObject; import org.sejda.sambox.pdmodel.graphics.image.PDImageXObject; import org.sejda.sambox.pdmodel.graphics.image.PDInlineImage; +import org.sejda.sambox.pdmodel.graphics.pattern.PDTilingPattern; import org.sejda.sambox.pdmodel.graphics.shading.PDShading; import org.sejda.sambox.pdmodel.graphics.state.PDExtendedGraphicsState; import org.sejda.sambox.pdmodel.graphics.state.RenderingMode; @@ -262,6 +263,26 @@ public final class PDPageContentStream implements Closeable formatDecimal.setGroupingUsed(false); } + /** + * Create a new appearance stream. Note that this is not actually a "page" content stream. + * + * @param doc The document the appearance is part of. + * @param pattern The pattern to add to. + * @param writer The content stream writer to write to. + * @throws IOException If there is an error writing to the page contents. 
+ */ + public PDPageContentStream(PDDocument doc, PDTilingPattern pattern, ContentStreamWriter writer) + throws IOException + { + this.document = doc; + + this.writer = writer; + this.resources = pattern.getResources(); + + formatDecimal.setMaximumFractionDigits(4); + formatDecimal.setGroupingUsed(false); + } + /** * Begin some text operations. * @@ -331,6 +352,40 @@ public final class PDPageContentStream implements Closeable writeOperator("Tf"); } + /** + * Shows the given text at the location specified by the current text matrix with the given interspersed + * positioning. This allows the user to efficiently position each glyph or sequence of glyphs. + * + * @param textWithPositioningArray An array consisting of String and Float types. Each String is output to the page + * using the current text matrix. Using the default coordinate system, each interspersed number adjusts the current + * text matrix by translating to the left or down for horizontal and vertical text respectively. The number is + * expressed in thousands of a text space unit, and may be negative. + * + * @throws IOException if an io exception occurs. + */ + public void showTextWithPositioning(Object[] textWithPositioningArray) throws IOException + { + write("["); + for (Object obj : textWithPositioningArray) + { + if (obj instanceof String) + { + showTextInternal((String) obj); + } + else if (obj instanceof Float) + { + writeOperand((Float) obj); + } + else + { + throw new IllegalArgumentException( + "Argument must consist of array of Float and String types"); + } + } + write("] "); + writeOperator("TJ"); + } + /** * Shows the given text at the location specified by the current text matrix. * @@ -338,6 +393,19 @@ public final class PDPageContentStream implements Closeable * @throws IOException If an io exception occurs. 
*/ public void showText(String text) throws IOException + { + showTextInternal(text); + writer.writeSpace(); + writeOperator("Tj"); + } + + /** + * Shows the given text at the location specified by the current text matrix. + * + * @param text The Unicode text to show. + * @throws IOException If an io exception occurs. + */ + protected void showTextInternal(String text) throws IOException { if (!inTextMode) { @@ -354,7 +422,8 @@ public final class PDPageContentStream implements Closeable // Unicode code points to keep when subsetting if (font.willBeSubset()) { - for (int offset = 0; offset < text.length();) + int offset = 0; + while (offset < text.length()) { int codePoint = text.codePointAt(offset); font.addToSubset(codePoint); @@ -363,8 +432,6 @@ public final class PDPageContentStream implements Closeable } COSString.newInstance(font.encode(text)).accept(writer); - writer.writeSpace(); - writeOperator("Tj"); } /** @@ -373,9 +440,9 @@ public final class PDPageContentStream implements Closeable * @param leading The leading in unscaled text units. * @throws IOException If there is an error writing to the stream. 
*/ - public void setLeading(double leading) throws IOException + public void setLeading(float leading) throws IOException { - writeOperand((float) leading); + writeOperand(leading); writeOperator("TL"); } @@ -635,6 +702,11 @@ public final class PDPageContentStream implements Closeable */ public void transform(Matrix matrix) throws IOException { + if (inTextMode) + { + LOG.warn( + "Modifying the current transformation matrix is not allowed within text objects."); + } writeAffineTransform(matrix.createAffineTransform()); writeOperator("cm"); } @@ -646,6 +718,10 @@ public final class PDPageContentStream implements Closeable */ public void saveGraphicsState() throws IOException { + if (inTextMode) + { + LOG.warn("Saving the graphics state is not allowed within text objects."); + } if (!fontStack.isEmpty()) { fontStack.push(fontStack.peek()); @@ -668,6 +744,10 @@ public final class PDPageContentStream implements Closeable */ public void restoreGraphicsState() throws IOException { + if (inTextMode) + { + LOG.warn("Restoring the graphics state is not allowed within text objects."); + } if (!fontStack.isEmpty()) { fontStack.pop(); @@ -690,10 +770,7 @@ public final class PDPageContentStream implements Closeable { return COSName.getPDFName(colorSpace.getName()); } - else - { - return resources.add(colorSpace); - } + return resources.add(colorSpace); } public void setTextRenderingMode(RenderingMode renderingMode) throws IOException @@ -812,13 +889,13 @@ public final class PDPageContentStream implements Closeable * @throws IOException If an IO error occurs while writing to the stream. * @throws IllegalArgumentException If the parameter is invalid. 
*/ - public void setStrokingColor(double g) throws IOException + public void setStrokingColor(float g) throws IOException { if (isOutsideOneInterval(g)) { throw new IllegalArgumentException("Parameter must be within 0..1, but is " + g); } - writeOperand((float) g); + writeOperand(g); writeOperator("G"); setStrokingColorSpaceStack(PDDeviceGray.INSTANCE); } @@ -929,7 +1006,7 @@ public final class PDPageContentStream implements Closeable * @param k The black value. * @throws IOException If an IO error occurs while writing to the stream. */ - public void setNonStrokingColor(double c, double m, double y, double k) throws IOException + public void setNonStrokingColor(float c, float m, float y, float k) throws IOException { if (isOutsideOneInterval(c) || isOutsideOneInterval(m) || isOutsideOneInterval(y) || isOutsideOneInterval(k)) @@ -937,10 +1014,10 @@ public final class PDPageContentStream implements Closeable throw new IllegalArgumentException("Parameters must be within 0..1, but are " + String.format("(%.2f,%.2f,%.2f,%.2f)", c, m, y, k)); } - writeOperand((float) c); - writeOperand((float) m); - writeOperand((float) y); - writeOperand((float) k); + writeOperand(c); + writeOperand(m); + writeOperand(y); + writeOperand(k); writeOperator("k"); setNonStrokingColorSpaceStack(PDDeviceCMYK.INSTANCE); } @@ -1569,12 +1646,12 @@ public final class PDPageContentStream implements Closeable IOUtils.close(writer); } - private boolean isOutside255Interval(int val) + private static boolean isOutside255Interval(int val) { return val < 0 || val > 255; } - private boolean isOutsideOneInterval(double val) + private static boolean isOutsideOneInterval(double val) { return val < 0 || val > 1; } @@ -1654,4 +1731,18 @@ public final class PDPageContentStream implements Closeable writeOperand(scale); writeOperator("Tz"); } + + /** + * Set the text rise value, i.e. move the baseline up or down. This is useful for drawing superscripts or + * subscripts. 
+ * + * @param rise Specifies the distance, in unscaled text space units, to move the baseline up or down from its + * default location. 0 restores the default location. + * @throws IOException + */ + public void setTextRise(float rise) throws IOException + { + writeOperand(rise); + writeOperator("Ts"); + } } diff --git a/src/main/java/org/sejda/sambox/pdmodel/common/PDNumberTreeNode.java b/src/main/java/org/sejda/sambox/pdmodel/common/PDNumberTreeNode.java index 6291a0cb730b798a493d8c804dd1eef7b50a03e7..73f256242e0a7dc97d6b9da078ee82d7f78c0404 100644 --- a/src/main/java/org/sejda/sambox/pdmodel/common/PDNumberTreeNode.java +++ b/src/main/java/org/sejda/sambox/pdmodel/common/PDNumberTreeNode.java @@ -175,13 +175,20 @@ public class PDNumberTreeNode implements COSObjectable public Map<Integer, COSObjectable> getNumbers() throws IOException { Map<Integer, COSObjectable> indices = null; - COSArray namesArray = (COSArray) node.getDictionaryObject(COSName.NUMS); - if (namesArray != null) + COSArray namesArray = node.getDictionaryObject(COSName.NUMS, COSArray.class); + if (nonNull(namesArray)) { indices = new HashMap<>(); for (int i = 0; i < namesArray.size(); i += 2) { - COSInteger key = (COSInteger) namesArray.getObject(i); + COSBase base = namesArray.getObject(i); + if (!(base instanceof COSInteger)) + { + LOG.error("page labels ignored, index {} should be a number, but is {}", i, + base); + return null; + } + COSInteger key = (COSInteger) base; COSBase cosValue = namesArray.getObject(i + 1); COSObjectable pdValue = convertCOSToPD(cosValue); indices.put(key.intValue(), pdValue); diff --git a/src/main/java/org/sejda/sambox/pdmodel/common/PDPageLabelRange.java b/src/main/java/org/sejda/sambox/pdmodel/common/PDPageLabelRange.java index 059254f9f1b60ccb07c836a7bab35334d99d362a..34a3f2a41c01f18bd52504ced998ebae25869c8b 100644 --- a/src/main/java/org/sejda/sambox/pdmodel/common/PDPageLabelRange.java +++ b/src/main/java/org/sejda/sambox/pdmodel/common/PDPageLabelRange.java @@ 
-86,6 +86,20 @@ public class PDPageLabelRange implements COSObjectable root = dict; } + public PDPageLabelRange(String style, String prefix, Integer start) { + this(); + + if(style != null) { + setStyle(style); + } + if(prefix != null) { + setPrefix(prefix); + } + if(start != null) { + setStart(start); + } + } + /** * Returns the underlying dictionary. * @@ -136,6 +150,13 @@ public class PDPageLabelRange implements COSObjectable return root.getInt(KEY_START, 1); } + /** + * @return true if the start value for page numbering is defined, false otherwise. + */ + public boolean hasStart() { + return root.getInt(KEY_START) != -1; + } + /** * Sets the start value for page numbering in this page range. * diff --git a/src/main/java/org/sejda/sambox/pdmodel/common/PDPageLabels.java b/src/main/java/org/sejda/sambox/pdmodel/common/PDPageLabels.java index 106b4d884fb98333a82523cb87461aa177dc2362..23348ba707cfa226334913597e2ce974546cfd6c 100644 --- a/src/main/java/org/sejda/sambox/pdmodel/common/PDPageLabels.java +++ b/src/main/java/org/sejda/sambox/pdmodel/common/PDPageLabels.java @@ -20,6 +20,7 @@ import static java.util.Objects.nonNull; import static org.sejda.util.RequireUtils.requireArg; import java.io.IOException; +import java.util.Collections; import java.util.List; import java.util.Map; import java.util.Map.Entry; @@ -142,6 +143,10 @@ public class PDPageLabels implements COSObjectable labels.put(startPage, item); } + public Map<Integer, PDPageLabelRange> getLabels() { + return Collections.unmodifiableMap(labels); + } + @Override public COSBase getCOSObject() { diff --git a/src/main/java/org/sejda/sambox/pdmodel/common/filespecification/PDFileSpecification.java b/src/main/java/org/sejda/sambox/pdmodel/common/filespecification/PDFileSpecification.java index d36f9e5d13e68522ea504e24bb3118ea2f8de159..a2d1cc4462aa709d9f7cab801ab86cc5f060221c 100644 --- a/src/main/java/org/sejda/sambox/pdmodel/common/filespecification/PDFileSpecification.java +++ 
b/src/main/java/org/sejda/sambox/pdmodel/common/filespecification/PDFileSpecification.java @@ -16,7 +16,12 @@ */ package org.sejda.sambox.pdmodel.common.filespecification; +import org.sejda.sambox.cos.COSBase; +import org.sejda.sambox.cos.COSDictionary; import org.sejda.sambox.cos.COSObjectable; +import org.sejda.sambox.cos.COSString; + +import java.io.IOException; /** * This represents a file specification. @@ -26,6 +31,38 @@ import org.sejda.sambox.cos.COSObjectable; public interface PDFileSpecification extends COSObjectable { + /** + * A file specification can either be a COSString or a COSDictionary. This + * will create the file specification either way. + * + * @param base The cos object that describes the fs. + * + * @return The file specification for the COSBase object. + * + * @throws IOException If there is an error creating the file spec. + */ + static PDFileSpecification createFS( COSBase base ) throws IOException + { + PDFileSpecification retval = null; + if( base == null ) + { + //then simply return null + } + else if( base instanceof COSString ) + { + retval = new PDSimpleFileSpecification( (COSString)base ); + } + else if( base instanceof COSDictionary) + { + retval = new PDComplexFileSpecification( (COSDictionary)base ); + } + else + { + throw new IOException( "Error: Unknown file specification " + base ); + } + return retval; + } + /** * @return The file name. 
*/ diff --git a/src/main/java/org/sejda/sambox/pdmodel/documentinterchange/logicalstructure/PDStructureElement.java b/src/main/java/org/sejda/sambox/pdmodel/documentinterchange/logicalstructure/PDStructureElement.java index 641946018235a01b3be23b5189d2637c759917b0..ff4e0425db8769c3dfa59bc62c60fc89eb008e31 100644 --- a/src/main/java/org/sejda/sambox/pdmodel/documentinterchange/logicalstructure/PDStructureElement.java +++ b/src/main/java/org/sejda/sambox/pdmodel/documentinterchange/logicalstructure/PDStructureElement.java @@ -89,12 +89,13 @@ public class PDStructureElement extends PDStructureNode */ public PDStructureNode getParent() { - COSDictionary p = (COSDictionary) this.getCOSObject().getDictionaryObject(COSName.P); - if (p == null) + COSBase base = this.getCOSObject().getDictionaryObject(COSName.P); + if (base instanceof COSDictionary) { - return null; + return PDStructureNode.create((COSDictionary) base); } - return PDStructureNode.create(p); + + return null; } /** @@ -136,12 +137,12 @@ public class PDStructureElement extends PDStructureNode */ public PDPage getPage() { - COSDictionary pageDic = (COSDictionary) this.getCOSObject().getDictionaryObject(COSName.PG); - if (pageDic == null) + COSBase base = this.getCOSObject().getDictionaryObject(COSName.PG); + if (base instanceof COSDictionary) { - return null; + return new PDPage((COSDictionary) base); } - return new PDPage(pageDic); + return null; } /** @@ -163,7 +164,7 @@ public class PDStructureElement extends PDStructureNode public Revisions<PDAttributeObject> getAttributes() { Revisions<PDAttributeObject> attributes = - new Revisions<PDAttributeObject>(); + new Revisions<>(); COSBase a = this.getCOSObject().getDictionaryObject(COSName.A); if (a instanceof COSArray) { @@ -172,7 +173,7 @@ public class PDStructureElement extends PDStructureNode PDAttributeObject ao = null; while (it.hasNext()) { - COSBase item = it.next(); + COSBase item = it.next().getCOSObject(); if (item instanceof COSDictionary) { ao = 
PDAttributeObject.create((COSDictionary) item); @@ -325,7 +326,7 @@ public class PDStructureElement extends PDStructureNode public Revisions<String> getClassNames() { COSName key = COSName.C; - Revisions<String> classNames = new Revisions<String>(); + Revisions<String> classNames = new Revisions<>(); COSBase c = this.getCOSObject().getDictionaryObject(key); if (c instanceof COSName) { @@ -338,7 +339,7 @@ public class PDStructureElement extends PDStructureNode String className = null; while (it.hasNext()) { - COSBase item = it.next(); + COSBase item = it.next().getCOSObject(); if (item instanceof COSName) { className = ((COSName) item).getName(); diff --git a/src/main/java/org/sejda/sambox/pdmodel/documentinterchange/logicalstructure/PDStructureTreeRoot.java b/src/main/java/org/sejda/sambox/pdmodel/documentinterchange/logicalstructure/PDStructureTreeRoot.java index 38693082d04221ec3c7abb7232a463c7045ee60b..86e123b03c456d706ca79d825db686aa02c38e50 100644 --- a/src/main/java/org/sejda/sambox/pdmodel/documentinterchange/logicalstructure/PDStructureTreeRoot.java +++ b/src/main/java/org/sejda/sambox/pdmodel/documentinterchange/logicalstructure/PDStructureTreeRoot.java @@ -75,22 +75,19 @@ public class PDStructureTreeRoot extends PDStructureNode public COSArray getKArray() { COSBase k = this.getCOSObject().getDictionaryObject(COSName.K); - if (k != null) + if (k instanceof COSDictionary) { - if (k instanceof COSDictionary) - { - COSDictionary kdict = (COSDictionary) k; - k = kdict.getDictionaryObject(COSName.K); - if (k instanceof COSArray) - { - return (COSArray) k; - } - } - else + COSDictionary kdict = (COSDictionary) k; + k = kdict.getDictionaryObject(COSName.K); + if (k instanceof COSArray) { return (COSArray) k; } } + else if (k instanceof COSArray) + { + return (COSArray) k; + } return null; } @@ -121,10 +118,10 @@ public class PDStructureTreeRoot extends PDStructureNode */ public PDNameTreeNode<PDStructureElement> getIDTree() { - COSDictionary idTreeDic = 
(COSDictionary) this.getCOSObject().getDictionaryObject(COSName.ID_TREE); - if (idTreeDic != null) + COSBase base = this.getCOSObject().getDictionaryObject(COSName.ID_TREE); + if (base instanceof COSDictionary) { - return new PDStructureElementNameTreeNode(idTreeDic); + return new PDStructureElementNameTreeNode((COSDictionary) base); } return null; } @@ -146,10 +143,10 @@ public class PDStructureTreeRoot extends PDStructureNode */ public PDNumberTreeNode getParentTree() { - COSDictionary parentTreeDic = (COSDictionary) this.getCOSObject().getDictionaryObject(COSName.PARENT_TREE); - if (parentTreeDic != null) + COSBase base = getCOSObject().getDictionaryObject(COSName.PARENT_TREE); + if (base instanceof COSDictionary) { - return new PDNumberTreeNode(parentTreeDic, COSBase.class); + return new PDNumberTreeNode((COSDictionary) base, COSBase.class); } return null; } diff --git a/src/main/java/org/sejda/sambox/pdmodel/font/FileSystemFontProvider.java b/src/main/java/org/sejda/sambox/pdmodel/font/FileSystemFontProvider.java index 7795832912e2ff491f1a607282763f12519fa68f..d66a86322001735fcb4be664113f300448ae1d3a 100644 --- a/src/main/java/org/sejda/sambox/pdmodel/font/FileSystemFontProvider.java +++ b/src/main/java/org/sejda/sambox/pdmodel/font/FileSystemFontProvider.java @@ -61,6 +61,7 @@ final class FileSystemFontProvider extends FontProvider private final List<FSFontInfo> fontInfoList = new ArrayList<>(); private final FontCache cache; + private boolean initialized = false; private static class FSFontInfo extends FontInfo { @@ -136,7 +137,10 @@ final class FileSystemFontProvider extends FontProvider default: throw new RuntimeException("can't happen"); } - parent.cache.addFont(this, font); + if(font != null) + { + parent.cache.addFont(this, font); + } return font; } @@ -200,6 +204,19 @@ final class FileSystemFontProvider extends FontProvider FileSystemFontProvider(FontCache cache) { this.cache = cache; + // init block moved to lazy initialization when required + } + + 
private synchronized void initializeIfRequired() + { + if(!this.initialized) { + initialize(); + this.initialized = true; + } + } + + private void initialize() + { try { LOG.trace("Will search the local system for fonts"); @@ -276,57 +293,60 @@ final class FileSystemFontProvider extends FontProvider */ private void saveDiskCache() { - File file = getDiskCacheFile(); - try (BufferedWriter writer = new BufferedWriter(new FileWriter(file))) + try { - for (FSFontInfo fontInfo : fontInfoList) + File file = getDiskCacheFile(); + try (BufferedWriter writer = new BufferedWriter(new FileWriter(file))) { - writer.write(fontInfo.postScriptName.trim().replace("|", "\\|")); - writer.write(FONT_CACHE_SEPARATOR); - writer.write(fontInfo.format.toString()); - writer.write(FONT_CACHE_SEPARATOR); - if (fontInfo.cidSystemInfo != null) - { - writer.write(fontInfo.cidSystemInfo.getRegistry() + '-' - + fontInfo.cidSystemInfo.getOrdering() + '-' - + fontInfo.cidSystemInfo.getSupplement()); - } - writer.write(FONT_CACHE_SEPARATOR); - if (fontInfo.usWeightClass > -1) - { - writer.write(Integer.toHexString(fontInfo.usWeightClass)); - } - writer.write(FONT_CACHE_SEPARATOR); - if (fontInfo.sFamilyClass > -1) - { - writer.write(Integer.toHexString(fontInfo.sFamilyClass)); - } - writer.write(FONT_CACHE_SEPARATOR); - writer.write(Integer.toHexString(fontInfo.ulCodePageRange1)); - writer.write(FONT_CACHE_SEPARATOR); - writer.write(Integer.toHexString(fontInfo.ulCodePageRange2)); - writer.write(FONT_CACHE_SEPARATOR); - if (fontInfo.macStyle > -1) + for (FSFontInfo fontInfo : fontInfoList) { - writer.write(Integer.toHexString(fontInfo.macStyle)); - } - writer.write(FONT_CACHE_SEPARATOR); - if (fontInfo.panose != null) - { - byte[] bytes = fontInfo.panose.getBytes(); - for (int i = 0; i < 10; i++) + writer.write(fontInfo.postScriptName.trim().replace("|", "\\|")); + writer.write(FONT_CACHE_SEPARATOR); + writer.write(fontInfo.format.toString()); + writer.write(FONT_CACHE_SEPARATOR); + if 
(fontInfo.cidSystemInfo != null) + { + writer.write( + fontInfo.cidSystemInfo.getRegistry() + '-' + fontInfo.cidSystemInfo.getOrdering() + '-' + + fontInfo.cidSystemInfo.getSupplement()); + } + writer.write(FONT_CACHE_SEPARATOR); + if (fontInfo.usWeightClass > -1) + { + writer.write(Integer.toHexString(fontInfo.usWeightClass)); + } + writer.write(FONT_CACHE_SEPARATOR); + if (fontInfo.sFamilyClass > -1) + { + writer.write(Integer.toHexString(fontInfo.sFamilyClass)); + } + writer.write(FONT_CACHE_SEPARATOR); + writer.write(Integer.toHexString(fontInfo.ulCodePageRange1)); + writer.write(FONT_CACHE_SEPARATOR); + writer.write(Integer.toHexString(fontInfo.ulCodePageRange2)); + writer.write(FONT_CACHE_SEPARATOR); + if (fontInfo.macStyle > -1) { - String str = Integer.toHexString(bytes[i]); - if (str.length() == 1) + writer.write(Integer.toHexString(fontInfo.macStyle)); + } + writer.write(FONT_CACHE_SEPARATOR); + if (fontInfo.panose != null) + { + byte[] bytes = fontInfo.panose.getBytes(); + for (int i = 0; i < 10; i++) { - writer.write('0'); + String str = Integer.toHexString(bytes[i]); + if (str.length() == 1) + { + writer.write('0'); + } + writer.write(str); } - writer.write(str); } + writer.write(FONT_CACHE_SEPARATOR); + writer.write(fontInfo.file.getAbsolutePath()); + writer.newLine(); } - writer.write(FONT_CACHE_SEPARATOR); - writer.write(fontInfo.file.getAbsolutePath()); - writer.newLine(); } } catch (IOException | SecurityException e) @@ -416,10 +436,18 @@ final class FileSystemFontProvider extends FontProvider } fontFile = new File(parts[9]); - FSFontInfo info = new FSFontInfo(fontFile, format, postScriptName, - cidSystemInfo, usWeightClass, sFamilyClass, ulCodePageRange1, - ulCodePageRange2, macStyle, panose, this); - results.add(info); + if(fontFile.exists()) + { + + FSFontInfo info = new FSFontInfo(fontFile, format, postScriptName, + cidSystemInfo, usWeightClass, sFamilyClass, ulCodePageRange1, + ulCodePageRange2, macStyle, panose, this); + results.add(info); + 
} + else + { + LOG.debug("Font file {} not found, skipped", fontFile.getAbsolutePath()); + } pending.remove(fontFile.getAbsolutePath()); } } @@ -679,6 +707,7 @@ final class FileSystemFontProvider extends FontProvider @Override public String toDebugString() { + initializeIfRequired(); StringBuilder sb = new StringBuilder(); for (FSFontInfo info : fontInfoList) { @@ -695,6 +724,7 @@ final class FileSystemFontProvider extends FontProvider @Override public List<? extends FontInfo> getFontInfo() { + initializeIfRequired(); return fontInfoList; } } diff --git a/src/main/java/org/sejda/sambox/pdmodel/font/FontMapperImpl.java b/src/main/java/org/sejda/sambox/pdmodel/font/FontMapperImpl.java index 6b8a16c560145cc59f0659b55e3b71276a3cf628..28756756eb23e32e18595a8df2d3abf05027d1b1 100644 --- a/src/main/java/org/sejda/sambox/pdmodel/font/FontMapperImpl.java +++ b/src/main/java/org/sejda/sambox/pdmodel/font/FontMapperImpl.java @@ -502,7 +502,7 @@ final class FontMapperImpl implements FontMapper { return new CIDFontMapping((OpenTypeFont) font, null, true); } - else + else if(font != null) { return new CIDFontMapping(null, font, true); } diff --git a/src/main/java/org/sejda/sambox/pdmodel/font/FontUtils.java b/src/main/java/org/sejda/sambox/pdmodel/font/FontUtils.java new file mode 100644 index 0000000000000000000000000000000000000000..639c94a17384388f6f1f73b817853aa953ea61cb --- /dev/null +++ b/src/main/java/org/sejda/sambox/pdmodel/font/FontUtils.java @@ -0,0 +1,119 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.sejda.sambox.pdmodel.font; + +import java.io.IOException; +import java.util.Map; + +import org.apache.fontbox.ttf.OS2WindowsMetricsTable; +import org.apache.fontbox.ttf.TrueTypeFont; + +/** + * @author Andrea Vacondio + */ +public final class FontUtils +{ + private static final String BASE25 = "BCDEFGHIJKLMNOPQRSTUVWXYZ"; + + private FontUtils() + { + // util + } + + /** + * @return true if the fsType in the OS/2 table permits embedding. + */ + public static boolean isEmbeddingPermitted(TrueTypeFont ttf) throws IOException + { + if (ttf.getOS2Windows() != null) + { + int fsType = ttf.getOS2Windows().getFsType(); + int exclusive = fsType & 0x8; // bits 0-3 are a set of exclusive bits + + if ((exclusive + & OS2WindowsMetricsTable.FSTYPE_RESTRICTED) == OS2WindowsMetricsTable.FSTYPE_RESTRICTED) + { + // restricted License embedding + return false; + } + else if ((exclusive + & OS2WindowsMetricsTable.FSTYPE_BITMAP_ONLY) == OS2WindowsMetricsTable.FSTYPE_BITMAP_ONLY) + { + // bitmap embedding only + return false; + } + } + return true; + } + + /** + * @return true if the fsType in the OS/2 table permits subsetting. + */ + public static boolean isSubsettingPermitted(TrueTypeFont ttf) throws IOException + { + if (ttf.getOS2Windows() != null) + { + int fsType = ttf.getOS2Windows().getFsType(); + if ((fsType + & OS2WindowsMetricsTable.FSTYPE_NO_SUBSETTING) == OS2WindowsMetricsTable.FSTYPE_NO_SUBSETTING) + { + return false; + } + } + return true; + } + + /** + * @return an uppercase 6-character unique tag for the given subset. 
+ */ + public static String getTag(Map<Integer, Integer> gidToCid) + { + // deterministic + long num = gidToCid.hashCode(); + + // base25 encode + StringBuilder sb = new StringBuilder(); + do + { + long div = num / 25; + int mod = (int) (num % 25); + sb.append(BASE25.charAt(mod)); + num = div; + } while (num != 0 && sb.length() < 6); + + // pad + while (sb.length() < 6) + { + sb.insert(0, 'A'); + } + + return sb.append('+').toString(); + } + + /** + * @return an uppercase 6-character unique tag randomly created + */ + public static String getTag() + { + StringBuilder sb = new StringBuilder(""); + for (int k = 0; k < 6; ++k) + { + sb.append((char) (Math.random() * 26 + 'A')); + } + return sb.append('+').toString(); + } +} diff --git a/src/main/java/org/sejda/sambox/pdmodel/font/PDCIDFont.java b/src/main/java/org/sejda/sambox/pdmodel/font/PDCIDFont.java index e5914299f8acbe00d0528302a8bc46e8c278e388..9d0f02bb7f5f44d27926736cba64e5d7f837b579 100644 --- a/src/main/java/org/sejda/sambox/pdmodel/font/PDCIDFont.java +++ b/src/main/java/org/sejda/sambox/pdmodel/font/PDCIDFont.java @@ -19,16 +19,13 @@ package org.sejda.sambox.pdmodel.font; import static java.util.Objects.nonNull; import java.io.IOException; +import java.io.InputStream; import java.util.HashMap; import java.util.Map; +import org.apache.commons.io.IOUtils; import org.apache.fontbox.util.BoundingBox; -import org.sejda.sambox.cos.COSArray; -import org.sejda.sambox.cos.COSBase; -import org.sejda.sambox.cos.COSDictionary; -import org.sejda.sambox.cos.COSName; -import org.sejda.sambox.cos.COSNumber; -import org.sejda.sambox.cos.COSObjectable; +import org.sejda.sambox.cos.*; import org.sejda.sambox.util.Matrix; import org.sejda.sambox.util.Vector; @@ -138,7 +135,7 @@ public abstract class PDCIDFont implements COSObjectable, PDFontLike, PDVectorFo COSArray array = (COSArray) next; for (int j = 0; j < array.size(); j++) { - int cid = c.intValue() + j; + int cid = c.intValue() + j / 3; COSNumber w1y = (COSNumber) 
array.getObject(j); COSNumber v1x = (COSNumber) array.getObject(++j); COSNumber v1y = (COSNumber) array.getObject(++j); @@ -257,6 +254,12 @@ public abstract class PDCIDFont implements COSObjectable, PDFontLike, PDVectorFo return width; } + @Override + public boolean hasExplicitWidth(int code) throws IOException + { + return widths.get(codeToCID(code)) != null; + } + @Override public Vector getPositionVector(int code) { @@ -375,4 +378,28 @@ public abstract class PDCIDFont implements COSObjectable, PDFontLike, PDVectorFo * @throws IOException If the text could not be encoded. */ protected abstract byte[] encode(int unicode) throws IOException; + + final int[] readCIDToGIDMap() throws IOException + { + int[] cid2gid = null; + COSBase map = dict.getDictionaryObject(COSName.CID_TO_GID_MAP); + if (map instanceof COSStream) + { + COSStream stream = (COSStream) map; + + InputStream is = stream.getUnfilteredStream(); + byte[] mapAsBytes = IOUtils.toByteArray(is); + IOUtils.closeQuietly(is); + int numberOfInts = mapAsBytes.length / 2; + cid2gid = new int[numberOfInts]; + int offset = 0; + for (int index = 0; index < numberOfInts; index++) + { + int gid = (mapAsBytes[offset] & 0xff) << 8 | mapAsBytes[offset + 1] & 0xff; + cid2gid[index] = gid; + offset += 2; + } + } + return cid2gid; + } } diff --git a/src/main/java/org/sejda/sambox/pdmodel/font/PDCIDFontType0.java b/src/main/java/org/sejda/sambox/pdmodel/font/PDCIDFontType0.java index c55600e4a433e7f6c2ccb18eb9ec2c8dc45fe22a..98f2113dc70bb65d35fe66ce898d5487e9fe7da6 100644 --- a/src/main/java/org/sejda/sambox/pdmodel/font/PDCIDFontType0.java +++ b/src/main/java/org/sejda/sambox/pdmodel/font/PDCIDFontType0.java @@ -62,6 +62,7 @@ public class PDCIDFontType0 extends PDCIDFont private Matrix fontMatrix; private final AffineTransform fontMatrixTransform; private BoundingBox fontBBox; + private int[] cid2gid = null; /** * Constructor. 
@@ -119,6 +120,7 @@ public class PDCIDFontType0 extends PDCIDFont cidFont = null; t1Font = cffFont; } + cid2gid = readCIDToGIDMap(); isEmbedded = true; isDamaged = false; } @@ -226,11 +228,13 @@ public class PDCIDFontType0 extends PDCIDFont if (getFontDescriptor() != null) { PDRectangle bbox = getFontDescriptor().getFontBoundingBox(); - if (bbox.getLowerLeftX() != 0 || bbox.getLowerLeftY() != 0 || bbox.getUpperRightX() != 0 - || bbox.getUpperRightY() != 0) + if(bbox != null) { - return new BoundingBox(bbox.getLowerLeftX(), bbox.getLowerLeftY(), - bbox.getUpperRightX(), bbox.getUpperRightY()); + if (bbox.getLowerLeftX() != 0 || bbox.getLowerLeftY() != 0 || bbox.getUpperRightX() != 0 + || bbox.getUpperRightY() != 0) { + return new BoundingBox(bbox.getLowerLeftX(), bbox.getLowerLeftY(), + bbox.getUpperRightX(), bbox.getUpperRightY()); + } } } if (cidFont != null) @@ -316,6 +320,11 @@ public class PDCIDFontType0 extends PDCIDFont public GeneralPath getPath(int code) throws IOException { int cid = codeToCID(code); + if (cid2gid != null && isEmbedded) + { + // PDFBOX-4093: despite being a type 0 font, there is a CIDToGIDMap + cid = cid2gid[cid]; + } Type2CharString charstring = getType2CharString(cid); if (charstring != null) { diff --git a/src/main/java/org/sejda/sambox/pdmodel/font/PDCIDFontType2.java b/src/main/java/org/sejda/sambox/pdmodel/font/PDCIDFontType2.java index 4baefbd0d10f65a5a3ebdc14281d9a8a0e4efa1f..f809914fb491ef08d079eb05131b0a8f10c44013 100644 --- a/src/main/java/org/sejda/sambox/pdmodel/font/PDCIDFontType2.java +++ b/src/main/java/org/sejda/sambox/pdmodel/font/PDCIDFontType2.java @@ -20,28 +20,23 @@ import static java.util.Objects.nonNull; import java.awt.geom.GeneralPath; import java.io.IOException; -import java.io.InputStream; import java.lang.reflect.Field; import java.util.HashMap; import java.util.Map; import org.apache.fontbox.cff.Type2CharString; import org.apache.fontbox.cmap.CMap; -import org.apache.fontbox.ttf.CmapSubtable; +import 
org.apache.fontbox.ttf.CmapLookup; import org.apache.fontbox.ttf.GlyphData; import org.apache.fontbox.ttf.OTFParser; import org.apache.fontbox.ttf.OpenTypeFont; import org.apache.fontbox.ttf.TrueTypeFont; import org.apache.fontbox.util.BoundingBox; -import org.sejda.sambox.cos.COSBase; import org.sejda.sambox.cos.COSDictionary; -import org.sejda.sambox.cos.COSName; -import org.sejda.sambox.cos.COSStream; import org.sejda.sambox.pdmodel.common.PDRectangle; import org.sejda.sambox.pdmodel.common.PDStream; import org.sejda.sambox.util.Matrix; import org.sejda.sambox.util.ReflectionUtils; -import org.sejda.util.IOUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -56,9 +51,10 @@ public class PDCIDFontType2 extends PDCIDFont private final TrueTypeFont ttf; private final int[] cid2gid; + private final HashMap<Integer, Integer> gid2cid = new HashMap<Integer, Integer>(); private final boolean isEmbedded; private final boolean isDamaged; - private final CmapSubtable cmap; // may be null + private final CmapLookup cmap; // may be null private Matrix fontMatrix; private BoundingBox fontBBox; @@ -157,8 +153,17 @@ public class PDCIDFontType2 extends PDCIDFont } ttf = ttfFont; } - cmap = ttf.getUnicodeCmap(false); + cmap = ttf.getUnicodeCmapLookup(false); cid2gid = readCIDToGIDMap(); + if(cid2gid != null) { + for (int cid = 0; cid < cid2gid.length; cid++) + { + int gid = cid2gid[cid]; + if(gid != 0) { + gid2cid.put(gid, cid); + } + } + } } private TrueTypeFont findFontOrSubstitute() throws IOException @@ -209,38 +214,20 @@ public class PDCIDFontType2 extends PDCIDFont if (getFontDescriptor() != null) { PDRectangle bbox = getFontDescriptor().getFontBoundingBox(); - if (nonNull(bbox) && bbox.getLowerLeftX() != 0 || bbox.getLowerLeftY() != 0 - || bbox.getUpperRightX() != 0 || bbox.getUpperRightY() != 0) + if(nonNull(bbox)) { - return new BoundingBox(bbox.getLowerLeftX(), bbox.getLowerLeftY(), - bbox.getUpperRightX(), bbox.getUpperRightY()); + if 
((Float.compare(bbox.getLowerLeftX(), 0) != 0 || + Float.compare(bbox.getLowerLeftY(), 0) != 0 || + Float.compare(bbox.getUpperRightX(), 0) != 0 || + Float.compare(bbox.getUpperRightY(), 0) != 0)) + { + return new BoundingBox(bbox.getLowerLeftX(), bbox.getLowerLeftY(), + bbox.getUpperRightX(), bbox.getUpperRightY()); + } } - } - return ttf.getFontBBox(); - } - private int[] readCIDToGIDMap() throws IOException - { - int[] cid2gid = null; - COSBase map = dict.getDictionaryObject(COSName.CID_TO_GID_MAP); - if (map instanceof COSStream) - { - COSStream stream = (COSStream) map; - - InputStream is = stream.getUnfilteredStream(); - byte[] mapAsBytes = IOUtils.toByteArray(is); - IOUtils.closeQuietly(is); - int numberOfInts = mapAsBytes.length / 2; - cid2gid = new int[numberOfInts]; - int offset = 0; - for (int index = 0; index < numberOfInts; index++) - { - int gid = (mapAsBytes[offset] & 0xff) << 8 | mapAsBytes[offset + 1] & 0xff; - cid2gid[index] = gid; - offset += 2; - } } - return cid2gid; + return ttf.getFontBBox(); } @Override @@ -354,7 +341,11 @@ public class PDCIDFontType2 extends PDCIDFont { if (cmap != null) { - cid = cmap.getGlyphId(unicode); + int gid = cmap.getGlyphId(unicode); + // SAMBOX specific here + // if there's a gid to cid mapping, use it. 
+ // otherwise fallback to the old behaviour, which is to assume cid = gid + cid = gid2cid.getOrDefault(gid, gid); } } else diff --git a/src/main/java/org/sejda/sambox/pdmodel/font/PDCIDFontType2Embedder.java b/src/main/java/org/sejda/sambox/pdmodel/font/PDCIDFontType2Embedder.java index fcfa27d5a00a1d6705eb774b35f289e9354cc041..9a99587b20a75e64495c17670fde086b00a699ac 100644 --- a/src/main/java/org/sejda/sambox/pdmodel/font/PDCIDFontType2Embedder.java +++ b/src/main/java/org/sejda/sambox/pdmodel/font/PDCIDFontType2Embedder.java @@ -28,7 +28,7 @@ import java.util.Map; import java.util.Set; import java.util.TreeSet; -import org.apache.fontbox.ttf.TrueTypeFont; +import org.apache.fontbox.ttf.*; import org.sejda.sambox.cos.COSArray; import org.sejda.sambox.cos.COSDictionary; import org.sejda.sambox.cos.COSInteger; @@ -36,6 +36,8 @@ import org.sejda.sambox.cos.COSName; import org.sejda.sambox.pdmodel.PDDocument; import org.sejda.sambox.pdmodel.common.PDStream; import org.sejda.sambox.util.SpecVersionUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** * Embedded PDCIDFontType2 builder. Helper class to populate a PDCIDFontType2 and its parent PDType0Font from a TTF. @@ -45,10 +47,14 @@ import org.sejda.sambox.util.SpecVersionUtils; */ final class PDCIDFontType2Embedder extends TrueTypeEmbedder { + + private static final Logger LOG = LoggerFactory.getLogger(PDCIDFontType2Embedder.class); + private final PDDocument document; private final PDType0Font parent; private final COSDictionary dict; private final COSDictionary cidFont; + private final boolean vertical; /** * Creates a new TrueType font embedder for the given TTF as a PDCIDFontType2. 
@@ -60,17 +66,18 @@ final class PDCIDFontType2Embedder extends TrueTypeEmbedder * @throws IOException if the TTF could not be read */ PDCIDFontType2Embedder(PDDocument document, COSDictionary dict, TrueTypeFont ttf, - boolean embedSubset, PDType0Font parent) throws IOException + boolean embedSubset, PDType0Font parent, boolean vertical) throws IOException { super(dict, ttf, embedSubset); this.document = document; this.dict = dict; this.parent = parent; + this.vertical = vertical; // parent Type 0 font dict.setItem(COSName.SUBTYPE, COSName.TYPE0); dict.setName(COSName.BASE_FONT, fontDescriptor.getFontName()); - dict.setItem(COSName.ENCODING, COSName.IDENTITY_H); // CID = GID + dict.setItem(COSName.ENCODING, vertical ? COSName.IDENTITY_V : COSName.IDENTITY_H); // CID = GID // descendant CIDFont cidFont = createCIDFont(); @@ -106,6 +113,11 @@ final class PDCIDFontType2Embedder extends TrueTypeEmbedder // build unicode mapping before subsetting as the subsetted font won't have a cmap buildToUnicodeCMap(gidToCid); + // build vertical metrics before subsetting as the subsetted font won't have vhea, vmtx + if (vertical) + { + buildVerticalMetrics(cidToGid); + } // rebuild the relevant part of the font buildFontFile2(ttfSubset); @@ -137,7 +149,7 @@ final class PDCIDFontType2Embedder extends TrueTypeEmbedder } // skip composite glyph components that have no code point - List<Integer> codes = cmap.getCharCodes(cid); // old GID -> Unicode + List<Integer> codes = cmapLookup.getCharCodes(cid); // old GID -> Unicode if (codes != null) { // use the first entry even for ambiguous mappings @@ -195,6 +207,12 @@ final class PDCIDFontType2Embedder extends TrueTypeEmbedder // W - widths buildWidths(cidFont); + // Vertical metrics + if (vertical) + { + buildVerticalMetrics(cidFont); + } + // CIDToGIDMap cidFont.setItem(COSName.CID_TO_GID_MAP, COSName.IDENTITY); @@ -229,10 +247,8 @@ final class PDCIDFontType2Embedder extends TrueTypeEmbedder out.write(new byte[] { (byte) (gid >> 8 & 
0xff), (byte) (gid & 0xff) }); } - byte[] byteArray = out.toByteArray(); - InputStream input = new ByteArrayInputStream(byteArray); + InputStream input = new ByteArrayInputStream(out.toByteArray()); PDStream stream = new PDStream(input, COSName.FLATE_DECODE); - stream.getCOSObject().setInt(COSName.LENGTH1, byteArray.length); cidFont.setItem(COSName.CID_TO_GID_MAP, stream); } @@ -293,6 +309,90 @@ final class PDCIDFontType2Embedder extends TrueTypeEmbedder cidFont.setItem(COSName.W, widths); } + private boolean buildVerticalHeader(COSDictionary cidFont) throws IOException + { + VerticalHeaderTable vhea = ttf.getVerticalHeader(); + if (vhea == null) + { + LOG.warn("Font to be subset is set to vertical, but has no 'vhea' table"); + return false; + } + + float scaling = 1000f / ttf.getHeader().getUnitsPerEm(); + + long v = Math.round(vhea.getAscender() * scaling); + long w1 = Math.round(-vhea.getAdvanceHeightMax() * scaling); + if (v != 880 || w1 != -1000) + { + COSArray cosDw2 = new COSArray(); + cosDw2.add(COSInteger.get(v)); + cosDw2.add(COSInteger.get(w1)); + cidFont.setItem(COSName.DW2, cosDw2); + } + return true; + } + + /** + * Builds vertical metrics with a custom CIDToGIDMap (for embedding font subset). + */ + private void buildVerticalMetrics(Map<Integer, Integer> cidToGid) throws IOException + { + // The "vhea" and "vmtx" tables that specify vertical metrics shall never be used by a conforming + // reader. The only way to specify vertical metrics in PDF shall be by means of the DW2 and W2 + // entries in a CIDFont dictionary. 
+ + if (!buildVerticalHeader(cidFont)) + { + return; + } + + float scaling = 1000f / ttf.getHeader().getUnitsPerEm(); + + VerticalHeaderTable vhea = ttf.getVerticalHeader(); + VerticalMetricsTable vmtx = ttf.getVerticalMetrics(); + GlyphTable glyf = ttf.getGlyph(); + HorizontalMetricsTable hmtx = ttf.getHorizontalMetrics(); + + long v_y = Math.round(vhea.getAscender() * scaling); + long w1 = Math.round(-vhea.getAdvanceHeightMax() * scaling); + + COSArray heights = new COSArray(); + COSArray w2 = new COSArray(); + int prev = Integer.MIN_VALUE; + // Use a sorted list to get an optimal width array + Set<Integer> keys = new TreeSet<Integer>(cidToGid.keySet()); + for (int cid : keys) + { + // Unlike buildWidths, we look up with cid (not gid) here because this is + // the original TTF, not the rebuilt one. + GlyphData glyph = glyf.getGlyph(cid); + if (glyph == null) + { + continue; + } + long height = Math.round((glyph.getYMaximum() + vmtx.getTopSideBearing(cid)) * scaling); + long advance = Math.round(-vmtx.getAdvanceHeight(cid) * scaling); + if (height == v_y && advance == w1) + { + // skip default metrics + continue; + } + // c [w1_1y v_1x v_1y w1_2y v_2x v_2y ... w1_ny v_nx v_ny] + if (prev != cid - 1) + { + w2 = new COSArray(); + heights.add(COSInteger.get(cid)); // c + heights.add(w2); + } + w2.add(COSInteger.get(advance)); // w1_iy + long width = Math.round(hmtx.getAdvanceWidth(cid) * scaling); + w2.add(COSInteger.get(width / 2)); // v_ix + w2.add(COSInteger.get(height)); // v_iy + prev = cid; + } + cidFont.setItem(COSName.W2, heights); + } + /** * Build widths with Identity CIDToGIDMap (for embedding full font). 
*/ @@ -326,7 +426,7 @@ final class PDCIDFontType2Embedder extends TrueTypeEmbedder long lastCid = widths[0]; long lastValue = Math.round(widths[1] * scaling); - COSArray inner = null; + COSArray inner = new COSArray(); COSArray outer = new COSArray(); outer.add(COSInteger.get(lastCid)); @@ -334,7 +434,7 @@ final class PDCIDFontType2Embedder extends TrueTypeEmbedder for (int i = 2; i < widths.length; i += 2) { - long cid = widths[i]; + long cid = widths[i]; long value = Math.round(widths[i + 1] * scaling); switch (state) @@ -410,6 +510,160 @@ final class PDCIDFontType2Embedder extends TrueTypeEmbedder return outer; } + /** + * Build vertical metrics with Identity CIDToGIDMap (for embedding full font). + */ + private void buildVerticalMetrics(COSDictionary cidFont) throws IOException + { + if (!buildVerticalHeader(cidFont)) + { + return; + } + + int cidMax = ttf.getNumberOfGlyphs(); + int[] gidMetrics = new int[cidMax * 4]; + for (int cid = 0; cid < cidMax; cid++) + { + GlyphData glyph = ttf.getGlyph().getGlyph(cid); + if (glyph == null) + { + gidMetrics[cid * 4] = Integer.MIN_VALUE; + } + else + { + gidMetrics[cid * 4] = cid; + gidMetrics[cid * 4 + 1] = ttf.getVerticalMetrics().getAdvanceHeight(cid); + gidMetrics[cid * 4 + 2] = ttf.getHorizontalMetrics().getAdvanceWidth(cid); + gidMetrics[cid * 4 + 3] = glyph.getYMaximum() + ttf.getVerticalMetrics().getTopSideBearing(cid); + } + } + + cidFont.setItem(COSName.W2, getVerticalMetrics(gidMetrics)); + } + + private COSArray getVerticalMetrics(int[] values) throws IOException + { + if (values.length == 0) + { + throw new IllegalArgumentException("length of values must be > 0"); + } + + float scaling = 1000f / ttf.getHeader().getUnitsPerEm(); + + long lastCid = values[0]; + long lastW1Value = Math.round(-values[1] * scaling); + long lastVxValue = Math.round(values[2] * scaling / 2f); + long lastVyValue = Math.round(values[3] * scaling); + + COSArray inner = new COSArray(); + COSArray outer = new COSArray(); + 
outer.add(COSInteger.get(lastCid)); + + State state = State.FIRST; + + for (int i = 4; i < values.length; i += 4) + { + long cid = values[i]; + if (cid == Integer.MIN_VALUE) + { + // no glyph for this cid + continue; + } + long w1Value = Math.round(-values[i + 1] * scaling); + long vxValue = Math.round(values[i + 2] * scaling / 2); + long vyValue = Math.round(values[i + 3] * scaling); + + switch (state) + { + case FIRST: + if (cid == lastCid + 1 && w1Value == lastW1Value && vxValue == lastVxValue && vyValue == lastVyValue) + { + state = State.SERIAL; + } + else if (cid == lastCid + 1) + { + state = State.BRACKET; + inner = new COSArray(); + inner.add(COSInteger.get(lastW1Value)); + inner.add(COSInteger.get(lastVxValue)); + inner.add(COSInteger.get(lastVyValue)); + } + else + { + inner = new COSArray(); + inner.add(COSInteger.get(lastW1Value)); + inner.add(COSInteger.get(lastVxValue)); + inner.add(COSInteger.get(lastVyValue)); + outer.add(inner); + outer.add(COSInteger.get(cid)); + } + break; + case BRACKET: + if (cid == lastCid + 1 && w1Value == lastW1Value && vxValue == lastVxValue && vyValue == lastVyValue) + { + state = State.SERIAL; + outer.add(inner); + outer.add(COSInteger.get(lastCid)); + } + else if (cid == lastCid + 1) + { + inner.add(COSInteger.get(lastW1Value)); + inner.add(COSInteger.get(lastVxValue)); + inner.add(COSInteger.get(lastVyValue)); + } + else + { + state = State.FIRST; + inner.add(COSInteger.get(lastW1Value)); + inner.add(COSInteger.get(lastVxValue)); + inner.add(COSInteger.get(lastVyValue)); + outer.add(inner); + outer.add(COSInteger.get(cid)); + } + break; + case SERIAL: + if (cid != lastCid + 1 || w1Value != lastW1Value || vxValue != lastVxValue || vyValue != lastVyValue) + { + outer.add(COSInteger.get(lastCid)); + outer.add(COSInteger.get(lastW1Value)); + outer.add(COSInteger.get(lastVxValue)); + outer.add(COSInteger.get(lastVyValue)); + outer.add(COSInteger.get(cid)); + state = State.FIRST; + } + break; + } + lastW1Value = w1Value; + 
lastVxValue = vxValue; + lastVyValue = vyValue; + lastCid = cid; + } + + switch (state) + { + case FIRST: + inner = new COSArray(); + inner.add(COSInteger.get(lastW1Value)); + inner.add(COSInteger.get(lastVxValue)); + inner.add(COSInteger.get(lastVyValue)); + outer.add(inner); + break; + case BRACKET: + inner.add(COSInteger.get(lastW1Value)); + inner.add(COSInteger.get(lastVxValue)); + inner.add(COSInteger.get(lastVyValue)); + outer.add(inner); + break; + case SERIAL: + outer.add(COSInteger.get(lastCid)); + outer.add(COSInteger.get(lastW1Value)); + outer.add(COSInteger.get(lastVxValue)); + outer.add(COSInteger.get(lastVyValue)); + break; + } + return outer; + } + /** * Returns the descendant CIDFont. */ diff --git a/src/main/java/org/sejda/sambox/pdmodel/font/PDFont.java b/src/main/java/org/sejda/sambox/pdmodel/font/PDFont.java index ad6504802ae94f58bdbff385362c646e51624e6b..e2efd48221322ec70e046d47e9b6813d8d915931 100644 --- a/src/main/java/org/sejda/sambox/pdmodel/font/PDFont.java +++ b/src/main/java/org/sejda/sambox/pdmodel/font/PDFont.java @@ -19,7 +19,6 @@ package org.sejda.sambox.pdmodel.font; import static java.util.Objects.nonNull; import java.io.ByteArrayInputStream; -import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.InputStream; import java.util.Collections; @@ -32,6 +31,7 @@ import org.apache.commons.io.IOUtils; import org.apache.fontbox.afm.FontMetrics; import org.apache.fontbox.cmap.CMap; import org.apache.fontbox.util.BoundingBox; +import org.sejda.io.FastByteArrayOutputStream; import org.sejda.sambox.cos.COSArray; import org.sejda.sambox.cos.COSArrayList; import org.sejda.sambox.cos.COSBase; @@ -51,7 +51,7 @@ import org.slf4j.LoggerFactory; * * @author Ben Litchfield */ -public abstract class PDFont implements COSObjectable, PDFontLike +public abstract class PDFont implements COSObjectable, PDFontLike, Subsettable { private static final Logger LOG = LoggerFactory.getLogger(PDFont.class); protected static final Matrix 
DEFAULT_FONT_MATRIX = new Matrix(0.001f, 0, 0, 0.001f, 0, 0); @@ -312,8 +312,9 @@ public abstract class PDFont implements COSObjectable, PDFontLike */ public final byte[] encode(String text) throws IOException { - ByteArrayOutputStream out = new ByteArrayOutputStream(); - for (int offset = 0; offset < text.length();) + FastByteArrayOutputStream out = new FastByteArrayOutputStream(); + int offset = 0; + while (offset < text.length()) { int codePoint = text.codePointAt(offset); @@ -326,6 +327,40 @@ public abstract class PDFont implements COSObjectable, PDFontLike return out.toByteArray(); } + /** + * Similar to encode() but handles leniently cases where fonts don't have a glyph by assuming the identity mapping + */ + public final byte[] encodeLeniently(String text) throws IOException + { + FastByteArrayOutputStream out = new FastByteArrayOutputStream(); + for (int offset = 0; offset < text.length();) + { + int codePoint = text.codePointAt(offset); + + // multi-byte encoding with 1 to 4 bytes + byte[] bytes; + try + { + bytes = encode(codePoint); + } + catch (IllegalArgumentException e) + { + if (e.getMessage().contains("No glyph")) + { + bytes = new byte[] { (byte) codePoint }; + } + else + { + throw e; + } + } + out.write(bytes); + + offset += Character.charCount(codePoint); + } + return out.toByteArray(); + } + /** * Encodes the given Unicode code point for use in a PDF content stream. Content streams use a multi-byte encoding * with 1 to 4 bytes. 
@@ -361,6 +396,27 @@ public abstract class PDFont implements COSObjectable, PDFontLike return width; } + /** + * Similar to getStringWidth() but handles leniently fonts where glyphs are missing, assuming the identity mapping + * of glyphs + * + * Uses encodeLeniently() instead of encode() + */ + public float getStringWidthLeniently(String text) throws IOException + { + byte[] bytes = encodeLeniently(text); + ByteArrayInputStream in = new ByteArrayInputStream(bytes); + + float width = 0; + while (in.available() > 0) + { + int code = readCode(in); + width += getWidth(code); + } + + return width; + } + /** * This will get the average font width for all characters. * @@ -575,25 +631,6 @@ public abstract class PDFont implements COSObjectable, PDFontLike return Standard14Fonts.containsName(getName()); } - /** - * Adds the given Unicode point to the subset. - * - * @param codePoint Unicode code point - */ - public abstract void addToSubset(int codePoint); - - /** - * Replaces this font with a subset containing only the given Unicode characters. - * - * @throws IOException if the subset could not be written - */ - public abstract void subset() throws IOException; - - /** - * Returns true if this font will be subset when embedded. - */ - public abstract boolean willBeSubset(); - @Override public abstract boolean isDamaged(); diff --git a/src/main/java/org/sejda/sambox/pdmodel/font/PDFontLike.java b/src/main/java/org/sejda/sambox/pdmodel/font/PDFontLike.java index 3a8cbf0d906b2004b459b9802a06ee15bebb6fc8..f65e9d29165490f07a8880b3d789066d161b7e7a 100644 --- a/src/main/java/org/sejda/sambox/pdmodel/font/PDFontLike.java +++ b/src/main/java/org/sejda/sambox/pdmodel/font/PDFontLike.java @@ -88,6 +88,15 @@ public interface PDFontLike */ float getWidth(int code) throws IOException; + /** + * Returns true if the Font dictionary specifies an explicit width for the given glyph. + * This includes Width, W but not default widths entries. 
+ * + * @param code character code + * @throws IOException if the font could not be read + */ + boolean hasExplicitWidth(int code) throws IOException; + /** * Returns the width of a glyph in the embedded font file. * diff --git a/src/main/java/org/sejda/sambox/pdmodel/font/PDPanoseClassification.java b/src/main/java/org/sejda/sambox/pdmodel/font/PDPanoseClassification.java index 8d8fb8760571d7004f27eafc5b6cf66127b76cc1..3d0be4f472012d9f7442d39b7859be8d8c7cda17 100644 --- a/src/main/java/org/sejda/sambox/pdmodel/font/PDPanoseClassification.java +++ b/src/main/java/org/sejda/sambox/pdmodel/font/PDPanoseClassification.java @@ -90,7 +90,7 @@ public class PDPanoseClassification @Override public String toString() { - return "{ FamilyType = " + getFamilyKind() + ", " + "SerifStyle = " + getSerifStyle() + ", " + return "{ FamilyKind = " + getFamilyKind() + ", " + "SerifStyle = " + getSerifStyle() + ", " + "Weight = " + getWeight() + ", " + "Proportion = " + getProportion() + ", " + "Contrast = " + getContrast() + ", " + "StrokeVariation = " + getStrokeVariation() + ", " + "ArmStyle = " + getArmStyle() + ", " + "Letterform = " + getLetterform() diff --git a/src/main/java/org/sejda/sambox/pdmodel/font/PDSimpleFont.java b/src/main/java/org/sejda/sambox/pdmodel/font/PDSimpleFont.java index dff389745d176008ac5bf684273b30bf8e6b9092..9b2e404eac7e9038045ab3bcf00d40f4c76622f7 100644 --- a/src/main/java/org/sejda/sambox/pdmodel/font/PDSimpleFont.java +++ b/src/main/java/org/sejda/sambox/pdmodel/font/PDSimpleFont.java @@ -424,4 +424,18 @@ public abstract class PDSimpleFont extends PDFont { return false; } + + @Override + public boolean hasExplicitWidth(int code) throws IOException + { + if (dict.containsKey(COSName.WIDTHS)) + { + int firstChar = dict.getInt(COSName.FIRST_CHAR, -1); + if (code >= firstChar && code - firstChar < getWidths().size()) + { + return true; + } + } + return false; + } } diff --git a/src/main/java/org/sejda/sambox/pdmodel/font/PDTrueTypeFontEmbedder.java 
b/src/main/java/org/sejda/sambox/pdmodel/font/PDTrueTypeFontEmbedder.java index 1bb495d9d715dde014fbd8493ec009be8626d8a0..7bcaca95d3cabaa6f3f692e9bed195dd194fc48b 100644 --- a/src/main/java/org/sejda/sambox/pdmodel/font/PDTrueTypeFontEmbedder.java +++ b/src/main/java/org/sejda/sambox/pdmodel/font/PDTrueTypeFontEmbedder.java @@ -99,7 +99,7 @@ final class PDTrueTypeFontEmbedder extends TrueTypeEmbedder { String uni = glyphList.toUnicode(name); int charCode = uni.codePointAt(0); - int gid = cmap.getGlyphId(charCode); + int gid = cmapLookup.getGlyphId(charCode); widths.set(entry.getKey() - firstChar, Math.round(hmtx.getAdvanceWidth(gid) * scaling)); } diff --git a/src/main/java/org/sejda/sambox/pdmodel/font/PDType0Font.java b/src/main/java/org/sejda/sambox/pdmodel/font/PDType0Font.java index 4997b50bd63776e6bd0c013ae418f3abf214f9ef..a3ca96a47ea6722005af5b8c487c571d51479c04 100644 --- a/src/main/java/org/sejda/sambox/pdmodel/font/PDType0Font.java +++ b/src/main/java/org/sejda/sambox/pdmodel/font/PDType0Font.java @@ -66,7 +66,7 @@ public class PDType0Font extends PDFont implements PDVectorFont */ public static PDType0Font load(PDDocument doc, File file) throws IOException { - return new PDType0Font(doc, new TTFParser().parse(file), true, true); + return new PDType0Font(doc, new TTFParser().parse(file), true, true, false); } /** @@ -79,7 +79,7 @@ public class PDType0Font extends PDFont implements PDVectorFont */ public static PDType0Font load(PDDocument doc, InputStream input) throws IOException { - return new PDType0Font(doc, new TTFParser().parse(input), true, true); + return new PDType0Font(doc, new TTFParser().parse(input), true, true, false); } /** @@ -94,7 +94,7 @@ public class PDType0Font extends PDFont implements PDVectorFont public static PDType0Font load(PDDocument doc, InputStream input, boolean embedSubset) throws IOException { - return new PDType0Font(doc, new TTFParser().parse(input), embedSubset, true); + return new PDType0Font(doc, new 
TTFParser().parse(input), embedSubset, true, false); } /** @@ -109,7 +109,63 @@ public class PDType0Font extends PDFont implements PDVectorFont public static PDType0Font load(PDDocument doc, TrueTypeFont ttf, boolean embedSubset) throws IOException { - return new PDType0Font(doc, ttf, embedSubset, false); + return new PDType0Font(doc, ttf, embedSubset, false, false); + } + + /** + * Loads a TTF to be embedded into a document as a vertical Type 0 font. + * + * @param doc The PDF document that will hold the embedded font. + * @param file A TrueType font. + * @return A Type0 font with a CIDFontType2 descendant. + * @throws IOException If there is an error reading the font file. + */ + public static PDType0Font loadVertical(PDDocument doc, File file) throws IOException + { + return new PDType0Font(doc, new TTFParser().parse(file), true, true, true); + } + + /** + * Loads a TTF to be embedded into a document as a vertical Type 0 font. + * + * @param doc The PDF document that will hold the embedded font. + * @param input A TrueType font. + * @return A Type0 font with a CIDFontType2 descendant. + * @throws IOException If there is an error reading the font stream. + */ + public static PDType0Font loadVertical(PDDocument doc, InputStream input) throws IOException + { + return new PDType0Font(doc, new TTFParser().parse(input), true, true, true); + } + + /** + * Loads a TTF to be embedded into a document as a vertical Type 0 font. + * + * @param doc The PDF document that will hold the embedded font. + * @param input A TrueType font. + * @param embedSubset True if the font will be subset before embedding + * @return A Type0 font with a CIDFontType2 descendant. + * @throws IOException If there is an error reading the font stream. 
+ */ + public static PDType0Font loadVertical(PDDocument doc, InputStream input, boolean embedSubset) + throws IOException + { + return new PDType0Font(doc, new TTFParser().parse(input), embedSubset, true, true); + } + + /** + * Loads a TTF to be embedded into a document as a vertical Type 0 font. + * + * @param doc The PDF document that will hold the embedded font. + * @param ttf A TrueType font. + * @param embedSubset True if the font will be subset before embedding + * @return A Type0 font with a CIDFontType2 descendant. + * @throws IOException If there is an error reading the font stream. + */ + public static PDType0Font loadVertical(PDDocument doc, TrueTypeFont ttf, boolean embedSubset) + throws IOException + { + return new PDType0Font(doc, ttf, embedSubset, false, true); } /** @@ -145,10 +201,14 @@ public class PDType0Font extends PDFont implements PDVectorFont * Private. Creates a new TrueType font for embedding. */ private PDType0Font(PDDocument document, TrueTypeFont ttf, boolean embedSubset, - boolean closeOnSubset) + boolean closeOnSubset, boolean vertical) throws IOException { - embedder = new PDCIDFontType2Embedder(document, dict, ttf, embedSubset, this); + if (vertical) + { + ttf.enableVerticalSubstitutions(); + } + embedder = new PDCIDFontType2Embedder(document, dict, ttf, embedSubset, this, vertical); descendantFont = embedder.getCIDFont(); readEncoding(); fetchCMapUCS2(); @@ -343,6 +403,12 @@ public class PDType0Font extends PDFont implements PDVectorFont return descendantFont.encode(unicode); } + @Override + public boolean hasExplicitWidth(int code) throws IOException + { + return descendantFont.hasExplicitWidth(code); + } + @Override public float getAverageFontWidth() { @@ -483,7 +549,8 @@ public class PDType0Font extends PDFont implements PDVectorFont { descendant = getDescendantFont().getClass().getSimpleName(); } - return getClass().getSimpleName() + "/" + descendant + " " + getBaseFont(); + return getClass().getSimpleName() + "/" + descendant 
+ ", PostScript name: " + + getBaseFont(); } @Override diff --git a/src/main/java/org/sejda/sambox/pdmodel/font/PDType1FontEmbedder.java b/src/main/java/org/sejda/sambox/pdmodel/font/PDType1FontEmbedder.java index ab43b358217462cebe0d888b9fb82d692ff7d3c1..0ae4d665220186de623244cf1b51fac2022f1f8e 100644 --- a/src/main/java/org/sejda/sambox/pdmodel/font/PDType1FontEmbedder.java +++ b/src/main/java/org/sejda/sambox/pdmodel/font/PDType1FontEmbedder.java @@ -99,6 +99,7 @@ class PDType1FontEmbedder dict.setInt(COSName.FIRST_CHAR, 0); dict.setInt(COSName.LAST_CHAR, 255); dict.setItem(COSName.WIDTHS, COSArrayList.converterToCOSArray(widths)); + dict.setItem(COSName.ENCODING, encoding); } /** diff --git a/src/main/java/org/sejda/sambox/pdmodel/font/PDType3Font.java b/src/main/java/org/sejda/sambox/pdmodel/font/PDType3Font.java index d5034b66c2040da4e83467d48f65ba925b2bb466..35493ad50b3707f871548d16a22befe9e4e53368 100644 --- a/src/main/java/org/sejda/sambox/pdmodel/font/PDType3Font.java +++ b/src/main/java/org/sejda/sambox/pdmodel/font/PDType3Font.java @@ -37,6 +37,8 @@ import org.sejda.sambox.pdmodel.font.encoding.Encoding; import org.sejda.sambox.pdmodel.font.encoding.GlyphList; import org.sejda.sambox.util.Matrix; import org.sejda.sambox.util.Vector; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** * A PostScript Type 3 Font. 
@@ -45,6 +47,8 @@ import org.sejda.sambox.util.Vector; */ public class PDType3Font extends PDSimpleFont { + private static final Logger LOG = LoggerFactory.getLogger(PDType3Font.class); + private PDResources resources; private COSDictionary charProcs; private Matrix fontMatrix; @@ -65,8 +69,20 @@ public class PDType3Font extends PDSimpleFont @Override protected final void readEncoding() { - encoding = new DictionaryEncoding( - dict.getDictionaryObject(COSName.ENCODING, COSDictionary.class)); + COSBase encodingBase = dict.getDictionaryObject(COSName.ENCODING); + if (encodingBase instanceof COSName) + { + COSName encodingName = (COSName) encodingBase; + encoding = Encoding.getInstance(encodingName); + if (encoding == null) + { + LOG.warn("Unknown encoding: {}", encodingName.getName()); + } + } + else if (encodingBase instanceof COSDictionary) + { + encoding = new DictionaryEncoding((COSDictionary) encodingBase); + } glyphList = GlyphList.getAdobeGlyphList(); } diff --git a/src/main/java/org/sejda/sambox/pdmodel/font/Standard14Fonts.java b/src/main/java/org/sejda/sambox/pdmodel/font/Standard14Fonts.java index 42abd46b9a2cb5639f067aeecfb347b87d523928..53e99457af5e2ac4f5275432dc12ca6244723b58 100644 --- a/src/main/java/org/sejda/sambox/pdmodel/font/Standard14Fonts.java +++ b/src/main/java/org/sejda/sambox/pdmodel/font/Standard14Fonts.java @@ -88,6 +88,12 @@ final class Standard14Fonts addAFM("Times,Italic", "Times-Italic"); addAFM("Times,Bold", "Times-Bold"); addAFM("Times,BoldItalic", "Times-BoldItalic"); + + // PDFBOX-3457: PDF.js file bug864847.pdf + addAFM("ArialMT", "Helvetica"); + addAFM("Arial-ItalicMT", "Helvetica-Oblique"); + addAFM("Arial-BoldMT", "Helvetica-Bold"); + addAFM("Arial-BoldItalicMT", "Helvetica-BoldOblique"); } catch (IOException e) { diff --git a/src/main/java/org/sejda/sambox/pdmodel/font/Subsettable.java b/src/main/java/org/sejda/sambox/pdmodel/font/Subsettable.java new file mode 100644 index 
0000000000000000000000000000000000000000..a2833bb51876857be4567a38e744d531e98e82b4 --- /dev/null +++ b/src/main/java/org/sejda/sambox/pdmodel/font/Subsettable.java @@ -0,0 +1,46 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.sejda.sambox.pdmodel.font; + +import java.io.IOException; + +/** + * A subsettable + * + * @author Andrea Vacondio + */ +public interface Subsettable +{ + /** + * Adds the given Unicode point to the subset. + * + * @param codePoint Unicode code point + */ + void addToSubset(int codePoint); + + /** + * Replaces this font with a subset containing only the given Unicode characters. + * + * @throws IOException if the subset could not be written + */ + void subset() throws IOException; + + /** + * @return true if this font will be subset when embedded. 
+ */ + boolean willBeSubset(); +} diff --git a/src/main/java/org/sejda/sambox/pdmodel/font/TrueTypeEmbedder.java b/src/main/java/org/sejda/sambox/pdmodel/font/TrueTypeEmbedder.java index f67d11cbac31421b66beaf5eb7db76a7799ded30..f76146b7cf933bc20e6c46ed5cc77536612a0eb4 100644 --- a/src/main/java/org/sejda/sambox/pdmodel/font/TrueTypeEmbedder.java +++ b/src/main/java/org/sejda/sambox/pdmodel/font/TrueTypeEmbedder.java @@ -17,17 +17,20 @@ package org.sejda.sambox.pdmodel.font; +import static org.sejda.sambox.pdmodel.font.FontUtils.getTag; +import static org.sejda.sambox.pdmodel.font.FontUtils.isEmbeddingPermitted; +import static org.sejda.sambox.pdmodel.font.FontUtils.isSubsettingPermitted; + import java.awt.geom.GeneralPath; import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.InputStream; -import java.util.ArrayList; +import java.util.Arrays; import java.util.HashSet; -import java.util.List; import java.util.Map; import java.util.Set; - +import org.apache.fontbox.ttf.CmapLookup; import org.apache.fontbox.ttf.CmapSubtable; import org.apache.fontbox.ttf.HeaderTable; import org.apache.fontbox.ttf.HorizontalHeaderTable; @@ -41,7 +44,6 @@ import org.sejda.sambox.cos.COSName; import org.sejda.sambox.pdmodel.common.PDRectangle; import org.sejda.sambox.pdmodel.common.PDStream; import org.sejda.util.IOUtils; - /** * Common functionality for embedding TrueType fonts. 
* @@ -52,11 +54,13 @@ abstract class TrueTypeEmbedder implements Subsetter { private static final int ITALIC = 1; private static final int OBLIQUE = 512; - private static final String BASE25 = "BCDEFGHIJKLMNOPQRSTUVWXYZ"; protected TrueTypeFont ttf; protected PDFontDescriptor fontDescriptor; + @Deprecated protected final CmapSubtable cmap; + + protected final CmapLookup cmapLookup; private final Set<Integer> subsetCodePoints = new HashSet<>(); private final boolean embedSubset; @@ -86,6 +90,7 @@ abstract class TrueTypeEmbedder implements Subsetter // choose a Unicode "cmap" cmap = ttf.getUnicodeCmap(); + cmapLookup = ttf.getUnicodeCmapLookup(); } public void buildFontFile2(InputStream ttfStream) throws IOException @@ -115,48 +120,6 @@ abstract class TrueTypeEmbedder implements Subsetter fontDescriptor.setFontFile2(stream); } - /** - * Returns true if the fsType in the OS/2 table permits embedding. - */ - private boolean isEmbeddingPermitted(TrueTypeFont ttf) throws IOException - { - if (ttf.getOS2Windows() != null) - { - int fsType = ttf.getOS2Windows().getFsType(); - int exclusive = fsType & 0x8; // bits 0-3 are a set of exclusive bits - - if ((exclusive - & OS2WindowsMetricsTable.FSTYPE_RESTRICTED) == OS2WindowsMetricsTable.FSTYPE_RESTRICTED) - { - // restricted License embedding - return false; - } - else if ((exclusive - & OS2WindowsMetricsTable.FSTYPE_BITMAP_ONLY) == OS2WindowsMetricsTable.FSTYPE_BITMAP_ONLY) - { - // bitmap embedding only - return false; - } - } - return true; - } - - /** - * Returns true if the fsType in the OS/2 table permits subsetting. - */ - private boolean isSubsettingPermitted(TrueTypeFont ttf) throws IOException - { - if (ttf.getOS2Windows() != null) - { - int fsType = ttf.getOS2Windows().getFsType(); - if ((fsType - & OS2WindowsMetricsTable.FSTYPE_NO_SUBSETTING) == OS2WindowsMetricsTable.FSTYPE_NO_SUBSETTING) - { - return false; - } - } - return true; - } /** * Creates a new font descriptor dictionary for the given TTF. 
@@ -288,21 +251,9 @@ abstract class TrueTypeEmbedder implements Subsetter } // PDF spec required tables (if present), all others will be removed - List<String> tables = new ArrayList<String>(); - tables.add("head"); - tables.add("hhea"); - tables.add("loca"); - tables.add("maxp"); - tables.add("cvt "); - tables.add("prep"); - tables.add("glyf"); - tables.add("hmtx"); - tables.add("fpgm"); - // Windows ClearType - tables.add("gasp"); - // set the GIDs to subset - TTFSubsetter subsetter = new TTFSubsetter(ttf, tables); + TTFSubsetter subsetter = new TTFSubsetter(ttf, Arrays.asList("head", "hhea", "loca", "maxp", + "cvt", "prep", "glyf", "hmtx", "fpgm", "gasp")); subsetter.addAll(subsetCodePoints); // calculate deterministic tag based on the chosen subset @@ -320,7 +271,7 @@ abstract class TrueTypeEmbedder implements Subsetter } /** - * Returns true if the font needs to be subset. + * @return true if the font needs to be subset. */ public boolean needsSubset() { @@ -328,36 +279,10 @@ abstract class TrueTypeEmbedder implements Subsetter } /** - * Rebuild a font subset. + * @return a font subset. */ protected abstract void buildSubset(InputStream ttfSubset, String tag, Map<Integer, Integer> gidToCid) throws IOException; - /** - * Returns an uppercase 6-character unique tag for the given subset. 
- */ - public String getTag(Map<Integer, Integer> gidToCid) - { - // deterministic - long num = gidToCid.hashCode(); - - // base25 encode - StringBuilder sb = new StringBuilder(); - do - { - long div = num / 25; - int mod = (int) (num % 25); - sb.append(BASE25.charAt(mod)); - num = div; - } while (num != 0 && sb.length() < 6); - - // pad - while (sb.length() < 6) - { - sb.insert(0, 'A'); - } - sb.append('+'); - return sb.toString(); - } } diff --git a/src/main/java/org/sejda/sambox/pdmodel/font/encoding/GlyphList.java b/src/main/java/org/sejda/sambox/pdmodel/font/encoding/GlyphList.java index 5270c3d5c067daa1259e4c57b36ee49eaddb4c14..024854bfbf8644b7fb56f7ec64ebda82492eb7ff 100644 --- a/src/main/java/org/sejda/sambox/pdmodel/font/encoding/GlyphList.java +++ b/src/main/java/org/sejda/sambox/pdmodel/font/encoding/GlyphList.java @@ -16,12 +16,15 @@ */ package org.sejda.sambox.pdmodel.font.encoding; +import static java.util.Objects.nonNull; + import java.io.BufferedReader; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.util.HashMap; import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -95,7 +98,7 @@ public final class GlyphList private final Map<String, String> unicodeToName; // additional read/write cache for uniXXXX names - private final Map<String, String> uniNameToUnicodeCache = new HashMap<>(); + private final Map<String, String> uniNameToUnicodeCache = new ConcurrentHashMap<>(); /** * Creates a new GlyphList from a glyph list file. 
@@ -283,7 +286,11 @@ public final class GlyphList LOG.warn("Not a number in Unicode character name: {}", name); } } - uniNameToUnicodeCache.put(name, unicode); + if (nonNull(unicode)) + { + // null value not allowed in ConcurrentHashMap + uniNameToUnicodeCache.put(name, unicode); + } } return unicode; } diff --git a/src/main/java/org/sejda/sambox/pdmodel/graphics/blend/BlendComposite.java b/src/main/java/org/sejda/sambox/pdmodel/graphics/blend/BlendComposite.java index 7c312245a25391dadf3663ff1d838b51599af87a..ab2cf9849b1b61cebfc78ac470d7d489db1cec50 100644 --- a/src/main/java/org/sejda/sambox/pdmodel/graphics/blend/BlendComposite.java +++ b/src/main/java/org/sejda/sambox/pdmodel/graphics/blend/BlendComposite.java @@ -35,9 +35,6 @@ import org.slf4j.LoggerFactory; */ public final class BlendComposite implements Composite { - /** - * Log instance. - */ private static final Logger LOG = LoggerFactory.getLogger(BlendComposite.class); /** @@ -45,6 +42,7 @@ public final class BlendComposite implements Composite * * @param blendMode Desired blend mode * @param constantAlpha Constant alpha, must be in the inclusive range [0.0...1.0] or it will be clipped. + * @return a blend composite. 
*/ public static Composite getInstance(BlendMode blendMode, float constantAlpha) { @@ -62,11 +60,12 @@ public final class BlendComposite implements Composite { return AlphaComposite.getInstance(AlphaComposite.SRC_OVER, constantAlpha); } - return new BlendComposite(blendMode, constantAlpha); + else + { + return new BlendComposite(blendMode, constantAlpha); + } } - // TODO - non-separable blending modes - private final BlendMode blendMode; private final float constantAlpha; @@ -81,21 +80,18 @@ public final class BlendComposite implements Composite public CompositeContext createContext(ColorModel srcColorModel, ColorModel dstColorModel, RenderingHints hints) { - return new BlendCompositeContext(srcColorModel, dstColorModel, hints); + return new BlendCompositeContext(srcColorModel, dstColorModel); } class BlendCompositeContext implements CompositeContext { private final ColorModel srcColorModel; private final ColorModel dstColorModel; - private final RenderingHints hints; - BlendCompositeContext(ColorModel srcColorModel, ColorModel dstColorModel, - RenderingHints hints) + BlendCompositeContext(ColorModel srcColorModel, ColorModel dstColorModel) { this.srcColorModel = srcColorModel; this.dstColorModel = dstColorModel; - this.hints = hints; } @Override @@ -127,13 +123,16 @@ public final class BlendComposite implements Composite int numDstComponents = dstIn.getNumBands(); boolean dstHasAlpha = (numDstComponents > numDstColorComponents); - int colorSpaceType = dstColorSpace.getType(); - boolean subtractive = (colorSpaceType != ColorSpace.TYPE_RGB) - && (colorSpaceType != ColorSpace.TYPE_GRAY); + int srcColorSpaceType = srcColorSpace.getType(); + int dstColorSpaceType = dstColorSpace.getType(); + boolean subtractive = (dstColorSpaceType != ColorSpace.TYPE_RGB) + && (dstColorSpaceType != ColorSpace.TYPE_GRAY); boolean blendModeIsSeparable = blendMode instanceof SeparableBlendMode; SeparableBlendMode separableBlendMode = blendModeIsSeparable ? 
(SeparableBlendMode) blendMode : null; + NonSeparableBlendMode nonSeparableBlendMode = !blendModeIsSeparable + ? (NonSeparableBlendMode) blendMode : null; boolean needsColorConversion = !srcColorSpace.equals(dstColorSpace); @@ -146,6 +145,8 @@ public final class BlendComposite implements Composite float[] srcColor = new float[numSrcColorComponents]; float[] srcConverted; + float[] dstConverted; + float[] rgbResult = blendModeIsSeparable ? null : new float[dstHasAlpha ? 4 : 3]; for (int y = y0; y < y1; y++) { @@ -167,21 +168,21 @@ public final class BlendComposite implements Composite float resultAlpha = dstAlpha + srcAlpha - srcAlpha * dstAlpha; float srcAlphaRatio = (resultAlpha > 0) ? srcAlpha / resultAlpha : 0; - // convert color - System.arraycopy(srcComponents, 0, srcColor, 0, numSrcColorComponents); - if (needsColorConversion) - { - // TODO - very very slow - Hash results??? - float[] cieXYZ = srcColorSpace.toCIEXYZ(srcColor); - srcConverted = dstColorSpace.fromCIEXYZ(cieXYZ); - } - else - { - srcConverted = srcColor; - } - if (separableBlendMode != null) { + // convert color + System.arraycopy(srcComponents, 0, srcColor, 0, numSrcColorComponents); + if (needsColorConversion) + { + // TODO - very very slow - Hash results??? + float[] cieXYZ = srcColorSpace.toCIEXYZ(srcColor); + srcConverted = dstColorSpace.fromCIEXYZ(cieXYZ); + } + else + { + srcConverted = srcColor; + } + for (int k = 0; k < numDstColorComponents; k++) { float srcValue = srcConverted[k]; @@ -207,7 +208,50 @@ public final class BlendComposite implements Composite } else { - // TODO - nonseparable modes + // Nonseparable blend modes are computed in RGB color space. + // TODO - CMYK color spaces need special treatment. 
+ + if (srcColorSpaceType == ColorSpace.TYPE_RGB) + { + srcConverted = srcComponents; + } + else + { + srcConverted = srcColorSpace.toRGB(srcComponents); + } + + if (dstColorSpaceType == ColorSpace.TYPE_RGB) + { + dstConverted = dstComponents; + } + else + { + dstConverted = dstColorSpace.toRGB(dstComponents); + } + + nonSeparableBlendMode.blend(srcConverted, dstConverted, rgbResult); + + for (int k = 0; k < 3; k++) + { + float srcValue = srcConverted[k]; + float dstValue = dstConverted[k]; + float value = rgbResult[k]; + value = Math.max(Math.min(value, 1.0f), 0.0f); + value = srcValue + dstAlpha * (value - srcValue); + value = dstValue + srcAlphaRatio * (value - dstValue); + rgbResult[k] = value; + } + + if (dstColorSpaceType == ColorSpace.TYPE_RGB) + { + System.arraycopy(rgbResult, 0, dstComponents, 0, dstComponents.length); + } + else + { + float[] temp = dstColorSpace.fromRGB(rgbResult); + System.arraycopy(temp, 0, dstComponents, 0, + Math.min(dstComponents.length, temp.length)); + } } if (dstHasAlpha) @@ -220,10 +264,5 @@ public final class BlendComposite implements Composite } } } - - public RenderingHints getHints() - { - return hints; - } } } diff --git a/src/main/java/org/sejda/sambox/pdmodel/graphics/blend/BlendMode.java b/src/main/java/org/sejda/sambox/pdmodel/graphics/blend/BlendMode.java index f16c03f665e0d7007f1cbd15301dded0d007c84c..754db3bec66761f291d78025aee6d5d3943895bf 100644 --- a/src/main/java/org/sejda/sambox/pdmodel/graphics/blend/BlendMode.java +++ b/src/main/java/org/sejda/sambox/pdmodel/graphics/blend/BlendMode.java @@ -30,42 +30,6 @@ import org.sejda.sambox.cos.COSName; */ public abstract class BlendMode { - BlendMode() - { - } - /** - * Determines the blend mode from the BM entry in the COS ExtGState. 
- * - * @param cosBlendMode name or array - * @return blending mode - */ - public static BlendMode getInstance(COSBase cosBlendMode) - { - BlendMode result = null; - if (cosBlendMode instanceof COSName) - { - result = BLEND_MODES.get(cosBlendMode); - } - else if (cosBlendMode instanceof COSArray) - { - COSArray cosBlendModeArray = (COSArray) cosBlendMode; - for (int i = 0; i < cosBlendModeArray.size(); i++) - { - result = BLEND_MODES.get(cosBlendModeArray.getObject(i)); - if (result != null) - { - break; - } - } - } - - if (result != null) - { - return result; - } - return BlendMode.COMPATIBLE; - } - public static final SeparableBlendMode NORMAL = new SeparableBlendMode() { @Override @@ -128,7 +92,16 @@ public abstract class BlendMode @Override public float blendChannel(float srcValue, float dstValue) { - return (srcValue < 1) ? Math.min(1, dstValue / (1 - srcValue)) : 1; + // See PDF 2.0 specification + if (dstValue == 0) + { + return 0; + } + if (dstValue >= 1 - srcValue) + { + return 1; + } + return dstValue / (1 - srcValue); } }; @@ -137,7 +110,16 @@ public abstract class BlendMode @Override public float blendChannel(float srcValue, float dstValue) { - return (srcValue > 0) ? 
1 - Math.min(1, (1 - dstValue) / srcValue) : 0; + // See PDF 2.0 specification + if (dstValue == 1) + { + return 1; + } + if (1 - dstValue >= srcValue) + { + return 0; + } + return 1 - (1 - dstValue) / srcValue; } }; @@ -184,14 +166,215 @@ public abstract class BlendMode } }; - // this map *must* come after the declarations above, otherwise its values will be null + public static final NonSeparableBlendMode HUE = new NonSeparableBlendMode() + { + @Override + public void blend(float[] srcValues, float[] dstValues, float[] result) + { + float[] temp = new float[3]; + getSaturationRGB(dstValues, srcValues, temp); + getLuminosityRGB(dstValues, temp, result); + } + }; + + public static final NonSeparableBlendMode SATURATION = new NonSeparableBlendMode() + { + @Override + public void blend(float[] srcValues, float[] dstValues, float[] result) + { + getSaturationRGB(srcValues, dstValues, result); + } + }; + + public static final NonSeparableBlendMode COLOR = new NonSeparableBlendMode() + { + @Override + public void blend(float[] srcValues, float[] dstValues, float[] result) + { + getLuminosityRGB(dstValues, srcValues, result); + } + }; + + public static final NonSeparableBlendMode LUMINOSITY = new NonSeparableBlendMode() + { + @Override + public void blend(float[] srcValues, float[] dstValues, float[] result) + { + getLuminosityRGB(srcValues, dstValues, result); + } + }; + + // these maps *must* come after the BlendMode.* constant declarations, otherwise their values would be null private static final Map<COSName, BlendMode> BLEND_MODES = createBlendModeMap(); + BlendMode() + { + } + + /** + * Determines the blend mode from the BM entry in the COS ExtGState. 
+ * + * @param cosBlendMode name or array + * @return blending mode + */ + public static BlendMode getInstance(COSBase cosBlendMode) + { + BlendMode result = null; + if (cosBlendMode instanceof COSName) + { + result = BLEND_MODES.get(cosBlendMode); + } + else if (cosBlendMode instanceof COSArray) + { + COSArray cosBlendModeArray = (COSArray) cosBlendMode; + for (int i = 0; i < cosBlendModeArray.size(); i++) + { + result = BLEND_MODES.get(cosBlendModeArray.getObject(i)); + if (result != null) + { + break; + } + } + } + + if (result != null) + { + return result; + } + return BlendMode.NORMAL; + } + + private static int get255Value(float val) + { + return (int) Math.floor(val >= 1.0 ? 255 : val * 255.0); + } + + private static void getSaturationRGB(float[] srcValues, float[] dstValues, float[] result) + { + int minb; + int maxb; + int mins; + int maxs; + int y; + int scale; + int r; + int g; + int b; + + int rd = get255Value(dstValues[0]); + int gd = get255Value(dstValues[1]); + int bd = get255Value(dstValues[2]); + int rs = get255Value(srcValues[0]); + int gs = get255Value(srcValues[1]); + int bs = get255Value(srcValues[2]); + + minb = Math.min(rd, Math.min(gd, bd)); + maxb = Math.max(rd, Math.max(gd, bd)); + if (minb == maxb) + { + /* backdrop has zero saturation, avoid divide by 0 */ + result[0] = gd / 255.0f; + result[1] = gd / 255.0f; + result[2] = gd / 255.0f; + return; + } + + mins = Math.min(rs, Math.min(gs, bs)); + maxs = Math.max(rs, Math.max(gs, bs)); + + scale = ((maxs - mins) << 16) / (maxb - minb); + y = (rd * 77 + gd * 151 + bd * 28 + 0x80) >> 8; + r = y + ((((rd - y) * scale) + 0x8000) >> 16); + g = y + ((((gd - y) * scale) + 0x8000) >> 16); + b = y + ((((bd - y) * scale) + 0x8000) >> 16); + + if (((r | g | b) & 0x100) == 0x100) + { + int scalemin; + int scalemax; + int min; + int max; + + min = Math.min(r, Math.min(g, b)); + max = Math.max(r, Math.max(g, b)); + + if (min < 0) + { + scalemin = (y << 16) / (y - min); + } + else + { + scalemin = 0x10000; 
+ } + + if (max > 255) + { + scalemax = ((255 - y) << 16) / (max - y); + } + else + { + scalemax = 0x10000; + } + + scale = Math.min(scalemin, scalemax); + r = y + (((r - y) * scale + 0x8000) >> 16); + g = y + (((g - y) * scale + 0x8000) >> 16); + b = y + (((b - y) * scale + 0x8000) >> 16); + } + result[0] = r / 255.0f; + result[1] = g / 255.0f; + result[2] = b / 255.0f; + } + + private static void getLuminosityRGB(float[] srcValues, float[] dstValues, float[] result) + { + int delta; + int scale; + int r; + int g; + int b; + int y; + int rd = get255Value(dstValues[0]); + int gd = get255Value(dstValues[1]); + int bd = get255Value(dstValues[2]); + int rs = get255Value(srcValues[0]); + int gs = get255Value(srcValues[1]); + int bs = get255Value(srcValues[2]); + delta = ((rs - rd) * 77 + (gs - gd) * 151 + (bs - bd) * 28 + 0x80) >> 8; + r = rd + delta; + g = gd + delta; + b = bd + delta; + + if (((r | g | b) & 0x100) == 0x100) + { + y = (rs * 77 + gs * 151 + bs * 28 + 0x80) >> 8; + if (delta > 0) + { + int max; + max = Math.max(r, Math.max(g, b)); + scale = max == y ? 0 : ((255 - y) << 16) / (max - y); + } + else + { + int min; + min = Math.min(r, Math.min(g, b)); + scale = y == min ? 
0 : (y << 16) / (y - min); + } + r = y + (((r - y) * scale + 0x8000) >> 16); + g = y + (((g - y) * scale + 0x8000) >> 16); + b = y + (((b - y) * scale + 0x8000) >> 16); + } + result[0] = r / 255.0f; + result[1] = g / 255.0f; + result[2] = b / 255.0f; + } + private static Map<COSName, BlendMode> createBlendModeMap() { Map<COSName, BlendMode> map = new HashMap<>(13); map.put(COSName.NORMAL, BlendMode.NORMAL); - map.put(COSName.COMPATIBLE, BlendMode.COMPATIBLE); + // BlendMode.COMPATIBLE should not be used + map.put(COSName.COMPATIBLE, BlendMode.NORMAL); map.put(COSName.MULTIPLY, BlendMode.MULTIPLY); map.put(COSName.SCREEN, BlendMode.SCREEN); map.put(COSName.OVERLAY, BlendMode.OVERLAY); @@ -203,7 +386,10 @@ public abstract class BlendMode map.put(COSName.SOFT_LIGHT, BlendMode.SOFT_LIGHT); map.put(COSName.DIFFERENCE, BlendMode.DIFFERENCE); map.put(COSName.EXCLUSION, BlendMode.EXCLUSION); - // TODO - non-separable blending modes + map.put(COSName.HUE, BlendMode.HUE); + map.put(COSName.SATURATION, BlendMode.SATURATION); + map.put(COSName.LUMINOSITY, BlendMode.LUMINOSITY); + map.put(COSName.COLOR, BlendMode.COLOR); return map; } } diff --git a/src/main/java/org/sejda/sambox/pdmodel/graphics/color/PDCalGray.java b/src/main/java/org/sejda/sambox/pdmodel/graphics/color/PDCalGray.java index dcdaeeb635d90b87450a699008ff9b30a22c21d9..04d07cf549fb28b7933ae16384e6abf53ffb4149 100644 --- a/src/main/java/org/sejda/sambox/pdmodel/graphics/color/PDCalGray.java +++ b/src/main/java/org/sejda/sambox/pdmodel/graphics/color/PDCalGray.java @@ -21,6 +21,9 @@ import org.sejda.sambox.cos.COSFloat; import org.sejda.sambox.cos.COSName; import org.sejda.sambox.cos.COSNumber; +import java.util.HashMap; +import java.util.Map; + /** * A CalGray colour space is a special case of a single-component CIE-based * colour space. 
@@ -32,6 +35,11 @@ public final class PDCalGray extends PDCIEDictionaryBasedColorSpace { private final PDColor initialColor = new PDColor(new float[] { 0 }, this); + // PDFBOX-4119: cache the results for much improved performance + // cached values MUST be cloned, because they are modified by the caller. + // this can be observed in rendering of PDFBOX-1724 + private final Map<Float, float[]> map1 = new HashMap<Float, float[]>(); + /** * Create a new CalGray color space. */ @@ -77,13 +85,20 @@ public final class PDCalGray extends PDCIEDictionaryBasedColorSpace @Override public float[] toRGB(float[] value) { - // see implementation of toRGB in PDCabRGB, and PDFBOX-2971 + // see implementation of toRGB in PDCalRGB, and PDFBOX-2971 if (wpX == 1 && wpY == 1 && wpZ == 1) { float a = value[0]; + float[] result = map1.get(a); + if (result != null) + { + return result.clone(); + } float gamma = getGamma(); float powAG = (float) Math.pow(a, gamma); - return convXYZtoRGB(powAG, powAG, powAG); + result = convXYZtoRGB(powAG, powAG, powAG); + map1.put(a, result.clone()); + return result; } else { diff --git a/src/main/java/org/sejda/sambox/pdmodel/graphics/color/PDColorSpace.java b/src/main/java/org/sejda/sambox/pdmodel/graphics/color/PDColorSpace.java index e9bbea516350cfea5106569784dcc4dc77adc1b5..1aabafec762d1aa87fbb15416e19977c63526a99 100644 --- a/src/main/java/org/sejda/sambox/pdmodel/graphics/color/PDColorSpace.java +++ b/src/main/java/org/sejda/sambox/pdmodel/graphics/color/PDColorSpace.java @@ -25,13 +25,10 @@ import java.awt.image.ComponentColorModel; import java.awt.image.WritableRaster; import java.io.IOException; -import org.sejda.sambox.cos.COSArray; -import org.sejda.sambox.cos.COSBase; -import org.sejda.sambox.cos.COSDictionary; -import org.sejda.sambox.cos.COSName; -import org.sejda.sambox.cos.COSObjectable; +import org.sejda.sambox.cos.*; import org.sejda.sambox.pdmodel.MissingResourceException; import org.sejda.sambox.pdmodel.PDResources; +import 
org.sejda.sambox.pdmodel.ResourceCache; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -54,7 +51,7 @@ public abstract class PDColorSpace implements COSObjectable */ public static PDColorSpace create(COSBase colorSpace) throws IOException { - return create(colorSpace, null); + return create(colorSpace, null, false); } /** @@ -72,6 +69,29 @@ public abstract class PDColorSpace implements COSObjectable return create(colorSpace, resources, false); } + public static PDColorSpace create(COSBase colorSpace, PDResources resources, boolean wasDefault) throws IOException { + boolean canCache = colorSpace.hasId() && resources != null && resources.getResourceCache() != null; + if(canCache) { + ResourceCache cache = resources.getResourceCache(); + PDColorSpace existing = cache.getColorSpace(colorSpace.id().objectIdentifier); + if(existing != null) { + LOG.debug("Using cached color space for {}", colorSpace.id().objectIdentifier); + return existing; + } + } + + PDColorSpace result = createUncached(colorSpace, resources, wasDefault); + + if(colorSpace.hasId() && resources != null) { + ResourceCache cache = resources.getResourceCache(); + if(cache != null) { + cache.put(colorSpace.id().objectIdentifier, result); + } + } + + return result; + } + /** * Creates a color space given a name or array. Abbreviated device color names are not supported here, please * replace them first. This method is for PDFBox internal use only, others should use {@link create(COSBase, @@ -84,7 +104,7 @@ public abstract class PDColorSpace implements COSObjectable * @throws MissingResourceException if the color space is missing in the resources dictionary * @throws IOException if the color space is unknown or cannot be created. 
*/ - public static PDColorSpace create(COSBase colorSpace, PDResources resources, boolean wasDefault) + private static PDColorSpace createUncached(COSBase colorSpace, PDResources resources, boolean wasDefault) throws IOException { colorSpace = colorSpace.getCOSObject(); @@ -204,7 +224,7 @@ public abstract class PDColorSpace implements COSObjectable || name == COSName.DEVICEGRAY) { // not allowed in an array, but we sometimes encounter these regardless - return create(name, resources, wasDefault); + return createUncached(name, resources, wasDefault); } else { @@ -217,7 +237,7 @@ public abstract class PDColorSpace implements COSObjectable if (csAsDic.containsKey(COSName.COLORSPACE)) { LOG.warn("Found invalid color space defined as dictionary {}", csAsDic); - return create(csAsDic.getDictionaryObject(COSName.COLORSPACE), resources, + return createUncached(csAsDic.getDictionaryObject(COSName.COLORSPACE), resources, wasDefault); } } diff --git a/src/main/java/org/sejda/sambox/pdmodel/graphics/color/PDDeviceN.java b/src/main/java/org/sejda/sambox/pdmodel/graphics/color/PDDeviceN.java index 8bfa9af11106a8b5b85044e968be21a6f46865d1..d19220e5c42372d199fb25522922dc9f537a7610 100644 --- a/src/main/java/org/sejda/sambox/pdmodel/graphics/color/PDDeviceN.java +++ b/src/main/java/org/sejda/sambox/pdmodel/graphics/color/PDDeviceN.java @@ -280,9 +280,9 @@ public class PDDeviceN extends PDSpecialColorSpace // private BufferedImage toRGBWithTintTransform(WritableRaster raster) throws IOException { - // map only in use if one color component - Map<Float, int[]> map1 = new HashMap<>(); - float key = 0; + // cache color mappings + Map<String, int[]> map1 = new HashMap<String, int[]>(); + String key = null; int width = raster.getWidth(); int height = raster.getHeight(); @@ -299,16 +299,17 @@ public class PDDeviceN extends PDSpecialColorSpace for (int x = 0; x < width; x++) { raster.getPixel(x, y, src); - if (numSrcComponents == 1) + // use a string representation as key + key = 
Float.toString(src[0]); + for (int s = 1; s < numSrcComponents; s++) { - int[] pxl = map1.get(src[0]); - if (pxl != null) - { - rgbRaster.setPixel(x, y, pxl); - continue; - } - // need to remember key because src is modified - key = src[0]; + key += "#" + Float.toString(src[s]); + } + int[] pxl = map1.get(key); + if (pxl != null) + { + rgbRaster.setPixel(x, y, pxl); + continue; } // scale to 0..1 @@ -327,14 +328,11 @@ public class PDDeviceN extends PDSpecialColorSpace { // scale to 0..255 rgb[s] = (int) (rgbFloat[s] * 255f); - } - - if (numSrcComponents == 1) - { - // must clone because rgb is reused - map1.put(key, rgb.clone()); } + // must clone because rgb is reused + map1.put(key, rgb.clone()); + rgbRaster.setPixel(x, y, rgb); } } diff --git a/src/main/java/org/sejda/sambox/pdmodel/graphics/color/PDDeviceRGB.java b/src/main/java/org/sejda/sambox/pdmodel/graphics/color/PDDeviceRGB.java index e32f57fba48c59d2aa033954dd4d91952979aa3e..f31f8cc196966d215ed4d03c629e6f7ecb69bf74 100644 --- a/src/main/java/org/sejda/sambox/pdmodel/graphics/color/PDDeviceRGB.java +++ b/src/main/java/org/sejda/sambox/pdmodel/graphics/color/PDDeviceRGB.java @@ -16,15 +16,15 @@ */ package org.sejda.sambox.pdmodel.graphics.color; -import java.awt.Transparency; import java.awt.color.ColorSpace; import java.awt.image.BufferedImage; -import java.awt.image.ColorModel; -import java.awt.image.ComponentColorModel; import java.awt.image.WritableRaster; import java.io.IOException; +import java.util.StringTokenizer; import org.sejda.sambox.cos.COSName; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** * Colours in the DeviceRGB colour space are specified according to the additive @@ -38,6 +38,8 @@ public final class PDDeviceRGB extends PDDeviceColorSpace /** This is the single instance of this class. 
*/ public static final PDDeviceRGB INSTANCE = new PDDeviceRGB(); + private static final Logger LOG = LoggerFactory.getLogger(PDDeviceRGB.class); + private final PDColor initialColor = new PDColor(new float[] { 0, 0, 0 }, this); private volatile ColorSpace awtColorSpace; @@ -55,6 +57,9 @@ public final class PDDeviceRGB extends PDDeviceColorSpace { return; } + + suggestKCMS(); + synchronized (this) { // we might have been waiting for another thread, so check again @@ -108,32 +113,57 @@ public final class PDDeviceRGB extends PDDeviceColorSpace public BufferedImage toRGBImage(WritableRaster raster) throws IOException { init(); - ColorModel colorModel = new ComponentColorModel(awtColorSpace, - false, false, Transparency.OPAQUE, raster.getDataBuffer().getDataType()); - - BufferedImage image = new BufferedImage(colorModel, raster, false, null); - // // WARNING: this method is performance sensitive, modify with care! // - // Please read PDFBOX-3854 and look at the related commits first. + // Please read PDFBOX-3854 and PDFBOX-2092 and look at the related commits first. // The current code returns TYPE_INT_RGB images which prevents slowness due to threads // blocking each other when TYPE_CUSTOM images are used. - // ColorConvertOp is not used here because it has a larger memory footprint and no further - // performance improvement. - // The multiparameter setRGB() call is not used because it brings no improvement. 
- - BufferedImage dest = new BufferedImage(image.getWidth(), image.getHeight(), - BufferedImage.TYPE_INT_RGB); - int width = image.getWidth(); - int height = image.getHeight(); - for (int x = 0; x < width; ++x) + BufferedImage image = new BufferedImage(raster.getWidth(), raster.getHeight(), BufferedImage.TYPE_INT_RGB); + image.setData(raster); + return image; + } + + private static void suggestKCMS() + { + String cmmProperty = System.getProperty("sun.java2d.cmm"); + if (isMinJdk8() && !"sun.java2d.cmm.kcms.KcmsServiceProvider".equals(cmmProperty)) + { + try + { + // Make sure that class exists + Class.forName("sun.java2d.cmm.kcms.KcmsServiceProvider"); + + LOG.info("To get higher rendering speed on JDK8 or later,"); + LOG.info(" use the option -Dsun.java2d.cmm=sun.java2d.cmm.kcms.KcmsServiceProvider"); + LOG.info(" or call System.setProperty(\"sun.java2d.cmm\", \"sun.java2d.cmm.kcms.KcmsServiceProvider\")"); + } + catch (ClassNotFoundException e) + { + LOG.debug("KCMS doesn't exist anymore. 
SO SAD!"); + } + } + } + + private static boolean isMinJdk8() + { + // strategy from lucene-solr/lucene/core/src/java/org/apache/lucene/util/Constants.java + String version = System.getProperty("java.specification.version"); + final StringTokenizer st = new StringTokenizer(version, "."); + try { - for (int y = 0; y < height; ++y) + int major = Integer.parseInt(st.nextToken()); + int minor = 0; + if (st.hasMoreTokens()) { - dest.setRGB(x, y, image.getRGB(x, y)); + minor = Integer.parseInt(st.nextToken()); } + return major > 1 || (major == 1 && minor >= 8); + } + catch (NumberFormatException nfe) + { + // maybe some new numbering scheme in the 22nd century + return true; } - return dest; } } diff --git a/src/main/java/org/sejda/sambox/pdmodel/graphics/color/PDICCBased.java b/src/main/java/org/sejda/sambox/pdmodel/graphics/color/PDICCBased.java index 34529fba0f2197ec47f0fe93ffc2327bdb57719d..f0b4baddbe03b237e4941dfcb2343874bdd2f467 100644 --- a/src/main/java/org/sejda/sambox/pdmodel/graphics/color/PDICCBased.java +++ b/src/main/java/org/sejda/sambox/pdmodel/graphics/color/PDICCBased.java @@ -19,12 +19,15 @@ package org.sejda.sambox.pdmodel.graphics.color; import static org.sejda.util.RequireUtils.requireIOCondition; import java.awt.Color; +import java.awt.Transparency; import java.awt.color.CMMException; import java.awt.color.ColorSpace; import java.awt.color.ICC_ColorSpace; import java.awt.color.ICC_Profile; import java.awt.color.ProfileDataException; import java.awt.image.BufferedImage; +import java.awt.image.ComponentColorModel; +import java.awt.image.DataBuffer; import java.awt.image.WritableRaster; import java.io.IOException; import java.io.InputStream; @@ -131,6 +134,7 @@ public final class PDICCBased extends PDCIEBasedColorSpace } else { + profile = ensureDisplayProfile(profile); awtColorSpace = new ICC_ColorSpace(profile); iccProfile = profile; } @@ -149,29 +153,30 @@ public final class PDICCBased extends PDCIEBasedColorSpace awtColorSpace.toRGB(new 
float[awtColorSpace.getNumComponents()]); // this one triggers an exception for PDFBOX-3549 with KCMS new Color(awtColorSpace, new float[getNumberOfComponents()], 1f); + // PDFBOX-4015: this one triggers "CMMException: LCMS error 13" with LCMS + new ComponentColorModel(awtColorSpace, false, false, Transparency.OPAQUE, + DataBuffer.TYPE_BYTE); } } - catch (RuntimeException e) + catch (ProfileDataException e) { - if (e instanceof ProfileDataException || e instanceof CMMException - || e instanceof IllegalArgumentException - || e instanceof ArrayIndexOutOfBoundsException) - { - // fall back to alternateColorSpace color space - awtColorSpace = null; - alternateColorSpace = getAlternateColorSpace(); - if (alternateColorSpace.equals(PDDeviceRGB.INSTANCE)) - { - isRGB = true; - } - LOG.warn("Can't read embedded ICC profile (" + e.getLocalizedMessage() - + "), using alternate color space: " + alternateColorSpace.getName()); - initialColor = alternateColorSpace.getInitialColor(); - } - else - { - throw e; - } + fallbackToAlternateColorSpace(e); + } + catch (CMMException e) + { + fallbackToAlternateColorSpace(e); + } + catch (IllegalArgumentException e) + { + fallbackToAlternateColorSpace(e); + } + catch (ArrayIndexOutOfBoundsException e) + { + fallbackToAlternateColorSpace(e); + } + catch (IOException e) + { + fallbackToAlternateColorSpace(e); } finally { @@ -179,6 +184,19 @@ public final class PDICCBased extends PDCIEBasedColorSpace } } + private void fallbackToAlternateColorSpace(Exception e) throws IOException + { + awtColorSpace = null; + alternateColorSpace = getAlternateColorSpace(); + if (alternateColorSpace.equals(PDDeviceRGB.INSTANCE)) + { + isRGB = true; + } + LOG.warn("Can't read embedded ICC profile (" + e.getLocalizedMessage() + + "), using alternate color space: " + alternateColorSpace.getName()); + initialColor = alternateColorSpace.getInitialColor(); + } + /** * Returns true if the given profile is represents sRGB. 
*/ @@ -190,6 +208,33 @@ public final class PDICCBased extends PDCIEBasedColorSpace return deviceModel.equals("sRGB"); } + // PDFBOX-4114: fix profile that has the wrong display class, + // as done by Harald Kuhr in twelvemonkeys JPEGImageReader.ensureDisplayProfile() + private static ICC_Profile ensureDisplayProfile(ICC_Profile profile) + { + if (profile.getProfileClass() != ICC_Profile.CLASS_DISPLAY) + { + byte[] profileData = profile.getData(); // Need to clone entire profile, due to a OpenJDK bug + + if (profileData[ICC_Profile.icHdrRenderingIntent] == ICC_Profile.icPerceptual) + { + LOG.warn("ICC profile is Perceptual, ignoring, treating as Display class"); + intToBigEndian(ICC_Profile.icSigDisplayClass, profileData, ICC_Profile.icHdrDeviceClass); + return ICC_Profile.getInstance(profileData); + } + } + return profile; + } + + private static void intToBigEndian(int value, byte[] array, int index) + { + array[index] = (byte) (value >> 24); + array[index + 1] = (byte) (value >> 16); + array[index + 2] = (byte) (value >> 8); + array[index + 3] = (byte) (value); + } + + @Override public float[] toRGB(float[] value) throws IOException { @@ -199,12 +244,25 @@ public final class PDICCBased extends PDCIEBasedColorSpace } if (awtColorSpace != null) { + // PDFBOX-2142: clamp bad values // WARNING: toRGB is very slow when used with LUT-based ICC profiles - return awtColorSpace.toRGB(value); + return awtColorSpace.toRGB(clampColors(awtColorSpace, value)); } return alternateColorSpace.toRGB(value); } + private float[] clampColors(ICC_ColorSpace cs, float[] value) + { + float[] result = new float[value.length]; + for (int i = 0; i < value.length; ++i) + { + float minValue = cs.getMinValue(i); + float maxValue = cs.getMaxValue(i); + result[i] = value[i] < minValue ? minValue : (value[i] > maxValue ? 
maxValue : value[i]); + } + return result; + } + @Override public BufferedImage toRGBImage(WritableRaster raster) throws IOException { diff --git a/src/main/java/org/sejda/sambox/pdmodel/graphics/color/PDIndexed.java b/src/main/java/org/sejda/sambox/pdmodel/graphics/color/PDIndexed.java index b08e570cc0b6be366f9a1bd5daf442a4e5bd7f62..5d146040e8c486d0036341bb6b468efabb16133e 100644 --- a/src/main/java/org/sejda/sambox/pdmodel/graphics/color/PDIndexed.java +++ b/src/main/java/org/sejda/sambox/pdmodel/graphics/color/PDIndexed.java @@ -31,6 +31,7 @@ import org.sejda.sambox.cos.COSNull; import org.sejda.sambox.cos.COSNumber; import org.sejda.sambox.cos.COSStream; import org.sejda.sambox.cos.COSString; +import org.sejda.sambox.pdmodel.PDResources; import org.sejda.sambox.pdmodel.common.PDStream; /** @@ -66,13 +67,27 @@ public final class PDIndexed extends PDSpecialColorSpace } /** - * Creates a new Indexed color space from the given PDF array. + * Creates a new indexed color space from the given PDF array. * @param indexedArray the array containing the indexed parameters + * @throws java.io.IOException */ public PDIndexed(COSArray indexedArray) throws IOException + { + this(indexedArray, null); + } + + /** + * Creates a new indexed color space from the given PDF array. + * @param indexedArray the array containing the indexed parameters + * @param resources the resources, can be null. Allows to use its cache for the colorspace. 
+ * @throws java.io.IOException + */ + public PDIndexed(COSArray indexedArray, PDResources resources) throws IOException { array = indexedArray; - baseColorSpace = PDColorSpace.create(array.getObject(1)); + // don't call getObject(1), we want to pass a reference if possible + // to profit from caching (PDFBOX-4149) + baseColorSpace = PDColorSpace.create(array.get(1), resources); readColorTable(); initRgbColorTable(); } diff --git a/src/main/java/org/sejda/sambox/pdmodel/graphics/form/PDFormXObject.java b/src/main/java/org/sejda/sambox/pdmodel/graphics/form/PDFormXObject.java index 8e2cdd7b89f7623bca16a6d2343af7f01ecfc69b..578977ade7ef67c29fda9e263118344f3342a3bd 100644 --- a/src/main/java/org/sejda/sambox/pdmodel/graphics/form/PDFormXObject.java +++ b/src/main/java/org/sejda/sambox/pdmodel/graphics/form/PDFormXObject.java @@ -222,13 +222,7 @@ public class PDFormXObject extends PDXObject implements PDContentStream @Override public Matrix getMatrix() { - COSArray array = (COSArray) getCOSObject().getDictionaryObject(COSName.MATRIX); - if (array != null) - { - return new Matrix(array); - } - // default value is the identity matrix - return new Matrix(); + return Matrix.createMatrix(getCOSObject().getDictionaryObject(COSName.MATRIX)); } /** diff --git a/src/main/java/org/sejda/sambox/pdmodel/graphics/form/PDTransparencyGroupAttributes.java b/src/main/java/org/sejda/sambox/pdmodel/graphics/form/PDTransparencyGroupAttributes.java index 3d72fcb746d59bcb7f8b354ae76fea3446fbc956..d4847edcc169acd4e86b45880b2167fd37b7556c 100644 --- a/src/main/java/org/sejda/sambox/pdmodel/graphics/form/PDTransparencyGroupAttributes.java +++ b/src/main/java/org/sejda/sambox/pdmodel/graphics/form/PDTransparencyGroupAttributes.java @@ -18,6 +18,7 @@ package org.sejda.sambox.pdmodel.graphics.form; import java.io.IOException; +import org.sejda.sambox.cos.COSBase; import org.sejda.sambox.cos.COSDictionary; import org.sejda.sambox.cos.COSName; import 
org.sejda.sambox.pdmodel.common.PDDictionaryWrapper; @@ -56,7 +57,11 @@ public final class PDTransparencyGroupAttributes extends PDDictionaryWrapper { if (colorSpace == null && getCOSObject().containsKey(COSName.CS)) { - colorSpace = PDColorSpace.create(getCOSObject().getDictionaryObject(COSName.CS)); + COSBase dictionaryObject = getCOSObject().getDictionaryObject(COSName.CS); + if(dictionaryObject != null) + { + colorSpace = PDColorSpace.create(dictionaryObject); + } } return colorSpace; } diff --git a/src/main/java/org/sejda/sambox/pdmodel/graphics/image/LosslessFactory.java b/src/main/java/org/sejda/sambox/pdmodel/graphics/image/LosslessFactory.java index bf0cd97277bcf6776006ef57fa43c84a3792ce00..40402b9ad9a1deeb20fb52b46927b1bf29015742 100644 --- a/src/main/java/org/sejda/sambox/pdmodel/graphics/image/LosslessFactory.java +++ b/src/main/java/org/sejda/sambox/pdmodel/graphics/image/LosslessFactory.java @@ -17,9 +17,7 @@ package org.sejda.sambox.pdmodel.graphics.image; import java.awt.Transparency; import java.awt.image.BufferedImage; -import java.awt.image.WritableRaster; import java.io.ByteArrayInputStream; -import java.io.ByteArrayOutputStream; import java.io.IOException; import javax.imageio.stream.MemoryCacheImageOutputStream; @@ -48,191 +46,120 @@ public final class LosslessFactory /** * Creates a new lossless encoded Image XObject from a Buffered Image. 
* + * @param document the document where the image will be created * @param image the buffered image to embed * @return a new Image XObject * @throws IOException if something goes wrong */ public static PDImageXObject createFromImage(BufferedImage image) throws IOException { - int bpc; - PDDeviceColorSpace deviceColorSpace; - - int height = image.getHeight(); - int width = image.getWidth(); - int[] rgbLineBuffer = new int[width]; - byte[] imageData; - if ((image.getType() == BufferedImage.TYPE_BYTE_GRAY && image.getColorModel().getPixelSize() <= 8) || (image.getType() == BufferedImage.TYPE_BYTE_BINARY && image.getColorModel().getPixelSize() == 1)) { - // grayscale images need one color per sample - bpc = image.getColorModel().getPixelSize(); - deviceColorSpace = PDDeviceGray.INSTANCE; - - FastByteArrayOutputStream bos = new FastByteArrayOutputStream( - (width * bpc / 8) + (width * bpc % 8 != 0 ? 1 : 0) * height); - try (MemoryCacheImageOutputStream mcios = new MemoryCacheImageOutputStream(bos)) - { - - for (int y = 0; y < height; ++y) - { - for (int pixel : image.getRGB(0, y, width, 1, rgbLineBuffer, 0, width)) - { - mcios.writeBits(pixel & 0xFF, bpc); - } - - int bitOffset = mcios.getBitOffset(); - if (bitOffset != 0) - { - mcios.writeBits(0, 8 - bitOffset); - } - } - mcios.flush(); - } - imageData = bos.toByteArray(); + return createFromGrayImage(image); } - else - { - // RGB - bpc = 8; - deviceColorSpace = PDDeviceRGB.INSTANCE; - imageData = new byte[width * height * 3]; - int byteIdx = 0; + return createFromRGBImage(image); + } + // grayscale images need one color per sample + private static PDImageXObject createFromGrayImage(BufferedImage image) throws IOException + { + int height = image.getHeight(); + int width = image.getWidth(); + int[] rgbLineBuffer = new int[width]; + int bpc = image.getColorModel().getPixelSize(); + FastByteArrayOutputStream baos = new FastByteArrayOutputStream( + ((width * bpc / 8) + (width * bpc % 8 != 0 ? 
1 : 0)) * height); + try (MemoryCacheImageOutputStream mcios = new MemoryCacheImageOutputStream(baos)) + { for (int y = 0; y < height; ++y) { for (int pixel : image.getRGB(0, y, width, 1, rgbLineBuffer, 0, width)) { - imageData[byteIdx++] = (byte) ((pixel >> 16) & 0xFF); - imageData[byteIdx++] = (byte) ((pixel >> 8) & 0xFF); - imageData[byteIdx++] = (byte) (pixel & 0xFF); + mcios.writeBits(pixel & 0xFF, bpc); } - } - } - PDImageXObject pdImage = prepareImageXObject(imageData, image.getWidth(), image.getHeight(), - bpc, deviceColorSpace); - - // alpha -> soft mask - PDImage xAlpha = createAlphaFromARGBImage(image); - if (xAlpha != null) - { - pdImage.getCOSObject().setItem(COSName.SMASK, xAlpha); + int bitOffset = mcios.getBitOffset(); + if (bitOffset != 0) + { + mcios.writeBits(0, 8 - bitOffset); + } + } + mcios.flush(); } - - return pdImage; + return prepareImageXObject(baos.toByteArray(), image.getWidth(), image.getHeight(), bpc, + PDDeviceGray.INSTANCE); } - /** - * Creates a grayscale Flate encoded PDImageXObject from the alpha channel of an image. - * - * @param image an ARGB image. - * - * @return the alpha channel of an image as a grayscale image. - * - * @throws IOException if something goes wrong - */ - private static PDImageXObject createAlphaFromARGBImage(BufferedImage image) throws IOException + private static PDImageXObject createFromRGBImage(BufferedImage image) throws IOException { - // this implementation makes the assumption that the raster uses - // SinglePixelPackedSampleModel, i.e. the values can be used 1:1 for - // the stream. - // Sadly the type of the databuffer is TYPE_INT and not TYPE_BYTE. 
- if (!image.getColorModel().hasAlpha()) + int height = image.getHeight(); + int width = image.getWidth(); + int[] rgbLineBuffer = new int[width]; + int bpc = 8; + PDDeviceColorSpace deviceColorSpace = PDDeviceRGB.INSTANCE; + byte[] imageData = new byte[width * height * 3]; + int byteIdx = 0; + int alphaByteIdx = 0; + int alphaBitPos = 7; + int transparency = image.getTransparency(); + int apbc = transparency == Transparency.BITMASK ? 1 : 8; + byte[] alphaImageData; + if (transparency != Transparency.OPAQUE) { - return null; + alphaImageData = new byte[((width * apbc / 8) + (width * apbc % 8 != 0 ? 1 : 0)) + * height]; } - - // extract the alpha information - WritableRaster alphaRaster = image.getAlphaRaster(); - if (alphaRaster == null) + else { - // happens sometimes (PDFBOX-2654) despite colormodel claiming to have alpha - return createAlphaFromARGBImage2(image); + alphaImageData = new byte[0]; } - - int[] pixels = alphaRaster.getPixels(0, 0, alphaRaster.getWidth(), alphaRaster.getHeight(), - (int[]) null); - ByteArrayOutputStream bos = new ByteArrayOutputStream(); - int bpc; - if (image.getTransparency() == Transparency.BITMASK) + for (int y = 0; y < height; ++y) { - bpc = 1; - MemoryCacheImageOutputStream mcios = new MemoryCacheImageOutputStream(bos); - int width = alphaRaster.getWidth(); - int p = 0; - for (int pixel : pixels) + for (int pixel : image.getRGB(0, y, width, 1, rgbLineBuffer, 0, width)) { - mcios.writeBit(pixel); - ++p; - if (p % width == 0) + imageData[byteIdx++] = (byte) ((pixel >> 16) & 0xFF); + imageData[byteIdx++] = (byte) ((pixel >> 8) & 0xFF); + imageData[byteIdx++] = (byte) (pixel & 0xFF); + if (transparency != Transparency.OPAQUE) { - while (mcios.getBitOffset() != 0) + // we have the alpha right here, so no need to do it separately + // as done prior April 2018 + if (transparency == Transparency.BITMASK) { - mcios.writeBit(0); + // write a bit + alphaImageData[alphaByteIdx] |= ((pixel >> 24) & 1) << alphaBitPos; + if (--alphaBitPos < 0) 
+ { + alphaBitPos = 7; + ++alphaByteIdx; + } + } + else + { + // write a byte + alphaImageData[alphaByteIdx++] = (byte) ((pixel >> 24) & 0xFF); } } } - mcios.flush(); - mcios.close(); - } - else - { - bpc = 8; - for (int pixel : pixels) - { - bos.write(pixel); - } - } - - PDImageXObject pdImage = prepareImageXObject(bos.toByteArray(), image.getWidth(), - image.getHeight(), bpc, PDDeviceGray.INSTANCE); - return pdImage; - } - - // create alpha image the hard way: get the alpha through getRGB() - private static PDImageXObject createAlphaFromARGBImage2(BufferedImage bi) throws IOException - { - ByteArrayOutputStream bos = new ByteArrayOutputStream(); - int bpc; - if (bi.getTransparency() == Transparency.BITMASK) - { - bpc = 1; - MemoryCacheImageOutputStream mcios = new MemoryCacheImageOutputStream(bos); - for (int y = 0, h = bi.getHeight(); y < h; ++y) + // skip boundary if needed + if (transparency == Transparency.BITMASK && alphaBitPos != 7) { - for (int x = 0, w = bi.getWidth(); x < w; ++x) - { - int alpha = bi.getRGB(x, y) >>> 24; - mcios.writeBit(alpha); - } - while (mcios.getBitOffset() != 0) - { - mcios.writeBit(0); - } + alphaBitPos = 7; + ++alphaByteIdx; } - mcios.flush(); - mcios.close(); } - else + PDImageXObject pdImage = prepareImageXObject(imageData, image.getWidth(), image.getHeight(), + bpc, deviceColorSpace); + if (transparency != Transparency.OPAQUE) { - bpc = 8; - for (int y = 0, h = bi.getHeight(); y < h; ++y) - { - for (int x = 0, w = bi.getWidth(); x < w; ++x) - { - int alpha = bi.getRGB(x, y) >>> 24; - bos.write(alpha); - } - } + PDImageXObject pdMask = prepareImageXObject(alphaImageData, image.getWidth(), + image.getHeight(), apbc, PDDeviceGray.INSTANCE); + pdImage.getCOSObject().setItem(COSName.SMASK, pdMask); } - - PDImageXObject pdImage = prepareImageXObject(bos.toByteArray(), bi.getWidth(), - bi.getHeight(), bpc, PDDeviceGray.INSTANCE); - return pdImage; } @@ -240,6 +167,7 @@ public final class LosslessFactory * Create a PDImageXObject 
while making a decision whether not to compress, use Flate filter only, or Flate and LZW * filters. * + * @param document The document. * @param byteArray array with data. * @param width the image width * @param height the image height diff --git a/src/main/java/org/sejda/sambox/pdmodel/graphics/image/PDImageXObject.java b/src/main/java/org/sejda/sambox/pdmodel/graphics/image/PDImageXObject.java index f60108394d44a7bc2d3e1c3b088d269487353c02..8d80b6b528ba6e1824ec8e0b4ac27757cf11976f 100644 --- a/src/main/java/org/sejda/sambox/pdmodel/graphics/image/PDImageXObject.java +++ b/src/main/java/org/sejda/sambox/pdmodel/graphics/image/PDImageXObject.java @@ -27,7 +27,9 @@ import java.io.File; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; +import java.lang.ref.SoftReference; import java.nio.ByteBuffer; +import java.util.List; import javax.imageio.ImageIO; @@ -36,6 +38,7 @@ import org.sejda.sambox.cos.COSArray; import org.sejda.sambox.cos.COSBase; import org.sejda.sambox.cos.COSName; import org.sejda.sambox.cos.COSStream; +import org.sejda.sambox.filter.DecodeResult; import org.sejda.sambox.pdmodel.PDResources; import org.sejda.sambox.pdmodel.common.PDMetadata; import org.sejda.sambox.pdmodel.common.PDStream; @@ -44,6 +47,8 @@ import org.sejda.sambox.pdmodel.graphics.color.PDColorSpace; import org.sejda.sambox.pdmodel.graphics.color.PDDeviceGray; import org.sejda.sambox.util.filetypedetector.FileType; import org.sejda.sambox.util.filetypedetector.FileTypeDetector; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** * An Image XObject. 
@@ -53,7 +58,10 @@ import org.sejda.sambox.util.filetypedetector.FileTypeDetector; */ public final class PDImageXObject extends PDXObject implements PDImage { - private BufferedImage cachedImage; + + private static final Logger LOG = LoggerFactory.getLogger(PDImageXObject.class); + + private SoftReference<BufferedImage> cachedImage; private PDColorSpace colorSpace; private PDResources resources; // current resource dictionary (has color spaces) @@ -74,7 +82,6 @@ public final class PDImageXObject extends PDXObject implements PDImage /** * Creates an Image XObject in the given document. * - * @param document the current document * @throws java.io.IOException if there is an error creating the XObject. */ public PDImageXObject() throws IOException @@ -129,9 +136,15 @@ public final class PDImageXObject extends PDXObject implements PDImage public PDImageXObject(PDStream stream, PDResources resources) throws IOException { super(stream, COSName.IMAGE); - stream.getCOSObject().addAll(stream.getCOSObject().getDecodeResult().getParameters()); this.resources = resources; - this.colorSpace = stream.getCOSObject().getDecodeResult().getJPXColorSpace(); + List<COSName> filters = stream.getFilters(); + if (filters != null && !filters.isEmpty() + && COSName.JPX_DECODE.equals(filters.get(filters.size() - 1))) + { + DecodeResult decodeResult = stream.getCOSObject().getDecodeResult(); + stream.getCOSObject().addAll(decodeResult.getParameters()); + this.colorSpace = decodeResult.getJPXColorSpace(); + } } public static PDImageXObject createFromFile(String imagePath) throws IOException @@ -151,11 +164,21 @@ public final class PDImageXObject extends PDXObject implements PDImage } if (fileType.equals(FileType.TIFF)) { - return CCITTFactory.createFromFile(file); + try + { + return CCITTFactory.createFromFile(file); + } + catch (IOException ex) + { + LOG.warn("Reading as TIFF failed, setting fileType to PNG", ex); + // Plan B: try reading with ImageIO + // common exception: + // First image 
in tiff is not CCITT T4 or T6 compressed + } } // last resort, let's see if ImageIO can read it BufferedImage image = ImageIO.read(file); - requireNotNullArg(image, "Image type not supported " + file.getName()); + requireNotNullArg(image, "Image type " + fileType + " not supported " + file.getName()); return LosslessFactory.createFromImage(image); } @@ -212,7 +235,11 @@ public final class PDImageXObject extends PDXObject implements PDImage { if (cachedImage != null) { - return cachedImage; + BufferedImage cached = cachedImage.get(); + if (cached != null) + { + return cached; + } } // get image as RGB @@ -234,7 +261,7 @@ public final class PDImageXObject extends PDXObject implements PDImage } } - cachedImage = image; + cachedImage = new SoftReference<>(image); return image; } @@ -340,9 +367,12 @@ public final class PDImageXObject extends PDXObject implements PDImage { BufferedImage image2 = new BufferedImage(width, height, BufferedImage.TYPE_INT_RGB); Graphics2D g = image2.createGraphics(); - g.setRenderingHint(RenderingHints.KEY_INTERPOLATION, - RenderingHints.VALUE_INTERPOLATION_BICUBIC); - g.setRenderingHint(RenderingHints.KEY_RENDERING, RenderingHints.VALUE_RENDER_QUALITY); + if (getInterpolate()) + { + g.setRenderingHint(RenderingHints.KEY_INTERPOLATION, + RenderingHints.VALUE_INTERPOLATION_BICUBIC); + g.setRenderingHint(RenderingHints.KEY_RENDERING, RenderingHints.VALUE_RENDER_QUALITY); + } g.drawImage(image, 0, 0, width, height, 0, 0, image.getWidth(), image.getHeight(), null); g.dispose(); return image2; diff --git a/src/main/java/org/sejda/sambox/pdmodel/graphics/image/PDInlineImage.java b/src/main/java/org/sejda/sambox/pdmodel/graphics/image/PDInlineImage.java index 7e2c848720293d4ae22d6cde2c3925fefcec6e07..a453507eec3fc36b29653f4fdd7604e1b43c8a8f 100644 --- a/src/main/java/org/sejda/sambox/pdmodel/graphics/image/PDInlineImage.java +++ b/src/main/java/org/sejda/sambox/pdmodel/graphics/image/PDInlineImage.java @@ -16,7 +16,7 @@ */ package 
org.sejda.sambox.pdmodel.graphics.image; -import java.awt.Paint; +import java.awt.*; import java.awt.image.BufferedImage; import java.io.ByteArrayInputStream; import java.io.IOException; diff --git a/src/main/java/org/sejda/sambox/pdmodel/graphics/optionalcontent/PDOptionalContentProperties.java b/src/main/java/org/sejda/sambox/pdmodel/graphics/optionalcontent/PDOptionalContentProperties.java index 5aff63783dc9a9811800e022f5837271e5b9daa1..aeb996ea33b14d7c45a36ba2c6e9ef621cf6944e 100644 --- a/src/main/java/org/sejda/sambox/pdmodel/graphics/optionalcontent/PDOptionalContentProperties.java +++ b/src/main/java/org/sejda/sambox/pdmodel/graphics/optionalcontent/PDOptionalContentProperties.java @@ -86,7 +86,12 @@ public class PDOptionalContentProperties implements COSObjectable { this.dict = new COSDictionary(); this.dict.setItem(COSName.OCGS, new COSArray()); - this.dict.setItem(COSName.D, new COSDictionary()); + COSDictionary d = new COSDictionary(); + + // Name optional but required for PDF/A-3 + d.setString(COSName.NAME, "Top"); + + this.dict.setItem(COSName.D, d); } /** @@ -118,12 +123,20 @@ public class PDOptionalContentProperties implements COSObjectable private COSDictionary getD() { - COSDictionary d = (COSDictionary)this.dict.getDictionaryObject(COSName.D); - if (d == null) + COSBase base = this.dict.getDictionaryObject(COSName.D); + if (base instanceof COSDictionary) { - d = new COSDictionary(); - this.dict.setItem(COSName.D, d); //D is required + return (COSDictionary) base; } + + COSDictionary d = new COSDictionary(); + + // Name optional but required for PDF/A-3 + d.setString(COSName.NAME, "Top"); + + // D is required + this.dict.setItem(COSName.D, d); + return d; } @@ -244,42 +257,56 @@ public class PDOptionalContentProperties implements COSObjectable * @return true if the group is enabled */ public boolean isGroupEnabled(String groupName) + { + return isGroupEnabled(getGroup(groupName)); + } + + /** + * Indicates whether an optional content group is 
enabled. + * @param group the group object + * @return true if the group is enabled + */ + public boolean isGroupEnabled(PDOptionalContentGroup group) { //TODO handle Optional Content Configuration Dictionaries, //i.e. OCProperties/Configs + PDOptionalContentProperties.BaseState baseState = getBaseState(); + boolean enabled = !baseState.equals(BaseState.OFF); + //TODO What to do with BaseState.Unchanged? + + if (group == null) + { + return enabled; + } + COSDictionary d = getD(); - COSArray on = (COSArray)d.getDictionaryObject(COSName.ON); - if (on != null) + COSBase base = d.getDictionaryObject(COSName.ON); + if (base instanceof COSArray) { - for (COSBase o : on) + for (COSBase o : (COSArray) base) { - COSDictionary group = toDictionary(o); - String name = group.getString(COSName.NAME); - if (name.equals(groupName)) + COSDictionary dictionary = toDictionary(o); + if (dictionary == group.getCOSObject()) { return true; } } } - COSArray off = (COSArray)d.getDictionaryObject(COSName.OFF); - if (off != null) + base = d.getDictionaryObject(COSName.OFF); + if (base instanceof COSArray) { - for (COSBase o : off) + for (COSBase o : (COSArray) base) { - COSDictionary group = toDictionary(o); - String name = group.getString(COSName.NAME); - if (name.equals(groupName)) + COSDictionary dictionary = toDictionary(o); + if (dictionary == group.getCOSObject()) { return false; } } } - BaseState baseState = getBaseState(); - boolean enabled = !baseState.equals(BaseState.OFF); - //TODO What to do with BaseState.Unchanged? return enabled; } @@ -296,28 +323,49 @@ public class PDOptionalContentProperties implements COSObjectable */ public boolean setGroupEnabled(String groupName, boolean enable) { + return setGroupEnabled(getGroup(groupName), enable); + } + + /** + * Enables or disables an optional content group. 
+ * @param group the group object + * @param enable true to enable, false to disable + * @return true if the group already had an on or off setting, false otherwise + */ + public boolean setGroupEnabled(PDOptionalContentGroup group, boolean enable) + { + COSArray on; + COSArray off; + COSDictionary d = getD(); - COSArray on = (COSArray)d.getDictionaryObject(COSName.ON); - if (on == null) + COSBase base = d.getDictionaryObject(COSName.ON); + if (!(base instanceof COSArray)) { on = new COSArray(); d.setItem(COSName.ON, on); } - COSArray off = (COSArray)d.getDictionaryObject(COSName.OFF); - if (off == null) + else + { + on = (COSArray) base; + } + base = d.getDictionaryObject(COSName.OFF); + if (!(base instanceof COSArray)) { off = new COSArray(); d.setItem(COSName.OFF, off); } + else + { + off = (COSArray) base; + } boolean found = false; if (enable) { for (COSBase o : off) { - COSDictionary group = toDictionary(o); - String name = group.getString(COSName.NAME); - if (name.equals(groupName)) + COSDictionary groupDictionary = toDictionary(o); + if (groupDictionary == group.getCOSObject()) { //enable group off.remove(o); @@ -331,9 +379,8 @@ public class PDOptionalContentProperties implements COSObjectable { for (COSBase o : on) { - COSDictionary group = toDictionary(o); - String name = group.getString(COSName.NAME); - if (name.equals(groupName)) + COSDictionary groupDictionary = toDictionary(o); + if (groupDictionary == group.getCOSObject()) { //disable group on.remove(o); @@ -345,14 +392,13 @@ public class PDOptionalContentProperties implements COSObjectable } if (!found) { - PDOptionalContentGroup ocg = getGroup(groupName); if (enable) { - on.add(ocg.getCOSObject()); + on.add(group.getCOSObject()); } else { - off.add(ocg.getCOSObject()); + off.add(group.getCOSObject()); } } return found; diff --git a/src/main/java/org/sejda/sambox/pdmodel/graphics/pattern/PDAbstractPattern.java b/src/main/java/org/sejda/sambox/pdmodel/graphics/pattern/PDAbstractPattern.java index 
49ec6ab1d0f5bf6c94ccac06b7bcb642c84a3a68..d7d04d829b491cd17990e88954a8645b23237afc 100644 --- a/src/main/java/org/sejda/sambox/pdmodel/graphics/pattern/PDAbstractPattern.java +++ b/src/main/java/org/sejda/sambox/pdmodel/graphics/pattern/PDAbstractPattern.java @@ -129,16 +129,7 @@ public abstract class PDAbstractPattern implements COSObjectable */ public Matrix getMatrix() { - COSArray array = (COSArray)getCOSObject().getDictionaryObject(COSName.MATRIX); - if (array != null) - { - return new Matrix(array); - } - else - { - // default value is the identity matrix - return new Matrix(); - } + return Matrix.createMatrix(getCOSObject().getDictionaryObject(COSName.MATRIX)); } /** diff --git a/src/main/java/org/sejda/sambox/pdmodel/graphics/pattern/PDShadingPattern.java b/src/main/java/org/sejda/sambox/pdmodel/graphics/pattern/PDShadingPattern.java index c6ba8beb64f91a3b1b9687f92fb119b781ddc1d6..2a9995863ae8c39aeadb08ac0605a7f4d3422de9 100644 --- a/src/main/java/org/sejda/sambox/pdmodel/graphics/pattern/PDShadingPattern.java +++ b/src/main/java/org/sejda/sambox/pdmodel/graphics/pattern/PDShadingPattern.java @@ -18,6 +18,7 @@ package org.sejda.sambox.pdmodel.graphics.pattern; import java.io.IOException; +import org.sejda.sambox.cos.COSBase; import org.sejda.sambox.cos.COSDictionary; import org.sejda.sambox.cos.COSName; import org.sejda.sambox.pdmodel.graphics.shading.PDShading; @@ -64,12 +65,11 @@ public class PDShadingPattern extends PDAbstractPattern { if (extendedGraphicsState == null) { - COSDictionary dictionary = (COSDictionary)getCOSObject() - .getDictionaryObject(COSName.EXT_G_STATE); + COSBase base = getCOSObject().getDictionaryObject(COSName.EXT_G_STATE); - if( dictionary != null ) + if(base instanceof COSDictionary) { - extendedGraphicsState = new PDExtendedGraphicsState( dictionary ); + extendedGraphicsState = new PDExtendedGraphicsState((COSDictionary) base); } } return extendedGraphicsState; @@ -94,10 +94,10 @@ public class PDShadingPattern extends 
PDAbstractPattern { if (shading == null) { - COSDictionary dictionary = (COSDictionary) getCOSObject().getDictionaryObject(COSName.SHADING); - if( dictionary != null ) + COSBase base = getCOSObject().getDictionaryObject(COSName.SHADING); + if(base instanceof COSDictionary) { - shading = PDShading.create(dictionary); + shading = PDShading.create((COSDictionary) base); } } return shading; diff --git a/src/main/java/org/sejda/sambox/pdmodel/graphics/shading/GouraudShadingContext.java b/src/main/java/org/sejda/sambox/pdmodel/graphics/shading/GouraudShadingContext.java index b1bd3e70bcfe4a98e5c61570f7c736fa3294441e..208281e0fdb316af77c332f085eedf58d0b3df51 100644 --- a/src/main/java/org/sejda/sambox/pdmodel/graphics/shading/GouraudShadingContext.java +++ b/src/main/java/org/sejda/sambox/pdmodel/graphics/shading/GouraudShadingContext.java @@ -101,7 +101,7 @@ abstract class GouraudShadingContext extends TriangleBasedShadingContext return new Vertex(p, colorComponentTab); } - void setTriangleList(List<ShadedTriangle> triangleList) + final void setTriangleList(List<ShadedTriangle> triangleList) { this.triangleList = triangleList; } diff --git a/src/main/java/org/sejda/sambox/pdmodel/graphics/shading/PDShading.java b/src/main/java/org/sejda/sambox/pdmodel/graphics/shading/PDShading.java index 964f68a0206f9b1917fee79e9e1b30c86dad009e..ed2093afa1de51399b90c6412c247ed01f9690e2 100644 --- a/src/main/java/org/sejda/sambox/pdmodel/graphics/shading/PDShading.java +++ b/src/main/java/org/sejda/sambox/pdmodel/graphics/shading/PDShading.java @@ -253,36 +253,36 @@ public abstract class PDShading implements COSObjectable /** * Create the correct PD Model shading based on the COS base shading. 
* - * @param resourceDictionary the COS shading dictionary + * @param shadingDictionary the COS shading dictionary * @return the newly created shading resources object * @throws IOException if we are unable to create the PDShading object */ - public static PDShading create(COSDictionary resourceDictionary) throws IOException + public static PDShading create(COSDictionary shadingDictionary) throws IOException { PDShading shading = null; - int shadingType = resourceDictionary.getInt(COSName.SHADING_TYPE, 0); + int shadingType = shadingDictionary.getInt(COSName.SHADING_TYPE, 0); switch (shadingType) { case SHADING_TYPE1: - shading = new PDShadingType1(resourceDictionary); + shading = new PDShadingType1(shadingDictionary); break; case SHADING_TYPE2: - shading = new PDShadingType2(resourceDictionary); + shading = new PDShadingType2(shadingDictionary); break; case SHADING_TYPE3: - shading = new PDShadingType3(resourceDictionary); + shading = new PDShadingType3(shadingDictionary); break; case SHADING_TYPE4: - shading = new PDShadingType4(resourceDictionary); + shading = new PDShadingType4(shadingDictionary); break; case SHADING_TYPE5: - shading = new PDShadingType5(resourceDictionary); + shading = new PDShadingType5(shadingDictionary); break; case SHADING_TYPE6: - shading = new PDShadingType6(resourceDictionary); + shading = new PDShadingType6(shadingDictionary); break; case SHADING_TYPE7: - shading = new PDShadingType7(resourceDictionary); + shading = new PDShadingType7(shadingDictionary); break; default: throw new IOException("Error: Unknown shading type " + shadingType); diff --git a/src/main/java/org/sejda/sambox/pdmodel/graphics/shading/PDShadingType1.java b/src/main/java/org/sejda/sambox/pdmodel/graphics/shading/PDShadingType1.java index 2cd9cec840dcf8803af4b7b32be6356aac0d619b..babdd4e8f883a89f4e7d97eeec10490669b7c40a 100644 --- a/src/main/java/org/sejda/sambox/pdmodel/graphics/shading/PDShadingType1.java +++ 
b/src/main/java/org/sejda/sambox/pdmodel/graphics/shading/PDShadingType1.java @@ -55,16 +55,7 @@ public class PDShadingType1 extends PDShading */ public Matrix getMatrix() { - COSArray array = (COSArray) getCOSObject().getDictionaryObject(COSName.MATRIX); - if (array != null) - { - return new Matrix(array); - } - else - { - // identity matrix is the default - return new Matrix(); - } + return Matrix.createMatrix(getCOSObject().getDictionaryObject(COSName.MATRIX)); } /** diff --git a/src/main/java/org/sejda/sambox/pdmodel/graphics/shading/PatchMeshesShadingContext.java b/src/main/java/org/sejda/sambox/pdmodel/graphics/shading/PatchMeshesShadingContext.java index b83528b025c6dcba6c5015d45fc0bf1c0dfb4b8e..3e2a477cfe19bf839d5dcc2e22ab17a05d47bb4a 100644 --- a/src/main/java/org/sejda/sambox/pdmodel/graphics/shading/PatchMeshesShadingContext.java +++ b/src/main/java/org/sejda/sambox/pdmodel/graphics/shading/PatchMeshesShadingContext.java @@ -24,10 +24,7 @@ import java.awt.geom.Point2D; import java.awt.image.ColorModel; import java.io.EOFException; import java.io.IOException; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; +import java.util.*; import javax.imageio.stream.ImageInputStream; import javax.imageio.stream.MemoryCacheImageInputStream; @@ -89,9 +86,18 @@ abstract class PatchMeshesShadingContext extends TriangleBasedShadingContext Matrix matrix, int controlPoints) throws IOException { COSDictionary dict = shadingType.getCOSObject(); + if (!(dict instanceof COSStream)) + { + return Collections.emptyList(); + } int bitsPerFlag = shadingType.getBitsPerFlag(); PDRange rangeX = shadingType.getDecodeForParameter(0); PDRange rangeY = shadingType.getDecodeForParameter(1); + if (Float.compare(rangeX.getMin(), rangeX.getMax()) == 0 || + Float.compare(rangeY.getMin(), rangeY.getMax()) == 0) + { + return Collections.emptyList(); + } PDRange[] colRange = new PDRange[numberOfColorComponents]; for (int i = 0; i < 
numberOfColorComponents; ++i) { diff --git a/src/main/java/org/sejda/sambox/pdmodel/graphics/shading/Type4ShadingContext.java b/src/main/java/org/sejda/sambox/pdmodel/graphics/shading/Type4ShadingContext.java index 761dc09440514bdfe917d69e06d7f94eb7b37f04..7aabdc6d9c86be916a183eac342f27d3164a822d 100644 --- a/src/main/java/org/sejda/sambox/pdmodel/graphics/shading/Type4ShadingContext.java +++ b/src/main/java/org/sejda/sambox/pdmodel/graphics/shading/Type4ShadingContext.java @@ -23,6 +23,7 @@ import java.awt.image.ColorModel; import java.io.EOFException; import java.io.IOException; import java.util.ArrayList; +import java.util.Collections; import java.util.List; import javax.imageio.stream.ImageInputStream; @@ -71,8 +72,17 @@ class Type4ShadingContext extends GouraudShadingContext AffineTransform xform, Matrix matrix) throws IOException { COSDictionary dict = freeTriangleShadingType.getCOSObject(); + if (!(dict instanceof COSStream)) + { + return Collections.emptyList(); + } PDRange rangeX = freeTriangleShadingType.getDecodeForParameter(0); PDRange rangeY = freeTriangleShadingType.getDecodeForParameter(1); + if (Float.compare(rangeX.getMin(), rangeX.getMax()) == 0 || + Float.compare(rangeY.getMin(), rangeY.getMax()) == 0) + { + return Collections.emptyList(); + } PDRange[] colRange = new PDRange[numberOfColorComponents]; for (int i = 0; i < numberOfColorComponents; ++i) { diff --git a/src/main/java/org/sejda/sambox/pdmodel/graphics/shading/Type5ShadingContext.java b/src/main/java/org/sejda/sambox/pdmodel/graphics/shading/Type5ShadingContext.java index ce72140bd5b5b1329bb34313a402325faadcd523..cb2e0ff4287758acadce5d059e7876ed753db3a8 100644 --- a/src/main/java/org/sejda/sambox/pdmodel/graphics/shading/Type5ShadingContext.java +++ b/src/main/java/org/sejda/sambox/pdmodel/graphics/shading/Type5ShadingContext.java @@ -23,6 +23,7 @@ import java.awt.image.ColorModel; import java.io.EOFException; import java.io.IOException; import java.util.ArrayList; +import 
java.util.Collections; import java.util.List; import javax.imageio.stream.ImageInputStream; @@ -68,9 +69,18 @@ class Type5ShadingContext extends GouraudShadingContext private List<ShadedTriangle> collectTriangles(PDShadingType5 latticeTriangleShadingType, AffineTransform xform, Matrix matrix) throws IOException { - COSDictionary cosDictionary = latticeTriangleShadingType.getCOSObject(); + COSDictionary dict = latticeTriangleShadingType.getCOSObject(); + if (!(dict instanceof COSStream)) + { + return Collections.emptyList(); + } PDRange rangeX = latticeTriangleShadingType.getDecodeForParameter(0); PDRange rangeY = latticeTriangleShadingType.getDecodeForParameter(1); + if (Float.compare(rangeX.getMin(), rangeX.getMax()) == 0 || + Float.compare(rangeY.getMin(), rangeY.getMax()) == 0) + { + return Collections.emptyList(); + } int numPerRow = latticeTriangleShadingType.getVerticesPerRow(); PDRange[] colRange = new PDRange[numberOfColorComponents]; for (int i = 0; i < numberOfColorComponents; ++i) @@ -80,7 +90,7 @@ class Type5ShadingContext extends GouraudShadingContext List<Vertex> vlist = new ArrayList<Vertex>(); long maxSrcCoord = (long) Math.pow(2, bitsPerCoordinate) - 1; long maxSrcColor = (long) Math.pow(2, bitsPerColorComponent) - 1; - COSStream cosStream = (COSStream) cosDictionary; + COSStream cosStream = (COSStream) dict; try (ImageInputStream mciis = new MemoryCacheImageInputStream( cosStream.getUnfilteredStream())) diff --git a/src/main/java/org/sejda/sambox/pdmodel/graphics/state/PDExtendedGraphicsState.java b/src/main/java/org/sejda/sambox/pdmodel/graphics/state/PDExtendedGraphicsState.java index c057e5779657b2a4f942e39253b13f445e237ab2..25e67b3b16a17fb944286fe4ef0061fc8d56ec7b 100644 --- a/src/main/java/org/sejda/sambox/pdmodel/graphics/state/PDExtendedGraphicsState.java +++ b/src/main/java/org/sejda/sambox/pdmodel/graphics/state/PDExtendedGraphicsState.java @@ -16,6 +16,8 @@ */ package org.sejda.sambox.pdmodel.graphics.state; +import static 
java.util.Objects.nonNull; + import java.io.IOException; import org.sejda.sambox.cos.COSArray; @@ -264,18 +266,17 @@ public class PDExtendedGraphicsState implements COSObjectable */ public PDLineDashPattern getLineDashPattern() { - PDLineDashPattern retval = null; - COSArray dp = (COSArray) dict.getDictionaryObject(COSName.D); - if (dp != null) + COSArray dp = dict.getDictionaryObject(COSName.D, COSArray.class); + if (nonNull(dp) && dp.size() == 2) { - COSArray array = new COSArray(); - dp.addAll(dp); - dp.remove(dp.size() - 1); - int phase = dp.getInt(dp.size() - 1); - - retval = new PDLineDashPattern(array, phase); + COSBase dashArray = dp.getObject(0); + COSBase phase = dp.getObject(1); + if (dashArray instanceof COSArray && phase instanceof COSNumber) + { + return new PDLineDashPattern((COSArray) dashArray, ((COSNumber) phase).intValue()); + } } - return retval; + return null; } /** diff --git a/src/main/java/org/sejda/sambox/pdmodel/interactive/action/OpenMode.java b/src/main/java/org/sejda/sambox/pdmodel/interactive/action/OpenMode.java new file mode 100644 index 0000000000000000000000000000000000000000..53afc42630b0b4bafeabf9c73b646a89b0bc0b97 --- /dev/null +++ b/src/main/java/org/sejda/sambox/pdmodel/interactive/action/OpenMode.java @@ -0,0 +1,40 @@ +/* + * Copyright 2018 The Apache Software Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.sejda.sambox.pdmodel.interactive.action; + +/** + * This will specify whether to open the destination document in a new window. + * + * @author Tilman Hausherr + */ +public enum OpenMode +{ + /** + * The viewer application should behave in accordance with the current user preference. + */ + USER_PREFERENCE, + + /** + * Destination document will replace the current document in the same window. + */ + SAME_WINDOW, + + /** + * Open the destination document in a new window. + */ + NEW_WINDOW +} diff --git a/src/main/java/org/sejda/sambox/pdmodel/interactive/action/PDActionEmbeddedGoTo.java b/src/main/java/org/sejda/sambox/pdmodel/interactive/action/PDActionEmbeddedGoTo.java new file mode 100644 index 0000000000000000000000000000000000000000..bb12b8adfb30e0aa4d558bfff9a35fa4b13be1bd --- /dev/null +++ b/src/main/java/org/sejda/sambox/pdmodel/interactive/action/PDActionEmbeddedGoTo.java @@ -0,0 +1,191 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.sejda.sambox.pdmodel.interactive.action; + +import java.io.IOException; + +import org.sejda.sambox.cos.COSArray; +import org.sejda.sambox.cos.COSBase; +import org.sejda.sambox.cos.COSBoolean; +import org.sejda.sambox.cos.COSDictionary; +import org.sejda.sambox.cos.COSName; +import org.sejda.sambox.pdmodel.common.filespecification.PDFileSpecification; +import org.sejda.sambox.pdmodel.interactive.documentnavigation.destination.PDDestination; +import org.sejda.sambox.pdmodel.interactive.documentnavigation.destination.PDPageDestination; + +/** + * This represents a embedded go-to action that can be executed in a PDF document. + * + * @author Ben Litchfield + * @author Panagiotis Toumasis + * @author Tilman Hausherr + */ +public class PDActionEmbeddedGoTo extends PDAction +{ + /** + * This type of action this object represents. + */ + public static final String SUB_TYPE = "GoToE"; + + /** + * Default constructor. + */ + public PDActionEmbeddedGoTo() + { + setSubType(SUB_TYPE); + } + + /** + * Constructor. + * + * @param a The action dictionary. + */ + public PDActionEmbeddedGoTo(COSDictionary a) + { + super(a); + } + + /** + * This will get the destination to jump to. + * + * @return The D entry of the specific go-to action dictionary. + * + * @throws IOException If there is an error creating the destination. + */ + public PDDestination getDestination() throws IOException + { + return PDDestination.create(getCOSObject().getDictionaryObject(COSName.D)); + } + + /** + * This will set the destination to jump to. + * + * @param d The destination. + * + * @throws IllegalArgumentException if the destination is not a page dictionary object. 
+ */ + public void setDestination(PDDestination d) + { + if (d instanceof PDPageDestination) + { + PDPageDestination pageDest = (PDPageDestination) d; + COSArray destArray = pageDest.getCOSObject(); + if (destArray.size() >= 1) + { + COSBase page = destArray.getObject(0); + if (!(page instanceof COSDictionary)) + { + throw new IllegalArgumentException("Destination of a GoToE action must be " + + "a page dictionary object"); + } + } + } + getCOSObject().setItem(COSName.D, d); + } + + /** + * This will get the file in which the destination is located. + * + * @return The F entry of the specific embedded go-to action dictionary. + * + * @throws IOException If there is an error creating the file spec. + */ + public PDFileSpecification getFile() throws IOException + { + return PDFileSpecification.createFS(getCOSObject().getDictionaryObject(COSName.F)); + } + + /** + * This will set the file in which the destination is located. + * + * @param fs The file specification. + */ + public void setFile(PDFileSpecification fs) + { + getCOSObject().setItem(COSName.F, fs); + } + + /** + * This will specify whether to open the destination document in a new window, in the same + * window, or behave in accordance with the current user preference. + * + * @return A flag specifying how to open the destination document. + */ + public OpenMode getOpenInNewWindow() + { + if (getCOSObject().getDictionaryObject(COSName.NEW_WINDOW) instanceof COSBoolean) + { + COSBoolean b = (COSBoolean) getCOSObject().getDictionaryObject(COSName.NEW_WINDOW); + return b.getValue() ? OpenMode.NEW_WINDOW : OpenMode.SAME_WINDOW; + } + return OpenMode.USER_PREFERENCE; + } + + /** + * This will specify whether to open the destination document in a new window. + * + * @param value The flag value. 
+ */ + public void setOpenInNewWindow(OpenMode value) + { + if (null == value) + { + getCOSObject().removeItem(COSName.NEW_WINDOW); + return; + } + switch (value) + { + case USER_PREFERENCE: + getCOSObject().removeItem(COSName.NEW_WINDOW); + break; + case SAME_WINDOW: + getCOSObject().setBoolean(COSName.NEW_WINDOW, false); + break; + case NEW_WINDOW: + getCOSObject().setBoolean(COSName.NEW_WINDOW, true); + break; + default: + // shouldn't happen unless the enum type is changed + break; + } + } + + /** + * Get the target directory. + * + * @return the target directory or null if there is none. + */ + public PDTargetDirectory getTargetDirectory() + { + COSBase base = getCOSObject().getDictionaryObject(COSName.T); + if (base instanceof COSDictionary) + { + return new PDTargetDirectory((COSDictionary) base); + } + return null; + } + + /** + * Sets the target directory. + * + * @param targetDirectory the target directory. + */ + public void setTargetDirectory(PDTargetDirectory targetDirectory) + { + getCOSObject().setItem(COSName.T, targetDirectory); + } +} diff --git a/src/main/java/org/sejda/sambox/pdmodel/interactive/action/PDActionFactory.java b/src/main/java/org/sejda/sambox/pdmodel/interactive/action/PDActionFactory.java index f9fdc401651781515d8ae82b048eeac6fb0376a4..fc1e921a813b287cfa5a1c78fedce44bb4fdcdd8 100644 --- a/src/main/java/org/sejda/sambox/pdmodel/interactive/action/PDActionFactory.java +++ b/src/main/java/org/sejda/sambox/pdmodel/interactive/action/PDActionFactory.java @@ -99,6 +99,10 @@ public final class PDActionFactory { return new PDActionThread(action); } + else if (PDActionEmbeddedGoTo.SUB_TYPE.equals(type)) + { + return new PDActionEmbeddedGoTo(action); + } } return null; } diff --git a/src/main/java/org/sejda/sambox/pdmodel/interactive/action/PDActionGoTo.java b/src/main/java/org/sejda/sambox/pdmodel/interactive/action/PDActionGoTo.java index d9f61cb1dec5d59bf51d0978210fdac98487ab86..b9633a35445cfb017bc3dcb669a91f26d35f520e 100644 --- 
a/src/main/java/org/sejda/sambox/pdmodel/interactive/action/PDActionGoTo.java +++ b/src/main/java/org/sejda/sambox/pdmodel/interactive/action/PDActionGoTo.java @@ -43,7 +43,6 @@ public class PDActionGoTo extends PDAction */ public PDActionGoTo() { - super(); setSubType( SUB_TYPE ); } diff --git a/src/main/java/org/sejda/sambox/pdmodel/interactive/action/PDActionImportData.java b/src/main/java/org/sejda/sambox/pdmodel/interactive/action/PDActionImportData.java index a1e85dbb6d92e5c37c2a4ca7f2c483731eabed33..74307f942fe1dc850556864641dfde996019e179 100644 --- a/src/main/java/org/sejda/sambox/pdmodel/interactive/action/PDActionImportData.java +++ b/src/main/java/org/sejda/sambox/pdmodel/interactive/action/PDActionImportData.java @@ -38,7 +38,6 @@ public class PDActionImportData extends PDAction */ public PDActionImportData() { - action = new COSDictionary(); setSubType(SUB_TYPE); } diff --git a/src/main/java/org/sejda/sambox/pdmodel/interactive/action/PDActionJavaScript.java b/src/main/java/org/sejda/sambox/pdmodel/interactive/action/PDActionJavaScript.java index 2b1e979ef3de2b001b53c7f6ad568833f8994c21..547946e310ce9276c694bd331a262cc3cef72f4b 100644 --- a/src/main/java/org/sejda/sambox/pdmodel/interactive/action/PDActionJavaScript.java +++ b/src/main/java/org/sejda/sambox/pdmodel/interactive/action/PDActionJavaScript.java @@ -39,7 +39,6 @@ public class PDActionJavaScript extends PDAction */ public PDActionJavaScript() { - super(); setSubType(SUB_TYPE); } diff --git a/src/main/java/org/sejda/sambox/pdmodel/interactive/action/PDActionLaunch.java b/src/main/java/org/sejda/sambox/pdmodel/interactive/action/PDActionLaunch.java index fb8d522edb4e90ec4cdc2c0696d2aaeb54e27bc3..a8d3c4268d1eb779899141e160c95c18f269a83d 100644 --- a/src/main/java/org/sejda/sambox/pdmodel/interactive/action/PDActionLaunch.java +++ b/src/main/java/org/sejda/sambox/pdmodel/interactive/action/PDActionLaunch.java @@ -16,6 +16,7 @@ */ package org.sejda.sambox.pdmodel.interactive.action; +import 
org.sejda.sambox.cos.COSBoolean; import org.sejda.sambox.cos.COSDictionary; import org.sejda.sambox.cos.COSName; import org.sejda.sambox.pdmodel.common.filespecification.FileSpecifications; @@ -204,26 +205,47 @@ public class PDActionLaunch extends PDAction } /** - * This will specify whether to open the destination document in a new window. - * If this flag is false, the destination document will replace the current - * document in the same window. If this entry is absent, the viewer application - * should behave in accordance with the current user preference. This entry is - * ignored if the file designated by the F entry is not a PDF document. + * This will specify whether to open the destination document in a new window, in the same + * window, or behave in accordance with the current user preference. * - * @return A flag specifying whether to open the destination document in a new window. + * @return A flag specifying how to open the destination document. */ - public boolean shouldOpenInNewWindow() + public OpenMode getOpenInNewWindow() { - return action.getBoolean( "NewWindow", true ); + if (getCOSObject().getDictionaryObject(COSName.NEW_WINDOW) instanceof COSBoolean) + { + COSBoolean b = (COSBoolean) getCOSObject().getDictionaryObject(COSName.NEW_WINDOW); + return b.getValue() ? OpenMode.NEW_WINDOW : OpenMode.SAME_WINDOW; + } + return OpenMode.USER_PREFERENCE; } /** - * This will specify the destination document to open in a new window. + * This will specify whether to open the destination document in a new window. * * @param value The flag value. 
*/ - public void setOpenInNewWindow( boolean value ) + public void setOpenInNewWindow(OpenMode value) { - action.setBoolean( "NewWindow", value ); + if (null == value) + { + getCOSObject().removeItem(COSName.NEW_WINDOW); + return; + } + switch (value) + { + case USER_PREFERENCE: + getCOSObject().removeItem(COSName.NEW_WINDOW); + break; + case SAME_WINDOW: + getCOSObject().setBoolean(COSName.NEW_WINDOW, false); + break; + case NEW_WINDOW: + getCOSObject().setBoolean(COSName.NEW_WINDOW, true); + break; + default: + // shouldn't happen unless the enum type is changed + break; + } } } diff --git a/src/main/java/org/sejda/sambox/pdmodel/interactive/action/PDActionMovie.java b/src/main/java/org/sejda/sambox/pdmodel/interactive/action/PDActionMovie.java index 0aecf8931eff5fb97738fe54de20f615d119fd9b..c99243c34b51513877cfe23b25e72571308279b0 100644 --- a/src/main/java/org/sejda/sambox/pdmodel/interactive/action/PDActionMovie.java +++ b/src/main/java/org/sejda/sambox/pdmodel/interactive/action/PDActionMovie.java @@ -36,7 +36,6 @@ public class PDActionMovie extends PDAction */ public PDActionMovie() { - action = new COSDictionary(); setSubType(SUB_TYPE); } diff --git a/src/main/java/org/sejda/sambox/pdmodel/interactive/action/PDActionNamed.java b/src/main/java/org/sejda/sambox/pdmodel/interactive/action/PDActionNamed.java index 7a1a6b0254686054cb19497de956a292a6391878..869a8282504a4380ac6dd7e52119d5824fb63676 100644 --- a/src/main/java/org/sejda/sambox/pdmodel/interactive/action/PDActionNamed.java +++ b/src/main/java/org/sejda/sambox/pdmodel/interactive/action/PDActionNamed.java @@ -33,7 +33,6 @@ public class PDActionNamed extends PDAction */ public PDActionNamed() { - action = new COSDictionary(); setSubType(SUB_TYPE); } diff --git a/src/main/java/org/sejda/sambox/pdmodel/interactive/action/PDActionRemoteGoTo.java b/src/main/java/org/sejda/sambox/pdmodel/interactive/action/PDActionRemoteGoTo.java index 
4ce02ef04958fdeafb2530d4deb52c7f5c357811..ba0ebb2b037ad25d77d7dc297a737323508937f6 100644 --- a/src/main/java/org/sejda/sambox/pdmodel/interactive/action/PDActionRemoteGoTo.java +++ b/src/main/java/org/sejda/sambox/pdmodel/interactive/action/PDActionRemoteGoTo.java @@ -17,6 +17,7 @@ package org.sejda.sambox.pdmodel.interactive.action; import org.sejda.sambox.cos.COSBase; +import org.sejda.sambox.cos.COSBoolean; import org.sejda.sambox.cos.COSDictionary; import org.sejda.sambox.cos.COSName; import org.sejda.sambox.pdmodel.common.filespecification.FileSpecifications; @@ -40,7 +41,6 @@ public class PDActionRemoteGoTo extends PDAction */ public PDActionRemoteGoTo() { - action = new COSDictionary(); setSubType(SUB_TYPE); } @@ -130,24 +130,47 @@ public class PDActionRemoteGoTo extends PDAction } /** - * This will specify whether to open the destination document in a new window. If this flag is false, the - * destination document will replace the current document in the same window. If this entry is absent, the viewer - * application should behave in accordance with the current user preference. + * This will specify whether to open the destination document in a new window, in the same + * window, or behave in accordance with the current user preference. * - * @return A flag specifying whether to open the destination document in a new window. + * @return A flag specifying how to open the destination document. */ - public boolean shouldOpenInNewWindow() + public OpenMode getOpenInNewWindow() { - return action.getBoolean("NewWindow", true); + if (getCOSObject().getDictionaryObject(COSName.NEW_WINDOW) instanceof COSBoolean) + { + COSBoolean b = (COSBoolean) getCOSObject().getDictionaryObject(COSName.NEW_WINDOW); + return b.getValue() ? OpenMode.NEW_WINDOW : OpenMode.SAME_WINDOW; + } + return OpenMode.USER_PREFERENCE; } /** - * This will specify the destination document to open in a new window. + * This will specify whether to open the destination document in a new window. 
* * @param value The flag value. */ - public void setOpenInNewWindow(boolean value) + public void setOpenInNewWindow(OpenMode value) { - action.setBoolean("NewWindow", value); + if (null == value) + { + getCOSObject().removeItem(COSName.NEW_WINDOW); + return; + } + switch (value) + { + case USER_PREFERENCE: + getCOSObject().removeItem(COSName.NEW_WINDOW); + break; + case SAME_WINDOW: + getCOSObject().setBoolean(COSName.NEW_WINDOW, false); + break; + case NEW_WINDOW: + getCOSObject().setBoolean(COSName.NEW_WINDOW, true); + break; + default: + // shouldn't happen unless the enum type is changed + break; + } } } diff --git a/src/main/java/org/sejda/sambox/pdmodel/interactive/action/PDActionResetForm.java b/src/main/java/org/sejda/sambox/pdmodel/interactive/action/PDActionResetForm.java index a680e7ec50e4a89a787a66a7e1bf51307939085f..f96f93ff27ad20b2f7c35c6778f971d01acf0ae0 100644 --- a/src/main/java/org/sejda/sambox/pdmodel/interactive/action/PDActionResetForm.java +++ b/src/main/java/org/sejda/sambox/pdmodel/interactive/action/PDActionResetForm.java @@ -38,7 +38,6 @@ public class PDActionResetForm extends PDAction */ public PDActionResetForm() { - action = new COSDictionary(); setSubType(SUB_TYPE); } diff --git a/src/main/java/org/sejda/sambox/pdmodel/interactive/action/PDActionSound.java b/src/main/java/org/sejda/sambox/pdmodel/interactive/action/PDActionSound.java index ea05789361f02e1146d62a58a4ee4810c9186a1a..f0d23458b9ab4015b675f8ed70d3bb785b4a1971 100644 --- a/src/main/java/org/sejda/sambox/pdmodel/interactive/action/PDActionSound.java +++ b/src/main/java/org/sejda/sambox/pdmodel/interactive/action/PDActionSound.java @@ -38,7 +38,6 @@ public class PDActionSound extends PDAction */ public PDActionSound() { - action = new COSDictionary(); setSubType(SUB_TYPE); } diff --git a/src/main/java/org/sejda/sambox/pdmodel/interactive/action/PDActionURI.java b/src/main/java/org/sejda/sambox/pdmodel/interactive/action/PDActionURI.java index 
e28a3b60daae9cb95d421538eeb39e93df32dd07..2a2ec952268266a5c280ce7276f3bf767b82e52d 100644 --- a/src/main/java/org/sejda/sambox/pdmodel/interactive/action/PDActionURI.java +++ b/src/main/java/org/sejda/sambox/pdmodel/interactive/action/PDActionURI.java @@ -42,7 +42,6 @@ public class PDActionURI extends PDAction */ public PDActionURI() { - action = new COSDictionary(); setSubType(SUB_TYPE); } diff --git a/src/main/java/org/sejda/sambox/pdmodel/interactive/action/PDTargetDirectory.java b/src/main/java/org/sejda/sambox/pdmodel/interactive/action/PDTargetDirectory.java new file mode 100644 index 0000000000000000000000000000000000000000..5755b08039802e915f90a297a006729818e8f52c --- /dev/null +++ b/src/main/java/org/sejda/sambox/pdmodel/interactive/action/PDTargetDirectory.java @@ -0,0 +1,276 @@ +/* + * Copyright 2018 The Apache Software Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.sejda.sambox.pdmodel.interactive.action; + +import org.sejda.sambox.cos.*; +import org.sejda.sambox.pdmodel.interactive.documentnavigation.destination.PDNamedDestination; + +/** + * A target dictionary specifying path information to the target document. Each target dictionary + * specifies one element in the full path to the target and may have nested target dictionaries + * specifying additional elements. 
+ * + * @author Tilman Hausherr + */ +public class PDTargetDirectory implements COSObjectable +{ + private final COSDictionary dict; + + /** + * Default constructor, creates target directory. + */ + public PDTargetDirectory() + { + dict = new COSDictionary(); + } + + /** + * Create a target directory from an existing dictionary. + * + * @param dictionary The existing target directory dictionary. + */ + public PDTargetDirectory(COSDictionary dictionary) + { + dict = dictionary; + } + + /** + * This will get the underlying dictionary that this class acts on. + * + * @return The underlying dictionary for this class. + */ + @Override + public COSDictionary getCOSObject() + { + return dict; + } + + /** + * Get the relationship between the current document and the target (which may be an + * intermediate target). + * + * @return the relationship as a name. Valid values are P (the target is the parent of the + * current document) and C (the target is a child of the current document). Invalid values or + * null are also returned. + */ + public COSName getRelationship() + { + COSBase base = dict.getItem(COSName.R); + if (base instanceof COSName) + { + return (COSName) base; + } + return null; + } + + /** + * Set the relationship between the current document and the target (which may be an + * intermediate target). + * + * @param relationship Valid values are P (the target is the parent of the current document) and + * C (the target is a child of the current document). + * + * @throws IllegalArgumentException if the parameter is null or anything other than P or C. + */ + public void setRelationship(COSName relationship) + { + if (!COSName.P.equals(relationship) && !COSName.C.equals(relationship)) + { + throw new IllegalArgumentException("The only valid are P or C, not " + (relationship == null ? null : relationship.getName())); + } + dict.setItem(COSName.R, relationship); + } + + /** + * Get the name of the file as found in the EmbeddedFiles name tree. This is only to be used if + * the target is a child of the current document. 
+ * + * @return a filename or null if there is none. + */ + public String getFilename() + { + return dict.getString(COSName.N); + } + + /** + * Sets the name of the file as found in the EmbeddedFiles name tree. This is only to be used if + * the target is a child of the current document. + * + * @param filename a filename or null if the entry is to be deleted. + */ + public void setFilename(String filename) + { + dict.setString(COSName.N, filename); + } + + /** + * Get the target directory. If this entry is absent, the current document is the target file + * containing the destination. + * + * @return the target directory or null if the current document is the target file containing + * the destination. + */ + public PDTargetDirectory getTargetDirectory() + { + COSBase base = dict.getDictionaryObject(COSName.T); + if (base instanceof COSDictionary) + { + return new PDTargetDirectory((COSDictionary) base); + } + return null; + } + + /** + * Sets the target directory. + * + * @param targetDirectory the target directory or null if the current document is the target + * file containing the destination. + */ + public void setTargetDirectory(PDTargetDirectory targetDirectory) + { + dict.setItem(COSName.T, targetDirectory); + } + + /** + * If the value in the /P entry is an integer, this will get the page number (zero-based) in the + * current document containing the file attachment annotation. + * + * @return the zero based page number or -1 if the /P entry value is missing or not an integer. + */ + public int getPageNumber() + { + COSBase base = dict.getDictionaryObject(COSName.P); + if (base instanceof COSInteger) + { + return ((COSInteger) base).intValue(); + } + return -1; + } + + /** + * Set the page number (zero-based) in the current document containing the file attachment + * annotation. + * + * @param pageNumber the zero based page number. If this is negative then the entry is removed. 
+ */ + public void setPageNumber(int pageNumber) + { + if (pageNumber < 0) + { + dict.removeItem(COSName.P); + } + else + { + dict.setInt(COSName.P, pageNumber); + } + } + + /** + * If the value in the /P entry is a string, this will get a named destination in the current + * document that provides the page number of the file attachment annotation. + * + * @return a named destination or null if the /P entry value is missing or not a string. + */ + public PDNamedDestination getNamedDestination() + { + COSBase base = dict.getDictionaryObject(COSName.P); + if (base instanceof COSString) + { + return new PDNamedDestination((COSString) base); + } + return null; + } + + /** + * This will set a named destination in the current document that provides the page number of + * the file attachment annotation. + * + * @param dest a named destination or null if the entry is to be removed. + */ + public void setNamedDestination(PDNamedDestination dest) + { + if (dest == null) + { + dict.removeItem(COSName.P); + } + else + { + dict.setItem(COSName.P, dest); + } + } + + /** + * If the value in the /A entry is an integer, this will get the index (zero-based) of the + * annotation in the /Annots array of the page specified by the /P entry. + * + * @return the zero based annotation index or -1 if the /A entry value is missing or not an integer. + */ + public int getAnnotationIndex() + { + COSBase base = dict.getDictionaryObject(COSName.A); + if (base instanceof COSInteger) + { + return ((COSInteger) base).intValue(); + } + return -1; + } + + /** + * This will set the index (zero-based) of the annotation in the /Annots array of the page + * specified by the /P entry. + * + * @param index the zero based index. If this is negative then the entry is removed. 
+ */ + public void setAnnotationIndex(int index) + { + if (index < 0) + { + dict.removeItem(COSName.A); + } + else + { + dict.setInt(COSName.A, index); + } + } + + /** + * If the value in the /A entry is a string, this will get the value of the /NM entry in the + * annotation dictionary. + * + * @return the /NM value of an annotation dictionary or null if the /A entry value is missing or + * not a string. + */ + public String getAnnotationName() + { + COSBase base = dict.getDictionaryObject(COSName.A); + if (base instanceof COSString) + { + return ((COSString) base).getString(); + } + return null; + } + + /** + * This will set the /A entry to the value of the /NM entry in the annotation dictionary. + * + * @param name the /NM value of an annotation dictionary or null if the entry is to be removed. + */ + public void setAnnotationName(String name) + { + dict.setString(COSName.A, name); + } +} diff --git a/src/main/java/org/sejda/sambox/pdmodel/interactive/annotation/AnnotationFilter.java b/src/main/java/org/sejda/sambox/pdmodel/interactive/annotation/AnnotationFilter.java new file mode 100644 index 0000000000000000000000000000000000000000..76b4484ce59e4e28388f4b9fa6305cf9b17d3c77 --- /dev/null +++ b/src/main/java/org/sejda/sambox/pdmodel/interactive/annotation/AnnotationFilter.java @@ -0,0 +1,30 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.sejda.sambox.pdmodel.interactive.annotation; + +/** + * Simple interface allowing the use of an annotation filter visitor. + * + * @author <a href="mailto:maxime.veron.pro@gmail.com">Maxime Veron</a> + * + */ +public interface AnnotationFilter +{ + boolean accept(PDAnnotation annotation); +} + diff --git a/src/main/java/org/sejda/sambox/pdmodel/interactive/annotation/PDAnnotation.java b/src/main/java/org/sejda/sambox/pdmodel/interactive/annotation/PDAnnotation.java index 81c36279ea2bd611ae12d368ee2dfa0591c8d976..bea8020663227d542890440c847f4a8a04ec5acb 100644 --- a/src/main/java/org/sejda/sambox/pdmodel/interactive/annotation/PDAnnotation.java +++ b/src/main/java/org/sejda/sambox/pdmodel/interactive/annotation/PDAnnotation.java @@ -159,10 +159,6 @@ public abstract class PDAnnotation extends PDDictionaryWrapper // see 12.5.6.10 Text Markup Annotations return new PDAnnotationTextMarkup(annotDic); } - else if (PDAnnotationLink.SUB_TYPE.equals(subtype)) - { - return new PDAnnotationLink(annotDic); - } else if (COSName.WIDGET.getName().equals(subtype)) { return new PDAnnotationWidget(annotDic); diff --git a/src/main/java/org/sejda/sambox/pdmodel/interactive/annotation/PDAnnotationFileAttachment.java b/src/main/java/org/sejda/sambox/pdmodel/interactive/annotation/PDAnnotationFileAttachment.java index fcf11d666f7c23bbb8e00c114f48082e9d51822e..741ce918f742d93bd3fac8ae3ce0502cf496f198 100644 --- a/src/main/java/org/sejda/sambox/pdmodel/interactive/annotation/PDAnnotationFileAttachment.java +++ 
b/src/main/java/org/sejda/sambox/pdmodel/interactive/annotation/PDAnnotationFileAttachment.java @@ -55,13 +55,13 @@ public class PDAnnotationFileAttachment extends PDAnnotationMarkup */ public PDAnnotationFileAttachment() { - getCOSObject().setItem(COSName.SUBTYPE, COSName.getPDFName(SUB_TYPE)); + getCOSObject().setName(COSName.SUBTYPE, SUB_TYPE); } /** * Creates a Link annotation from a COSDictionary, expected to be a correct object definition. * - * @param field the PDF objet to represent as a field. + * @param field the PDF object to represent as a field. */ public PDAnnotationFileAttachment(COSDictionary field) { @@ -95,16 +95,16 @@ public class PDAnnotationFileAttachment extends PDAnnotationMarkup */ public String getAttachmentName() { - return getCOSObject().getNameAsString("Name", ATTACHMENT_NAME_PUSH_PIN); + return getCOSObject().getNameAsString(COSName.NAME, ATTACHMENT_NAME_PUSH_PIN); } /** - * Set the name used to draw the attachement icon. See the ATTACHMENT_NAME_XXX constants. + * Set the name used to draw the attachment icon. See the ATTACHMENT_NAME_XXX constants. * * @param name The name of the visual icon to draw. 
*/ - public void setAttachementName(String name) + public void setAttachmentName(String name) { - getCOSObject().setName("Name", name); + getCOSObject().setName(COSName.NAME, name); } } diff --git a/src/main/java/org/sejda/sambox/pdmodel/interactive/annotation/PDAnnotationLine.java b/src/main/java/org/sejda/sambox/pdmodel/interactive/annotation/PDAnnotationLine.java index 9964edec730134007b3d220cc177534480b0f6ff..1e08ee613612ab8a3e971100776f598486bc5deb 100644 --- a/src/main/java/org/sejda/sambox/pdmodel/interactive/annotation/PDAnnotationLine.java +++ b/src/main/java/org/sejda/sambox/pdmodel/interactive/annotation/PDAnnotationLine.java @@ -111,7 +111,7 @@ public class PDAnnotationLine extends PDAnnotationMarkup */ public PDAnnotationLine() { - getCOSObject().setItem(COSName.SUBTYPE, COSName.getPDFName(SUB_TYPE)); + getCOSObject().setName(COSName.SUBTYPE, SUB_TYPE); // Dictionary value L is mandatory, fill in with arbitary value setLine(new float[] { 0, 0, 0, 0 }); } @@ -306,7 +306,7 @@ public class PDAnnotationLine extends PDAnnotationMarkup */ public float getLeaderLineLength() { - return this.getCOSObject().getFloat(COSName.LL); + return this.getCOSObject().getFloat(COSName.LL, 0); } /** @@ -326,7 +326,7 @@ public class PDAnnotationLine extends PDAnnotationMarkup */ public float getLeaderLineExtensionLength() { - return this.getCOSObject().getFloat(COSName.LLE); + return this.getCOSObject().getFloat(COSName.LLE, 0); } /** @@ -346,7 +346,7 @@ public class PDAnnotationLine extends PDAnnotationMarkup */ public float getLeaderLineOffsetLength() { - return this.getCOSObject().getFloat(COSName.LLO); + return this.getCOSObject().getFloat(COSName.LLO, 0); } /** @@ -366,7 +366,7 @@ public class PDAnnotationLine extends PDAnnotationMarkup */ public String getCaptionPositioning() { - return this.getCOSObject().getString(COSName.CP); + return this.getCOSObject().getNameAsString(COSName.CP); } /** @@ -376,7 +376,7 @@ public class PDAnnotationLine extends PDAnnotationMarkup 
*/ public void setCaptionPositioning(String captionPositioning) { - this.getCOSObject().setString(COSName.CP, captionPositioning); + this.getCOSObject().setName(COSName.CP, captionPositioning); } /** diff --git a/src/main/java/org/sejda/sambox/pdmodel/interactive/annotation/PDAnnotationLink.java b/src/main/java/org/sejda/sambox/pdmodel/interactive/annotation/PDAnnotationLink.java index 648b660aec387357eab00e1dc4d0eddb95a64497..aadbc7e51ba566febdfd3aecb14b78a0ab6586b8 100644 --- a/src/main/java/org/sejda/sambox/pdmodel/interactive/annotation/PDAnnotationLink.java +++ b/src/main/java/org/sejda/sambox/pdmodel/interactive/annotation/PDAnnotationLink.java @@ -65,8 +65,7 @@ public class PDAnnotationLink extends PDAnnotation */ public PDAnnotationLink() { - super(); - getCOSObject().setItem(COSName.SUBTYPE, COSName.getPDFName(SUB_TYPE)); + getCOSObject().setName(COSName.SUBTYPE, SUB_TYPE); } /** diff --git a/src/main/java/org/sejda/sambox/pdmodel/interactive/annotation/PDAnnotationPopup.java b/src/main/java/org/sejda/sambox/pdmodel/interactive/annotation/PDAnnotationPopup.java index f79ab748222d9cf05ee92c7e7f31c2b55eed2253..a81184cc1b917bcfa4e9457024ebec9daa27b496 100644 --- a/src/main/java/org/sejda/sambox/pdmodel/interactive/annotation/PDAnnotationPopup.java +++ b/src/main/java/org/sejda/sambox/pdmodel/interactive/annotation/PDAnnotationPopup.java @@ -36,7 +36,7 @@ public class PDAnnotationPopup extends PDAnnotation */ public PDAnnotationPopup() { - getCOSObject().setItem(COSName.SUBTYPE, COSName.getPDFName(SUB_TYPE)); + getCOSObject().setName(COSName.SUBTYPE, SUB_TYPE); } /** diff --git a/src/main/java/org/sejda/sambox/pdmodel/interactive/annotation/PDAnnotationRubberStamp.java b/src/main/java/org/sejda/sambox/pdmodel/interactive/annotation/PDAnnotationRubberStamp.java index 90e175ce61efb3a3477054b28fc1d3065c007280..63bb3ac59f7d672b30070046456e36778b98a0b5 100644 --- a/src/main/java/org/sejda/sambox/pdmodel/interactive/annotation/PDAnnotationRubberStamp.java +++ 
b/src/main/java/org/sejda/sambox/pdmodel/interactive/annotation/PDAnnotationRubberStamp.java @@ -98,8 +98,7 @@ public class PDAnnotationRubberStamp extends PDAnnotationMarkup */ public PDAnnotationRubberStamp() { - super(); - getCOSObject().setItem(COSName.SUBTYPE, COSName.getPDFName(SUB_TYPE)); + getCOSObject().setName(COSName.SUBTYPE, SUB_TYPE); } /** diff --git a/src/main/java/org/sejda/sambox/pdmodel/interactive/annotation/PDAnnotationSquareCircle.java b/src/main/java/org/sejda/sambox/pdmodel/interactive/annotation/PDAnnotationSquareCircle.java index 9b125cbf80b7416e6dc465a8821bd56d8365d799..64b5a9e1b21785ad09df15e7729cb16d10b6d510 100644 --- a/src/main/java/org/sejda/sambox/pdmodel/interactive/annotation/PDAnnotationSquareCircle.java +++ b/src/main/java/org/sejda/sambox/pdmodel/interactive/annotation/PDAnnotationSquareCircle.java @@ -37,7 +37,7 @@ public class PDAnnotationSquareCircle extends PDAnnotationMarkup */ public static final String SUB_TYPE_SQUARE = "Square"; /** - * Constant for an Eliptical type of annotation. + * Constant for an elliptical type of annotation. */ public static final String SUB_TYPE_CIRCLE = "Circle"; @@ -54,7 +54,7 @@ public class PDAnnotationSquareCircle extends PDAnnotationMarkup /** * Creates a Line annotation from a COSDictionary, expected to be a correct object definition. * - * @param field the PDF objet to represent as a field. + * @param field the PDF object to represent as a field. 
*/ public PDAnnotationSquareCircle(COSDictionary field) { diff --git a/src/main/java/org/sejda/sambox/pdmodel/interactive/annotation/PDAnnotationText.java b/src/main/java/org/sejda/sambox/pdmodel/interactive/annotation/PDAnnotationText.java index e8d12930c464b880411390720f44b8911ff84e01..1a8cf5708b3135230ce535c96c94eb5a4435c1de 100644 --- a/src/main/java/org/sejda/sambox/pdmodel/interactive/annotation/PDAnnotationText.java +++ b/src/main/java/org/sejda/sambox/pdmodel/interactive/annotation/PDAnnotationText.java @@ -76,7 +76,7 @@ public class PDAnnotationText extends PDAnnotationMarkup */ public PDAnnotationText() { - getCOSObject().setItem(COSName.SUBTYPE, COSName.getPDFName(SUB_TYPE)); + getCOSObject().setName(COSName.SUBTYPE, SUB_TYPE); } /** diff --git a/src/main/java/org/sejda/sambox/pdmodel/interactive/annotation/PDAppearanceCharacteristicsDictionary.java b/src/main/java/org/sejda/sambox/pdmodel/interactive/annotation/PDAppearanceCharacteristicsDictionary.java index d718638eafd78015e134a4bcc9a16d3d2ceca79a..9cde2794ad06fe0b06ff23bd0efc2b6f2f51e55f 100644 --- a/src/main/java/org/sejda/sambox/pdmodel/interactive/annotation/PDAppearanceCharacteristicsDictionary.java +++ b/src/main/java/org/sejda/sambox/pdmodel/interactive/annotation/PDAppearanceCharacteristicsDictionary.java @@ -108,7 +108,7 @@ public class PDAppearanceCharacteristicsDictionary extends PDDictionaryWrapper */ public String getNormalCaption() { - return this.getCOSObject().getString("CA"); + return this.getCOSObject().getString(COSName.CA); } /** @@ -118,7 +118,7 @@ public class PDAppearanceCharacteristicsDictionary extends PDDictionaryWrapper */ public void setNormalCaption(String caption) { - this.getCOSObject().setString("CA", caption); + this.getCOSObject().setString(COSName.CA, caption); } /** @@ -138,7 +138,7 @@ public class PDAppearanceCharacteristicsDictionary extends PDDictionaryWrapper */ public void setRolloverCaption(String caption) { - this.getCOSObject().setString("RC", caption); + 
this.getCOSObject().setString(COSName.RC, caption); } /** @@ -148,7 +148,7 @@ public class PDAppearanceCharacteristicsDictionary extends PDDictionaryWrapper */ public String getAlternateCaption() { - return this.getCOSObject().getString("AC"); + return this.getCOSObject().getString(COSName.AC); } /** @@ -158,7 +158,7 @@ public class PDAppearanceCharacteristicsDictionary extends PDDictionaryWrapper */ public void setAlternateCaption(String caption) { - this.getCOSObject().setString("AC", caption); + this.getCOSObject().setString(COSName.AC, caption); } /** @@ -168,7 +168,7 @@ public class PDAppearanceCharacteristicsDictionary extends PDDictionaryWrapper */ public PDFormXObject getNormalIcon() { - COSStream i = this.getCOSObject().getDictionaryObject("I", COSStream.class); + COSStream i = this.getCOSObject().getDictionaryObject(COSName.I, COSStream.class); if (nonNull(i)) { return new PDFormXObject(i); @@ -183,7 +183,7 @@ public class PDAppearanceCharacteristicsDictionary extends PDDictionaryWrapper */ public PDFormXObject getRolloverIcon() { - COSStream i = this.getCOSObject().getDictionaryObject("RI", COSStream.class); + COSStream i = this.getCOSObject().getDictionaryObject(COSName.RI, COSStream.class); if (nonNull(i)) { return new PDFormXObject(i); @@ -198,7 +198,7 @@ public class PDAppearanceCharacteristicsDictionary extends PDDictionaryWrapper */ public PDFormXObject getAlternateIcon() { - COSStream i = this.getCOSObject().getDictionaryObject("IX", COSStream.class); + COSStream i = this.getCOSObject().getDictionaryObject(COSName.IX, COSStream.class); if (nonNull(i)) { return new PDFormXObject(i); diff --git a/src/main/java/org/sejda/sambox/pdmodel/interactive/documentnavigation/outline/PDOutlineItem.java b/src/main/java/org/sejda/sambox/pdmodel/interactive/documentnavigation/outline/PDOutlineItem.java index 7ceeea8b197421e287168dacd60706779e6dcf2e..ae927424832d143bedfe7b8f415c86eab7a82ec2 100644 --- 
a/src/main/java/org/sejda/sambox/pdmodel/interactive/documentnavigation/outline/PDOutlineItem.java +++ b/src/main/java/org/sejda/sambox/pdmodel/interactive/documentnavigation/outline/PDOutlineItem.java @@ -240,8 +240,8 @@ public final class PDOutlineItem extends PDOutlineNode PDPageDestination pageDestination = null; if (dest instanceof PDNamedDestination) { - pageDestination = doc.getDocumentCatalog().findNamedDestinationPage( - (PDNamedDestination) dest); + pageDestination = doc.getDocumentCatalog() + .findNamedDestinationPage((PDNamedDestination) dest); if (pageDestination == null) { return null; @@ -277,8 +277,8 @@ public final class PDOutlineItem extends PDOutlineNode */ public PDAction getAction() { - return PDActionFactory.createAction((COSDictionary) getCOSObject().getDictionaryObject( - COSName.A)); + return PDActionFactory + .createAction(getCOSObject().getDictionaryObject(COSName.A, COSDictionary.class)); } /** diff --git a/src/main/java/org/sejda/sambox/pdmodel/interactive/form/AppearanceGeneratorHelper.java b/src/main/java/org/sejda/sambox/pdmodel/interactive/form/AppearanceGeneratorHelper.java index afd26743b03b59e2fdacbc2119072afb6d7dc73f..e7a31998c07198bc10183c4065a8438f3ac9bba3 100644 --- a/src/main/java/org/sejda/sambox/pdmodel/interactive/form/AppearanceGeneratorHelper.java +++ b/src/main/java/org/sejda/sambox/pdmodel/interactive/form/AppearanceGeneratorHelper.java @@ -18,13 +18,17 @@ package org.sejda.sambox.pdmodel.interactive.form; import static java.util.Arrays.asList; import static java.util.Objects.nonNull; +import static java.util.Optional.ofNullable; import static org.sejda.io.CountingWritableByteChannel.from; +import static org.sejda.util.RequireUtils.requireNotNullArg; import java.awt.geom.AffineTransform; import java.awt.geom.Point2D; import java.io.IOException; import java.util.ArrayList; +import java.util.HashMap; import java.util.List; +import java.util.Map; import org.sejda.sambox.contentstream.operator.Operator; import 
org.sejda.sambox.cos.COSName; @@ -84,6 +88,8 @@ class AppearanceGeneratorHelper */ private static final float DEFAULT_FONT_SIZE = 12; + private static final int MINIMUM_LINES_TO_FIT_IN_A_MULTILINE_FIELD = 5; + /** * The default padding applied by Acrobat to the fields bbox. */ @@ -123,6 +129,7 @@ class AppearanceGeneratorHelper && widget.getNormalAppearanceStream().getResources() != null) { PDResources widgetResources = widget.getNormalAppearanceStream().getResources(); + Map<COSName, PDFont> missingFonts = new HashMap<>(); for (COSName fontResourceName : widgetResources.getFontNames()) { try @@ -131,7 +138,7 @@ class AppearanceGeneratorHelper { LOG.debug("Adding font resource " + fontResourceName + " from widget to AcroForm"); - acroFormResources.put(fontResourceName, + missingFonts.put(fontResourceName, widgetResources.getFont(fontResourceName)); } } @@ -140,6 +147,12 @@ class AppearanceGeneratorHelper LOG.warn("Unable to match field level font with AcroForm font"); } } + + // add all missing font resources from widget to AcroForm + for (COSName key : missingFonts.keySet()) + { + acroFormResources.put(key, missingFonts.get(key)); + } } } } @@ -253,7 +266,15 @@ class AppearanceGeneratorHelper { COSString da = (COSString) widget.getCOSObject().getDictionaryObject(COSName.DA); PDResources dr = field.getAcroForm().getDefaultResources(); - return new PDDefaultAppearanceString(da, dr); + try + { + return new PDDefaultAppearanceString(da, dr); + } + catch (IOException ex) + { + LOG.warn("Failed to process default appearance string for widget {}, will use fallback default appearance", widget); + return new PDDefaultAppearanceString(); + } } private int resolveRotation(PDAnnotationWidget widget) @@ -415,24 +436,20 @@ class AppearanceGeneratorHelper contents.clip(); // get the font - // field's defined appearance font has priority // callers might have determined that the default font does not support rendering the field's value // so the font was substituted to another 
one, which has better unicode support // see PDVariableText.setAppearanceOverrideFont() - PDFont font = field.getAppearanceFont(); + PDFont font = ofNullable(field.getAppearanceFont()) + .orElseGet(() -> defaultAppearance.getFont()); - // fallback to default appearance - if (font == null) - { - font = defaultAppearance.getFont(); - } + requireNotNullArg(font, "font is null, check whether /DA entry is incomplete or incorrect"); - // calculate the fontSize (because 0 = autosize) float fontSize = defaultAppearance.getFontSize(); if (fontSize == 0) { + // calculate the fontSize (because 0 = autosize) fontSize = calculateFontSize(font, contentRect); } @@ -459,7 +476,7 @@ class AppearanceGeneratorHelper if (field instanceof PDTextField && ((PDTextField) field).isMultiline()) { - y = contentRect.getUpperRightY() - fontBoundingBoxAtSize; + y = contentRect.getUpperRightY() - calculateLineHeight(font, fontScaleY); } else { @@ -493,7 +510,7 @@ class AppearanceGeneratorHelper // chars if (shallComb()) { - insertGeneratedCombAppearance(contents, appearanceStream, font, fontSize); + insertGeneratedCombAppearance(contents, bbox, font, fontSize); } else if (field instanceof PDListBox) { @@ -508,7 +525,7 @@ class AppearanceGeneratorHelper appearanceStyle.setFontSize(fontSize); // Adobe Acrobat uses the font's bounding box for the leading between the lines - appearanceStyle.setLeading(font.getBoundingBox().getHeight() * fontScaleY); + appearanceStyle.setLeading(calculateLineHeight(font, fontScaleY)); PlainTextFormatter formatter = new PlainTextFormatter.Builder(contents) .style(appearanceStyle).text(textContent).width(contentRect.getWidth()) @@ -574,13 +591,13 @@ class AppearanceGeneratorHelper * Generate the appearance for comb fields. 
* * @param contents the content stream to write to - * @param appearanceStream the appearance stream used + * @param bbox the bbox used * @param font the font to be used * @param fontSize the font size to be used * @throws IOException */ - private void insertGeneratedCombAppearance(PDPageContentStream contents, - PDAppearanceStream appearanceStream, PDFont font, float fontSize) throws IOException + private void insertGeneratedCombAppearance(PDPageContentStream contents, PDRectangle bbox, + PDFont font, float fontSize) throws IOException { // TODO: Currently the quadding is not taken into account @@ -589,12 +606,12 @@ class AppearanceGeneratorHelper int maxLen = ((PDTextField) field).getMaxLen(); int numChars = Math.min(value.length(), maxLen); - PDRectangle paddingEdge = applyPadding(appearanceStream.getBBox(), 1); + PDRectangle paddingEdge = applyPadding(bbox, 1); - float combWidth = appearanceStream.getBBox().getWidth() / maxLen; + float combWidth = bbox.getWidth() / maxLen; float ascentAtFontSize = font.getFontDescriptor().getAscent() / FONTSCALE * fontSize; float baselineOffset = paddingEdge.getLowerLeftY() - + (appearanceStream.getBBox().getHeight() - ascentAtFontSize) / 2; + + (bbox.getHeight() - ascentAtFontSize) / 2; float prevCharWidth = 0f; @@ -717,44 +734,77 @@ class AppearanceGeneratorHelper } } + private float calculateLineHeight(PDFont font, float fontScaleY) throws IOException + { + float fontBoundingBoxAtSize = font.getBoundingBox().getHeight() * fontScaleY; + float fontCapAtSize = font.getFontDescriptor().getCapHeight() * fontScaleY; + float fontDescentAtSize = font.getFontDescriptor().getDescent() * fontScaleY; + + float lineHeight = fontCapAtSize - fontDescentAtSize; + if (lineHeight < 0) + { + lineHeight = fontBoundingBoxAtSize; + } + + return lineHeight; + } + /** * My "not so great" method for calculating the fontsize. It does not work superb, but it handles ok. 
* * @return the calculated font-size * @throws IOException If there is an error getting the font information. */ - private float calculateFontSize(PDFont font, PDRectangle contentRect) throws IOException + float calculateFontSize(PDFont font, PDRectangle contentRect) throws IOException { - float fontSize = defaultAppearance.getFontSize(); + float yScalingFactor = FONTSCALE * font.getFontMatrix().getScaleY(); + float xScalingFactor = FONTSCALE * font.getFontMatrix().getScaleX(); - // zero is special, it means the text is auto-sized - if (fontSize == 0) + if (isMultiLine()) { - if (isMultiLine()) - { - // Acrobat defaults to 12 for multiline text with size 0 - return DEFAULT_FONT_SIZE; - } - float yScalingFactor = FONTSCALE * font.getFontMatrix().getScaleY(); - float xScalingFactor = FONTSCALE * font.getFontMatrix().getScaleX(); + // Acrobat defaults to 12 for multiline text with size 0 + // PDFBOX decided to just return that and finish with it + // return DEFAULT_FONT_SIZE; + + // SAMBOX specifics below + // We calculate a font size that fits at least 5 lines + // We detect faux multiline fields (text fields flagged as multiline which have a small height to just fit + // one line) - // fit width - float width = font.getStringWidth(value) * font.getFontMatrix().getScaleX(); - float widthBasedFontSize = contentRect.getWidth() / width * xScalingFactor; + float lineHeight = calculateLineHeight(font, font.getFontMatrix().getScaleY()); + float scaledContentHeight = contentRect.getHeight() * yScalingFactor; - // fit height - float height = (font.getFontDescriptor().getCapHeight() - + -font.getFontDescriptor().getDescent()) * font.getFontMatrix().getScaleY(); - if (height <= 0) + boolean looksLikeFauxMultiline = calculateLineHeight(font, DEFAULT_FONT_SIZE / FONTSCALE) > scaledContentHeight; + boolean userTypedMultipleLines = new PlainText(value).getParagraphs().size() > 1; + + if (looksLikeFauxMultiline && !userTypedMultipleLines) + { + // faux multiline detected + // 
because 1 line written with the default font size would not fit the height + // just continue to the non multiline part of the algorithm + + LOG.warn("Faux multiline field found: {}", field.getFullyQualifiedName()); + } + else { - height = font.getBoundingBox().getHeight() * font.getFontMatrix().getScaleY(); + // calculate a font size which fits at least x lines + float fontSize = scaledContentHeight + / (MINIMUM_LINES_TO_FIT_IN_A_MULTILINE_FIELD * lineHeight); + // don't return a font size larger than the default + return Math.min(fontSize, DEFAULT_FONT_SIZE); } + } - float heightBasedFontSize = contentRect.getHeight() / height * yScalingFactor; + // fit width + float width = font.getStringWidth(value) * font.getFontMatrix().getScaleX(); + float widthBasedFontSize = contentRect.getWidth() / width * xScalingFactor; - return Math.min(heightBasedFontSize, widthBasedFontSize); - } - return fontSize; + // fit height + float height = calculateLineHeight(font, font.getFontMatrix().getScaleY()); + + float heightBasedFontSize = contentRect.getHeight() / height * yScalingFactor; + + return Math.min(heightBasedFontSize, widthBasedFontSize); } /** diff --git a/src/main/java/org/sejda/sambox/pdmodel/interactive/form/FieldUtils.java b/src/main/java/org/sejda/sambox/pdmodel/interactive/form/FieldUtils.java index 48de2a3037f3e5fc107e98258b1074409a88056e..831ee71fa2570082cd8cc456ae8edb624db74323 100644 --- a/src/main/java/org/sejda/sambox/pdmodel/interactive/form/FieldUtils.java +++ b/src/main/java/org/sejda/sambox/pdmodel/interactive/form/FieldUtils.java @@ -173,23 +173,24 @@ public final class FieldUtils } else if (items instanceof COSArray) { - COSArray array = (COSArray) items; - - List<String> result = new ArrayList<>(); - int numItems = ((COSArray) items).size(); - for (int i = 0; i < numItems; i++) + List<String> entryList = new ArrayList<>(); + for (COSBase entry : (COSArray) items) { - COSBase item = array.get(i); - if(item instanceof COSArray) + if (entry instanceof 
COSString) + { + entryList.add(((COSString) entry).getString()); + } + else if (entry instanceof COSArray) { - COSArray pair = (COSArray) array.get(i); - COSString displayValue = (COSString) pair.get(pairIdx); - result.add(displayValue.getString()); - } else if(item instanceof COSString) { - result.add(((COSString) item).getString()); + COSArray cosArray = (COSArray) entry; + if (cosArray.size() >= pairIdx + 1 + && cosArray.get(pairIdx) instanceof COSString) + { + entryList.add(((COSString) cosArray.get(pairIdx)).getString()); + } } } - return result; + return entryList; } return Collections.emptyList(); } diff --git a/src/main/java/org/sejda/sambox/pdmodel/interactive/form/PDAcroForm.java b/src/main/java/org/sejda/sambox/pdmodel/interactive/form/PDAcroForm.java index 0e96bed02f327b433a0ae13e9f87777c462acf37..f6fa7c5cd7cc4c46935724566115ada84cf3f423 100644 --- a/src/main/java/org/sejda/sambox/pdmodel/interactive/form/PDAcroForm.java +++ b/src/main/java/org/sejda/sambox/pdmodel/interactive/form/PDAcroForm.java @@ -16,14 +16,19 @@ */ package org.sejda.sambox.pdmodel.interactive.form; +import static java.util.Objects.isNull; import static java.util.Objects.nonNull; import static java.util.Optional.ofNullable; +import static java.util.function.Function.identity; +import static java.util.stream.Collectors.toMap; import java.io.IOException; import java.util.ArrayList; -import java.util.Collections; import java.util.Iterator; import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.stream.Collectors; import org.sejda.sambox.cos.COSArray; import org.sejda.sambox.cos.COSArrayList; @@ -174,6 +179,12 @@ public final class PDAcroForm extends PDDictionaryWrapper */ public void flatten(List<PDField> fields, boolean refreshAppearances) throws IOException { + // Nothing to flatten if there are no fields provided + if (fields.isEmpty()) + { + return; + } + // for dynamic XFA forms there is no flatten as this would mean to do a rendering // from the 
XFA content into a static PDF. if (xfaIsDynamic()) @@ -194,22 +205,24 @@ public final class PDAcroForm extends PDDictionaryWrapper // the content stream to write to PDPageContentStream contentStream; + Map<COSDictionary, PDAnnotationWidget> toFlatten = widgets(fields); // preserve all non widget annotations for (PDPage page : document.getPages()) { isContentStreamWrapped = false; - List<PDAnnotation> annotations = new ArrayList<PDAnnotation>(); + List<PDAnnotation> annotations = new ArrayList<>(); for (PDAnnotation annotation : page.getAnnotations()) { - if (!(annotation instanceof PDAnnotationWidget)) + PDAnnotationWidget widget = toFlatten.get(annotation.getCOSObject()); + if (isNull(widget)) { annotations.add(annotation); } else if (!annotation.isInvisible() && !annotation.isHidden() - && annotation.getNormalAppearanceStream() != null) + && nonNull(annotation.getNormalAppearanceStream())) { if (!isContentStreamWrapped) { @@ -276,8 +289,7 @@ public final class PDAcroForm extends PDDictionaryWrapper page.setAnnotations(annotations); } - // remove the fields - setFields(Collections.<PDField> emptyList()); + removeFields(fields); // remove XFA for hybrid forms getCOSObject().removeItem(COSName.XFA); @@ -332,20 +344,19 @@ public final class PDAcroForm extends PDDictionaryWrapper */ public List<PDField> getFields() { - List<PDField> pdFields = new ArrayList<>(); - COSArray fields = getCOSObject().getDictionaryObject(COSName.FIELDS, COSArray.class); - if (nonNull(fields)) + return fieldsFromArray(getCOSObject().getDictionaryObject(COSName.FIELDS, COSArray.class)); + } + + private List<PDField> fieldsFromArray(COSArray array) + { + if (nonNull(array) && array.size() > 0) { - for (COSBase field : fields) - { - if (nonNull(field) && field.getCOSObject() instanceof COSDictionary) - { - pdFields.add(PDField.fromDictionary(this, (COSDictionary) field.getCOSObject(), - null)); - } - } + return array.stream().filter(Objects::nonNull).map(COSBase::getCOSObject) + .filter(d -> d 
instanceof COSDictionary) + .map(d -> PDField.fromDictionary(this, (COSDictionary) d, null)) + .collect(Collectors.toList()); } - return pdFields; + return new ArrayList<>(); } /** @@ -447,6 +458,16 @@ public final class PDAcroForm extends PDDictionaryWrapper getCOSObject().setString(COSName.DA, daValue); } + public List<PDField> getCalculationOrder() + { + return fieldsFromArray(getCOSObject().getDictionaryObject(COSName.CO, COSArray.class)); + } + + public void setCalculationOrder(COSArray co) + { + getCOSObject().setItem(COSName.CO, co); + } + /** * True if the viewing application should construct the appearances of all field widgets. The default value is * false. @@ -601,7 +622,7 @@ public final class PDAcroForm extends PDDictionaryWrapper */ private boolean resolveNeedsTranslation(PDAppearanceStream appearanceStream) { - boolean needsTranslation = false; + boolean needsTranslation = true; PDResources resources = appearanceStream.getResources(); if (resources != null && resources.getXObjectNames().iterator().hasNext()) @@ -621,9 +642,9 @@ public final class PDAcroForm extends PDDictionaryWrapper PDRectangle bbox = ((PDFormXObject) xObject).getBBox(); float llX = bbox.getLowerLeftX(); float llY = bbox.getLowerLeftY(); - if (llX == 0 && llY == 0) + if (Float.compare(llX, 0) != 0 && Float.compare(llY, 0) != 0) { - needsTranslation = true; + needsTranslation = false; } } } @@ -651,4 +672,34 @@ public final class PDAcroForm extends PDDictionaryWrapper PDResources resources = appearanceStream.getResources(); return resources != null && resources.getXObjectNames().iterator().hasNext(); } + + private Map<COSDictionary, PDAnnotationWidget> widgets(List<PDField> fields) + { + return fields.stream().flatMap(f -> f.getWidgets().stream()) + .collect(toMap(w -> w.getCOSObject(), identity())); + + } + + private void removeFields(List<PDField> fields) + { + for (PDField current : fields) + { + if (current.isTerminal()) + { + if (nonNull(current.getParent())) + { + 
current.getParent().removeChild(current); + } + else + { + // it's a root field + removeField(current); + } + } + else + { + LOG.warn("Unable to remove non terminal field {}", current.getFullyQualifiedName()); + } + } + } } diff --git a/src/main/java/org/sejda/sambox/pdmodel/interactive/form/PDDefaultAppearanceString.java b/src/main/java/org/sejda/sambox/pdmodel/interactive/form/PDDefaultAppearanceString.java index 26464b6cb25b6b4373fcb9e0a38531134e2846fc..7068b5586e08c3500bdfa9164f91cd8521b74d5e 100644 --- a/src/main/java/org/sejda/sambox/pdmodel/interactive/form/PDDefaultAppearanceString.java +++ b/src/main/java/org/sejda/sambox/pdmodel/interactive/form/PDDefaultAppearanceString.java @@ -92,6 +92,10 @@ class PDDefaultAppearanceString processAppearanceStringOperators(defaultAppearance.getBytes()); } + PDDefaultAppearanceString() throws IOException { + this(null, null); + } + /** * Processes the operators of the given content stream. * @@ -248,7 +252,7 @@ class PDDefaultAppearanceString /** * Returns the font. 
*/ - PDFont getFont() throws IOException + PDFont getFont() { return font; } @@ -309,7 +313,11 @@ class PDDefaultAppearanceString { fontSize = zeroFontSize; } - contents.setFont(getFont(), fontSize); + + if(getFont() != null) + { + contents.setFont(getFont(), fontSize); + } if (getFontColor() != null) { diff --git a/src/main/java/org/sejda/sambox/pdmodel/interactive/form/PDNonTerminalField.java b/src/main/java/org/sejda/sambox/pdmodel/interactive/form/PDNonTerminalField.java index 6643a6067a7ac5d49fcdb7f09c2037848fc559f2..63d6a6eb29bcf0a51d9591ae958001c36a46ce19 100644 --- a/src/main/java/org/sejda/sambox/pdmodel/interactive/form/PDNonTerminalField.java +++ b/src/main/java/org/sejda/sambox/pdmodel/interactive/form/PDNonTerminalField.java @@ -30,6 +30,8 @@ import org.sejda.sambox.cos.COSDictionary; import org.sejda.sambox.cos.COSInteger; import org.sejda.sambox.cos.COSName; import org.sejda.sambox.pdmodel.interactive.annotation.PDAnnotationWidget; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** * A non terminal field in an interactive form. @@ -41,6 +43,9 @@ import org.sejda.sambox.pdmodel.interactive.annotation.PDAnnotationWidget; */ public class PDNonTerminalField extends PDField { + + private static final Logger LOG = LoggerFactory.getLogger(PDNonTerminalField.class); + /** * Constructor. 
* @@ -89,6 +94,11 @@ public class PDNonTerminalField extends PDField { if (nonNull(kid) && kid.getCOSObject() instanceof COSDictionary) { + if (kid.getCOSObject() == this.getCOSObject()) + { + LOG.warn("Child field is same object as parent"); + continue; + } children.add(PDField.fromDictionary(getAcroForm(), (COSDictionary) kid.getCOSObject(), this)); } diff --git a/src/main/java/org/sejda/sambox/pdmodel/interactive/form/PDTerminalField.java b/src/main/java/org/sejda/sambox/pdmodel/interactive/form/PDTerminalField.java index 380474218b0fa975c5563190b97f50af9e4cac83..b5e48f182cbb227c1e15f6a9045821a5ddc45fc4 100644 --- a/src/main/java/org/sejda/sambox/pdmodel/interactive/form/PDTerminalField.java +++ b/src/main/java/org/sejda/sambox/pdmodel/interactive/form/PDTerminalField.java @@ -16,19 +16,20 @@ */ package org.sejda.sambox.pdmodel.interactive.form; +import static java.util.Objects.isNull; import static java.util.Objects.nonNull; import java.io.IOException; -import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; import java.util.List; +import java.util.stream.Collectors; import org.sejda.sambox.cos.COSArray; import org.sejda.sambox.cos.COSArrayList; -import org.sejda.sambox.cos.COSBase; import org.sejda.sambox.cos.COSDictionary; import org.sejda.sambox.cos.COSInteger; import org.sejda.sambox.cos.COSName; -import org.sejda.sambox.cos.COSNull; import org.sejda.sambox.pdmodel.interactive.action.PDFormFieldAdditionalActions; import org.sejda.sambox.pdmodel.interactive.annotation.PDAnnotationWidget; @@ -106,25 +107,21 @@ public abstract class PDTerminalField extends PDField @Override public List<PDAnnotationWidget> getWidgets() { - List<PDAnnotationWidget> widgets = new ArrayList<>(); - COSArray kids = (COSArray) getCOSObject().getDictionaryObject(COSName.KIDS); - if (kids == null) + COSArray kids = getCOSObject().getDictionaryObject(COSName.KIDS, COSArray.class); + if (isNull(kids)) { // the field itself is a widget - widgets.add(new 
PDAnnotationWidget(getCOSObject())); + return Arrays.asList(new PDAnnotationWidget(getCOSObject())); } - else if (kids.size() > 0) + if (kids.size() > 0) { - // there are multiple widgets - for (COSBase kid : kids) - { - if (nonNull(kid) && !COSNull.NULL.equals(kid.getCOSObject())) - { - widgets.add(new PDAnnotationWidget((COSDictionary) kid.getCOSObject())); - } - } + return kids.stream().filter(k -> nonNull(k)).map(k -> k.getCOSObject()) + .filter(k -> k instanceof COSDictionary) + .map(k -> new PDAnnotationWidget((COSDictionary) k)) + .collect(Collectors.toList()); + } - return widgets; + return Collections.emptyList(); } /** diff --git a/src/main/java/org/sejda/sambox/pdmodel/interactive/form/PDVariableText.java b/src/main/java/org/sejda/sambox/pdmodel/interactive/form/PDVariableText.java index 3027cfc431dab32873953dcb7f4553968003eaa8..03e93fb67553c53bbee6f1212eb2c4bb1311a8f4 100644 --- a/src/main/java/org/sejda/sambox/pdmodel/interactive/form/PDVariableText.java +++ b/src/main/java/org/sejda/sambox/pdmodel/interactive/form/PDVariableText.java @@ -91,9 +91,16 @@ public abstract class PDVariableText extends PDTerminalField */ PDDefaultAppearanceString getDefaultAppearanceString() throws IOException { - COSString da = (COSString) getInheritableAttribute(COSName.DA); - PDResources dr = getAcroForm().getDefaultResources(); - return new PDDefaultAppearanceString(da, dr); + try + { + COSString da = (COSString) getInheritableAttribute(COSName.DA); + PDResources dr = getAcroForm().getDefaultResources(); + return new PDDefaultAppearanceString(da, dr); + } + catch (IOException ex) + { + return new PDDefaultAppearanceString(); + } } /** diff --git a/src/main/java/org/sejda/sambox/pdmodel/interactive/measurement/PDViewportDictionary.java b/src/main/java/org/sejda/sambox/pdmodel/interactive/measurement/PDViewportDictionary.java index 0515a8520bfea63ba27b6cc39e00daf5a0ae03d0..6b89b67c6aae8053ae0ebea14d10e889fec3d71f 100644 --- 
a/src/main/java/org/sejda/sambox/pdmodel/interactive/measurement/PDViewportDictionary.java +++ b/src/main/java/org/sejda/sambox/pdmodel/interactive/measurement/PDViewportDictionary.java @@ -16,10 +16,7 @@ */ package org.sejda.sambox.pdmodel.interactive.measurement; -import org.sejda.sambox.cos.COSArray; -import org.sejda.sambox.cos.COSDictionary; -import org.sejda.sambox.cos.COSName; -import org.sejda.sambox.cos.COSObjectable; +import org.sejda.sambox.cos.*; import org.sejda.sambox.pdmodel.common.PDRectangle; /** @@ -34,7 +31,7 @@ public class PDViewportDictionary implements COSObjectable */ public static final String TYPE = "Viewport"; - private COSDictionary viewportDictionary; + private final COSDictionary viewportDictionary; /** * Constructor. @@ -83,10 +80,10 @@ public class PDViewportDictionary implements COSObjectable */ public PDRectangle getBBox() { - COSArray bbox = (COSArray)this.getCOSObject().getDictionaryObject("BBox"); - if (bbox != null) + COSBase bbox = this.getCOSObject().getDictionaryObject(COSName.BBOX); + if (bbox instanceof COSArray) { - return new PDRectangle(bbox); + return new PDRectangle((COSArray) bbox); } return null; } @@ -98,7 +95,7 @@ public class PDViewportDictionary implements COSObjectable */ public void setBBox(PDRectangle rectangle) { - this.getCOSObject().setItem("BBox", rectangle); + this.getCOSObject().setItem(COSName.BBOX, rectangle); } /** @@ -128,10 +125,10 @@ public class PDViewportDictionary implements COSObjectable */ public PDMeasureDictionary getMeasure() { - COSDictionary measure = (COSDictionary)this.getCOSObject().getDictionaryObject("Measure"); - if (measure != null) + COSBase base = this.getCOSObject().getDictionaryObject(COSName.MEASURE); + if (base instanceof COSDictionary) { - return new PDMeasureDictionary(measure); + return new PDMeasureDictionary((COSDictionary) base); } return null; } @@ -143,7 +140,7 @@ public class PDViewportDictionary implements COSObjectable */ public void setMeasure(PDMeasureDictionary 
measure) { - this.getCOSObject().setItem("Measure", measure); + this.getCOSObject().setItem(COSName.MEASURE, measure); } } diff --git a/src/main/java/org/sejda/sambox/rendering/ImageType.java b/src/main/java/org/sejda/sambox/rendering/ImageType.java index af9ca2c8587e4b577ee8d14bc8131f1969060c58..bfdd65c7212e88c65751a118c1e84e52bf0e6617 100644 --- a/src/main/java/org/sejda/sambox/rendering/ImageType.java +++ b/src/main/java/org/sejda/sambox/rendering/ImageType.java @@ -27,7 +27,7 @@ public enum ImageType BINARY { @Override - int toBufferedImageType() + public int toBufferedImageType() { return BufferedImage.TYPE_BYTE_BINARY; } @@ -37,7 +37,7 @@ public enum ImageType GRAY { @Override - int toBufferedImageType() + public int toBufferedImageType() { return BufferedImage.TYPE_BYTE_GRAY; } @@ -47,7 +47,7 @@ public enum ImageType RGB { @Override - int toBufferedImageType() + public int toBufferedImageType() { return BufferedImage.TYPE_INT_RGB; } @@ -57,11 +57,11 @@ public enum ImageType ARGB { @Override - int toBufferedImageType() + public int toBufferedImageType() { return BufferedImage.TYPE_INT_ARGB; } }; - abstract int toBufferedImageType(); + public abstract int toBufferedImageType(); } diff --git a/src/main/java/org/sejda/sambox/rendering/PDFRenderer.java b/src/main/java/org/sejda/sambox/rendering/PDFRenderer.java index ed69f9d0d0454a2791435f4846a56699881b243d..ea13ba5ced67a374f3b2df82fd59186ccd555c19 100644 --- a/src/main/java/org/sejda/sambox/rendering/PDFRenderer.java +++ b/src/main/java/org/sejda/sambox/rendering/PDFRenderer.java @@ -21,9 +21,15 @@ import java.awt.Graphics2D; import java.awt.image.BufferedImage; import java.io.IOException; +import org.sejda.sambox.cos.COSName; import org.sejda.sambox.pdmodel.PDDocument; import org.sejda.sambox.pdmodel.PDPage; +import org.sejda.sambox.pdmodel.PDResources; import org.sejda.sambox.pdmodel.common.PDRectangle; +import org.sejda.sambox.pdmodel.graphics.blend.BlendMode; +import 
org.sejda.sambox.pdmodel.graphics.state.PDExtendedGraphicsState; +import org.sejda.sambox.pdmodel.interactive.annotation.AnnotationFilter; +import org.sejda.sambox.pdmodel.interactive.annotation.PDAnnotation; /** * Renders a PDF document to an AWT BufferedImage. This class may be overridden in order to perform custom rendering. @@ -35,6 +41,18 @@ public class PDFRenderer protected final PDDocument document; // TODO keep rendering state such as caches here + /** + * Default annotations filter, returns all annotations + */ + private AnnotationFilter annotationFilter = new AnnotationFilter() + { + @Override + public boolean accept(PDAnnotation annotation) + { + return true; + } + }; + /** * Creates a new PDFRenderer. * @@ -45,6 +63,29 @@ public class PDFRenderer this.document = document; } + /** + * Return the AnnotationFilter. + * + * @return the AnnotationFilter + */ + public AnnotationFilter getAnnotationsFilter() + { + return annotationFilter; + } + + /** + * Set the AnnotationFilter. + * + * <p> + * Allows to only render annotation accepted by the filter. + * + * @param annotationsFilter the AnnotationFilter + */ + public void setAnnotationsFilter(AnnotationFilter annotationsFilter) + { + this.annotationFilter = annotationsFilter; + } + /** * Returns the given page as an RGB image at 72 DPI * @@ -84,10 +125,12 @@ public class PDFRenderer } /** - * @param page the zero-based index of the page to be converted + * Returns the given page as an RGB image at the given DPI. 
+ * + * @param pageIndex the zero-based index of the page to be converted * @param dpi the DPI (dots per inch) to render at * @param imageType the type of image to return - * @return the page rendered as a {@link BufferedImage} + * @return the rendered page image * @throws IOException if the PDF cannot be read */ public BufferedImage renderImageWithDPI(int pageIndex, float dpi, ImageType imageType) @@ -97,40 +140,16 @@ public class PDFRenderer } /** - * @param page the zero-based index of the page to be converted - * @param dpi the DPI (dots per inch) to render at - * @param bufferedImageType the type of image to return - * @return the page rendered as a {@link BufferedImage} - * @throws IOException if the PDF cannot be read - */ - public BufferedImage renderImageWithDPI(int page, float dpi, int bufferedImageType) - throws IOException - { - return renderImage(page, dpi / 72f, bufferedImageType); - } - - /** + * Returns the given page as an RGB or ARGB image at the given scale. + * * @param pageIndex the zero-based index of the page to be converted * @param scale the scaling factor, where 1 = 72 DPI - * @param bufferedImageType the type of image to return - * @return the page rendered as a {@link BufferedImage} + * @param imageType the type of image to return + * @return the rendered page image * @throws IOException if the PDF cannot be read */ public BufferedImage renderImage(int pageIndex, float scale, ImageType imageType) throws IOException - { - return renderImage(pageIndex, scale, imageType.toBufferedImageType()); - } - - /** - * @param pageIndex the zero-based index of the page to be converted - * @param scale the scaling factor, where 1 = 72 DPI - * @param bufferedImageType the type of image to return - * @return the page rendered as a {@link BufferedImage} - * @throws IOException if the PDF cannot be read - */ - public BufferedImage renderImage(int pageIndex, float scale, int bufferedImageType) - throws IOException { PDPage page = 
document.getPage(pageIndex); @@ -141,20 +160,30 @@ public class PDFRenderer int heightPx = Math.round(heightPt * scale); int rotationAngle = page.getRotation(); + int bimType = imageType.toBufferedImageType(); + if (imageType != ImageType.ARGB && hasBlendMode(page)) + { + // PDFBOX-4095: if the PDF has blending on the top level, draw on transparent background + // Inpired from PDF.js: if a PDF page uses any blend modes other than Normal, + // PDF.js renders everything on a fully transparent RGBA canvas. + // Finally when the page has been rendered, PDF.js draws the RGBA canvas on a white canvas. + bimType = BufferedImage.TYPE_INT_ARGB; + } + // swap width and height BufferedImage image; if (rotationAngle == 90 || rotationAngle == 270) { - image = new BufferedImage(heightPx, widthPx, bufferedImageType); + image = new BufferedImage(heightPx, widthPx, bimType); } else { - image = new BufferedImage(widthPx, heightPx, bufferedImageType); + image = new BufferedImage(widthPx, heightPx, bimType); } - // use a transparent background if the imageType supports alpha + // use a transparent background if the image type supports alpha Graphics2D g = image.createGraphics(); - if (bufferedImageType == BufferedImage.TYPE_INT_ARGB) + if (image.getType() == BufferedImage.TYPE_INT_ARGB) { g.setBackground(new Color(0, 0, 0, 0)); } @@ -164,7 +193,7 @@ public class PDFRenderer } g.clearRect(0, 0, image.getWidth(), image.getHeight()); - transform(g, page, scale); + transform(g, page, scale, scale); // the end-user may provide a custom PageDrawer PageDrawerParameters parameters = new PageDrawerParameters(this, page); @@ -173,6 +202,19 @@ public class PDFRenderer g.dispose(); + if (image.getType() != imageType.toBufferedImageType()) + { + // PDFBOX-4095: draw temporary transparent image on white background + BufferedImage newImage = new BufferedImage(image.getWidth(), image.getHeight(), + imageType.toBufferedImageType()); + Graphics2D dstGraphics = newImage.createGraphics(); + 
dstGraphics.setBackground(Color.WHITE); + dstGraphics.clearRect(0, 0, image.getWidth(), image.getHeight()); + dstGraphics.drawImage(image, 0, 0, null); + dstGraphics.dispose(); + image = newImage; + } + return image; } @@ -198,11 +240,27 @@ public class PDFRenderer */ public void renderPageToGraphics(int pageIndex, Graphics2D graphics, float scale) throws IOException + { + renderPageToGraphics(pageIndex, graphics, scale, scale); + } + + /** + * Renders a given page to an AWT Graphics2D instance. + * + * @param pageIndex the zero-based index of the page to be converted + * @param graphics the Graphics2D on which to draw the page + * @param scaleX the scale to draw the page at for the x-axis + * @param scaleY the scale to draw the page at for the y-axis + * @throws IOException if the PDF cannot be read + */ + public void renderPageToGraphics(int pageIndex, Graphics2D graphics, float scaleX, float scaleY) + throws IOException + { PDPage page = document.getPage(pageIndex); // TODO need width/wight calculations? should these be in PageDrawer? - transform(graphics, page, scale); + transform(graphics, page, scaleX, scaleY); PDRectangle cropBox = page.getCropBox(); graphics.clearRect(0, 0, (int) cropBox.getWidth(), (int) cropBox.getHeight()); @@ -213,10 +271,10 @@ public class PDFRenderer drawer.drawPage(graphics, cropBox); } - /// scale rotate translate - private void transform(Graphics2D graphics, PDPage page, float scale) + // scale rotate translate + private void transform(Graphics2D graphics, PDPage page, float scaleX, float scaleY) { - graphics.scale(scale, scale); + graphics.scale(scaleX, scaleY); // TODO should we be passing the scale to PageDrawer rather than messing with Graphics? 
int rotationAngle = page.getRotation(); @@ -251,6 +309,34 @@ public class PDFRenderer */ protected PageDrawer createPageDrawer(PageDrawerParameters parameters) throws IOException { - return new PageDrawer(parameters); + PageDrawer pageDrawer = new PageDrawer(parameters); + pageDrawer.setAnnotationFilter(annotationFilter); + return pageDrawer; + } + + private boolean hasBlendMode(PDPage page) + { + // check the current resources for blend modes + PDResources resources = page.getResources(); + if (resources == null) + { + return false; + } + for (COSName name : resources.getExtGStateNames()) + { + PDExtendedGraphicsState extGState = resources.getExtGState(name); + if (extGState == null) + { + // can happen if key exists but no value + // see PDFBOX-3950-23EGDHXSBBYQLKYOKGZUOVYVNE675PRD.pdf + continue; + } + BlendMode blendMode = extGState.getBlendMode(); + if (blendMode != BlendMode.NORMAL) + { + return true; + } + } + return false; } } diff --git a/src/main/java/org/sejda/sambox/rendering/PageDrawer.java b/src/main/java/org/sejda/sambox/rendering/PageDrawer.java index 77d1e2ea6e75bc79f8fccbce7d70ab2c5cc73a61..24998bc7cf27ebf7558cf06fe2c4810e03654ba2 100644 --- a/src/main/java/org/sejda/sambox/rendering/PageDrawer.java +++ b/src/main/java/org/sejda/sambox/rendering/PageDrawer.java @@ -43,7 +43,9 @@ import java.awt.image.DataBufferByte; import java.awt.image.Raster; import java.awt.image.WritableRaster; import java.io.IOException; +import java.util.ArrayList; import java.util.HashMap; +import java.util.List; import java.util.Map; import org.sejda.sambox.contentstream.PDFGraphicsStreamEngine; @@ -76,6 +78,7 @@ import org.sejda.sambox.pdmodel.graphics.shading.PDShading; import org.sejda.sambox.pdmodel.graphics.state.PDGraphicsState; import org.sejda.sambox.pdmodel.graphics.state.PDSoftMask; import org.sejda.sambox.pdmodel.graphics.state.RenderingMode; +import org.sejda.sambox.pdmodel.interactive.annotation.AnnotationFilter; import 
org.sejda.sambox.pdmodel.interactive.annotation.PDAnnotation; import org.sejda.sambox.pdmodel.interactive.annotation.PDAnnotationLink; import org.sejda.sambox.pdmodel.interactive.annotation.PDAnnotationMarkup; @@ -90,8 +93,9 @@ import org.slf4j.LoggerFactory; * * <p> * If you want to do custom graphics processing rather than Graphics2D rendering, then you should subclass - * PDFGraphicsStreamEngine instead. Subclassing PageDrawer is only suitable for cases where the goal is to render onto a - * Graphics2D surface. + * {@link PDFGraphicsStreamEngine} instead. Subclassing PageDrawer is only suitable for cases where the goal is to + * render onto a {@link Graphics2D} surface. In that case you'll also have to subclass {@link PDFRenderer} and modify + * {@link PDFRenderer#createPageDrawer(PageDrawerParameters)}. * * @author Ben Litchfield */ @@ -122,14 +126,26 @@ public class PageDrawer extends PDFGraphicsStreamEngine // last clipping path private Area lastClip; - // buffered clipping area for text being drawn - private Area textClippingArea; + // shapes of glyphs being drawn to be used for clipping + private List<Shape> textClippings; // glyph cache - private final Map<PDFont, Glyph2D> fontGlyph2D = new HashMap<PDFont, Glyph2D>(); + private final Map<PDFont, Glyph2D> fontGlyph2D = new HashMap<>(); private final TilingPaintFactory tilingPaintFactory = new TilingPaintFactory(this); + /** + * Default annotations filter, returns all annotations + */ + private AnnotationFilter annotationFilter = new AnnotationFilter() + { + @Override + public boolean accept(PDAnnotation annotation) + { + return true; + } + }; + /** * Constructor. * @@ -142,6 +158,29 @@ public class PageDrawer extends PDFGraphicsStreamEngine this.renderer = parameters.getRenderer(); } + /** + * Return the AnnotationFilter. + * + * @return the AnnotationFilter + */ + public AnnotationFilter getAnnotationFilter() + { + return annotationFilter; + } + + /** + * Set the AnnotationFilter. 
+ * + * <p> + * Allows to only render annotation accepted by the filter. + * + * @param annotationFilter the AnnotationFilter + */ + public void setAnnotationFilter(AnnotationFilter annotationFilter) + { + this.annotationFilter = annotationFilter; + } + /** * Returns the parent renderer. */ @@ -203,7 +242,7 @@ public class PageDrawer extends PDFGraphicsStreamEngine processPage(getPage()); - for (PDAnnotation annotation : getPage().getAnnotations()) + for (PDAnnotation annotation : getPage().getAnnotations(annotationFilter)) { showAnnotation(annotation); } @@ -333,8 +372,8 @@ public class PageDrawer extends PDFGraphicsStreamEngine */ private void beginTextClip() { - // buffer the text clip because it represents a single clipping area - textClippingArea = new Area(); + // buffer the text clippings because they represents a single clipping area + textClippings = new ArrayList<>(); } /** @@ -346,10 +385,17 @@ public class PageDrawer extends PDFGraphicsStreamEngine RenderingMode renderingMode = state.getTextState().getRenderingMode(); // apply the buffered clip as one area - if (renderingMode.isClip() && !textClippingArea.isEmpty()) + if (renderingMode.isClip() && !textClippings.isEmpty()) { - state.intersectClippingPath(textClippingArea); - textClippingArea = null; + // PDFBOX-4150: this is much faster than using textClippingArea.add(new Area(glyph)) + // https://stackoverflow.com/questions/21519007/fast-union-of-shapes-in-java + GeneralPath path = new GeneralPath(); + for (Shape shape : textClippings) + { + path.append(shape, false); + } + state.intersectClippingPath(path); + textClippings = new ArrayList<>(); // PDFBOX-3681: lastClip needs to be reset, because after intersection it is still the same // object, thus setClip() would believe that it is cached. 
@@ -387,8 +433,11 @@ public class PageDrawer extends PDFGraphicsStreamEngine GeneralPath path = glyph2D.getPathForCharacterCode(code); if (path != null) { - // stretch non-embedded glyph if it does not match the width contained in the PDF - if (!font.isEmbedded()) + // Stretch non-embedded glyph if it does not match the height/width contained in the PDF. + // Vertical fonts have zero X displacement, so the following code scales to 0 if we don't skip it. + // TODO: How should vertical fonts be handled? + if (!font.isEmbedded() && !font.isVertical() && !font.isStandard14() + && font.hasExplicitWidth(code)) { float fontWidth = font.getWidthFromFont(code); if (fontWidth > 0 && // ignore spaces @@ -421,7 +470,7 @@ public class PageDrawer extends PDFGraphicsStreamEngine if (renderingMode.isClip()) { - textClippingArea.add(new Area(glyph)); + textClippings.add(glyph); } } } @@ -1153,7 +1202,12 @@ public class PageDrawer extends PDFGraphicsStreamEngine lastClip = null; // TODO support more annotation flags (Invisible, NoZoom, NoRotate) // Example for NoZoom can be found in p5 of PDFBOX-2348 - int deviceType = graphics.getDeviceConfiguration().getDevice().getType(); + int deviceType = -1; + if (graphics.getDeviceConfiguration() != null + && graphics.getDeviceConfiguration().getDevice() != null) + { + deviceType = graphics.getDeviceConfiguration().getDevice().getType(); + } if (deviceType == GraphicsDevice.TYPE_PRINTER && !annotation.isPrinted()) { return; @@ -1371,13 +1425,16 @@ public class PageDrawer extends PDFGraphicsStreamEngine // adjust bbox (x,y) position at the initial scale + cropbox float x = bbox.getLowerLeftX() - pageSize.getLowerLeftX(); float y = pageSize.getUpperRightY() - bbox.getUpperRightY(); - graphics.translate(x * xScale, y * yScale); if (flipTG) { graphics.translate(0, image.getHeight()); graphics.scale(1, -1); } + else + { + graphics.translate(x * xScale, y * yScale); + } PDSoftMask softMask = getGraphicsState().getSoftMask(); if (softMask != null) 
diff --git a/src/main/java/org/sejda/sambox/rendering/TTFGlyph2D.java b/src/main/java/org/sejda/sambox/rendering/TTFGlyph2D.java index 7ac31ff62dd7259ebf06beea1127dbe4da600d71..3834c12b5d86255c1dd7c887b63a0916eae33bd1 100644 --- a/src/main/java/org/sejda/sambox/rendering/TTFGlyph2D.java +++ b/src/main/java/org/sejda/sambox/rendering/TTFGlyph2D.java @@ -114,6 +114,13 @@ final class TTFGlyph2D implements Glyph2D */ public GeneralPath getPathForGID(int gid, int code) throws IOException { + if (gid == 0 && !isCIDFont && code == 10 && font.isStandard14()) + { + // PDFBOX-4001 return empty path for line feed on std14 + // need to catch this early because all "bad" glyphs have gid 0 + LOG.warn("No glyph for code " + code + " in font " + font.getName()); + return new GeneralPath(); + } GeneralPath glyphPath = glyphs.get(gid); if (glyphPath == null) { @@ -123,8 +130,8 @@ final class TTFGlyph2D implements Glyph2D { int cid = ((PDType0Font) font).codeToCID(code); String cidHex = String.format("%04x", cid); - LOG.warn("No glyph for " + code + " (CID " + cidHex + ") in font " - + font.getName()); + LOG.warn("No glyph for code " + code + " (CID " + cidHex + ") in font " + + font.getName()); } else { diff --git a/src/main/java/org/sejda/sambox/rendering/TilingPaintFactory.java b/src/main/java/org/sejda/sambox/rendering/TilingPaintFactory.java index 9474631d46bfa14ee14b4a351f148eb99486aa22..dafa61c6704a591db760af10ce70c8b5c5d490a3 100644 --- a/src/main/java/org/sejda/sambox/rendering/TilingPaintFactory.java +++ b/src/main/java/org/sejda/sambox/rendering/TilingPaintFactory.java @@ -18,6 +18,7 @@ package org.sejda.sambox.rendering; import java.awt.geom.AffineTransform; import java.io.IOException; +import java.lang.ref.WeakReference; import java.util.Map; import java.util.WeakHashMap; @@ -35,7 +36,7 @@ import org.sejda.sambox.util.Matrix; class TilingPaintFactory { private final PageDrawer drawer; - private final Map<TilingPaintParameter, TilingPaint> weakCache = new WeakHashMap<>(); 
+ private final Map<TilingPaintParameter, WeakReference<TilingPaint>> weakCache = new WeakHashMap<>(); TilingPaintFactory(PageDrawer drawer) { @@ -45,14 +46,19 @@ class TilingPaintFactory TilingPaint create(PDTilingPattern pattern, PDColorSpace colorSpace, PDColor color, AffineTransform xform) throws IOException { - TilingPaint paint; - TilingPaintParameter tilingPaintParameter = new TilingPaintParameter( - drawer.getInitialMatrix(), pattern.getCOSObject(), colorSpace, color, xform); - paint = weakCache.get(tilingPaintParameter); + TilingPaint paint = null; + TilingPaintParameter tilingPaintParameter + = new TilingPaintParameter(drawer.getInitialMatrix(), pattern.getCOSObject(), colorSpace, color, xform); + WeakReference<TilingPaint> weakRef = weakCache.get(tilingPaintParameter); + if (weakRef != null) + { + // PDFBOX-4058: additional WeakReference makes gc work better + paint = weakRef.get(); + } if (paint == null) { paint = new TilingPaint(drawer, pattern, colorSpace, color, xform); - weakCache.put(tilingPaintParameter, paint); + weakCache.put(tilingPaintParameter, new WeakReference(paint)); } return paint; } diff --git a/src/main/java/org/sejda/sambox/rendering/Type1Glyph2D.java b/src/main/java/org/sejda/sambox/rendering/Type1Glyph2D.java index 494fbb3856101e2f3fb3bc7f91fe09d8f6d2b2c0..6e90e88c31bb38ba842ac92f1c06096920e57f47 100644 --- a/src/main/java/org/sejda/sambox/rendering/Type1Glyph2D.java +++ b/src/main/java/org/sejda/sambox/rendering/Type1Glyph2D.java @@ -58,7 +58,14 @@ final class Type1Glyph2D implements Glyph2D String name = font.getEncoding().getName(code); if (!font.hasGlyph(name)) { - LOG.warn("No glyph for " + code + " (" + name + ") in font " + font.getName()); + LOG.warn("No glyph for code " + code + " (" + name + ") in font " + font.getName()); + if (code == 10 && font.isStandard14()) + { + // PDFBOX-4001 return empty path for line feed on std14 + path = new GeneralPath(); + cache.put(code, path); + return path; + } } // todo: can this happen? 
should it be encapsulated? diff --git a/src/main/java/org/sejda/sambox/text/PDFTextStreamEngine.java b/src/main/java/org/sejda/sambox/text/PDFTextStreamEngine.java index a502f02a8557849e5f6f7e2f2a555db636c65b8d..8d6771302d49ac210a7708deac6563553db77cdc 100644 --- a/src/main/java/org/sejda/sambox/text/PDFTextStreamEngine.java +++ b/src/main/java/org/sejda/sambox/text/PDFTextStreamEngine.java @@ -46,14 +46,7 @@ import org.sejda.sambox.contentstream.operator.text.ShowTextLine; import org.sejda.sambox.contentstream.operator.text.ShowTextLineAndSpace; import org.sejda.sambox.pdmodel.PDPage; import org.sejda.sambox.pdmodel.common.PDRectangle; -import org.sejda.sambox.pdmodel.font.PDCIDFont; -import org.sejda.sambox.pdmodel.font.PDCIDFontType2; -import org.sejda.sambox.pdmodel.font.PDFont; -import org.sejda.sambox.pdmodel.font.PDFontDescriptor; -import org.sejda.sambox.pdmodel.font.PDSimpleFont; -import org.sejda.sambox.pdmodel.font.PDTrueTypeFont; -import org.sejda.sambox.pdmodel.font.PDType0Font; -import org.sejda.sambox.pdmodel.font.PDType3Font; +import org.sejda.sambox.pdmodel.font.*; import org.sejda.sambox.pdmodel.font.encoding.GlyphList; import org.sejda.sambox.pdmodel.graphics.state.PDGraphicsState; import org.sejda.sambox.util.Matrix; diff --git a/src/main/java/org/sejda/sambox/text/PDFTextStripper.java b/src/main/java/org/sejda/sambox/text/PDFTextStripper.java index a66855c3fe47b1d9b2234822cde889e7e6048fcb..6ed0ffda0121515b70de1cc9d2236a8b433ae6c0 100644 --- a/src/main/java/org/sejda/sambox/text/PDFTextStripper.java +++ b/src/main/java/org/sejda/sambox/text/PDFTextStripper.java @@ -738,7 +738,7 @@ public class PDFTextStripper extends PDFTextStreamEngine /** * Write the line separator value to the output stream. * - * @throws IOException If there is a problem writing out the lineseparator to the document. + * @throws IOException If there is a problem writing out the line separator to the document. 
*/ protected void writeLineSeparator() throws IOException { @@ -748,7 +748,7 @@ public class PDFTextStripper extends PDFTextStreamEngine /** * Write the word separator value to the output stream. * - * @throws IOException If there is a problem writing out the wordseparator to the document. + * @throws IOException If there is a problem writing out the word separator to the document. */ protected void writeWordSeparator() throws IOException { diff --git a/src/main/java/org/sejda/sambox/text/TextPosition.java b/src/main/java/org/sejda/sambox/text/TextPosition.java index 33bf6a65b99922862f08d2c695fd666a6807896e..6f7cf5e483cf7c965f8dbc50987a3a0dc2ed7203 100644 --- a/src/main/java/org/sejda/sambox/text/TextPosition.java +++ b/src/main/java/org/sejda/sambox/text/TextPosition.java @@ -692,7 +692,13 @@ public class TextPosition */ public boolean isVisible() { - return new Rectangle2D.Float(0, 0, pageWidth, pageHeight).contains(getX(), getY()); + Rectangle2D.Float rectangle = new Rectangle2D.Float(0, 0, pageWidth, pageHeight); + if(this.rotation == 90 || this.rotation == 270) { + // flip width and height + rectangle = new Rectangle2D.Float(0, 0, pageHeight, pageWidth); + } + + return rectangle.contains(getX(), getY()); } /** diff --git a/src/main/java/org/sejda/sambox/text/TextPositionComparator.java b/src/main/java/org/sejda/sambox/text/TextPositionComparator.java index b851b62ec8dd2c4169d6bcfa6dc55ec7bd897430..2ddf8dda10b4fca5379dfb9956230870c1e13e1b 100644 --- a/src/main/java/org/sejda/sambox/text/TextPositionComparator.java +++ b/src/main/java/org/sejda/sambox/text/TextPositionComparator.java @@ -32,13 +32,10 @@ public class TextPositionComparator implements Comparator<TextPosition> public int compare(TextPosition pos1, TextPosition pos2) { // only compare text that is in the same direction - if (pos1.getDir() < pos2.getDir()) + int cmp1 = Float.compare(pos1.getDir(), pos2.getDir()); + if (cmp1 != 0) { - return -1; - } - else if (pos1.getDir() > pos2.getDir()) - { - 
return 1; + return cmp1; } // get the text direction adjusted coordinates @@ -59,22 +56,11 @@ public class TextPositionComparator implements Comparator<TextPosition> pos2YBottom >= pos1YTop && pos2YBottom <= pos1YBottom || pos1YBottom >= pos2YTop && pos1YBottom <= pos2YBottom) { - if (x1 < x2) - { - return -1; - } - else if (x1 > x2) - { - return 1; - } - else - { - return 0; - } + return Float.compare(x1, x2); } else if (pos1YBottom < pos2YBottom) { - return - 1; + return -1; } else { diff --git a/src/main/java/org/sejda/sambox/util/Matrix.java b/src/main/java/org/sejda/sambox/util/Matrix.java index 26b2771dcc2b4804086f417d4c38eb3d25d630e4..7cdd5b702ac86dc3ce7a2c46035567029e39481f 100644 --- a/src/main/java/org/sejda/sambox/util/Matrix.java +++ b/src/main/java/org/sejda/sambox/util/Matrix.java @@ -21,6 +21,7 @@ import java.awt.geom.Point2D; import java.util.Arrays; import org.sejda.sambox.cos.COSArray; +import org.sejda.sambox.cos.COSBase; import org.sejda.sambox.cos.COSFloat; import org.sejda.sambox.cos.COSNumber; @@ -55,12 +56,12 @@ public final class Matrix implements Cloneable public Matrix(COSArray array) { single = new float[DEFAULT_SINGLE.length]; - single[0] = ((COSNumber) array.getObject(0)).floatValue(); - single[1] = ((COSNumber) array.getObject(1)).floatValue(); - single[3] = ((COSNumber) array.getObject(2)).floatValue(); - single[4] = ((COSNumber) array.getObject(3)).floatValue(); - single[6] = ((COSNumber) array.getObject(4)).floatValue(); - single[7] = ((COSNumber) array.getObject(5)).floatValue(); + single[0] = ((COSNumber)array.getObject(0)).floatValue(); + single[1] = ((COSNumber)array.getObject(1)).floatValue(); + single[3] = ((COSNumber)array.getObject(2)).floatValue(); + single[4] = ((COSNumber)array.getObject(3)).floatValue(); + single[6] = ((COSNumber)array.getObject(4)).floatValue(); + single[7] = ((COSNumber)array.getObject(5)).floatValue(); single[8] = 1; } @@ -94,6 +95,36 @@ public final class Matrix implements Cloneable single[7] = 
(float)at.getTranslateY(); } + /** + * Convenience method to be used when creating a matrix from unverified data. If the parameter + * is a COSArray with at least six numbers, a Matrix object is created from the first six + * numbers and returned. If not, then the identity Matrix is returned. + * + * @param base a COS object, preferably a COSArray with six numbers. + * + * @return a Matrix object. + */ + public static Matrix createMatrix(COSBase base) + { + if (!(base instanceof COSArray)) + { + return new Matrix(); + } + COSArray array = (COSArray) base; + if (array.size() < 6) + { + return new Matrix(); + } + for (int i = 0; i < 6; ++i) + { + if (!(array.getObject(i) instanceof COSNumber)) + { + return new Matrix(); + } + } + return new Matrix(array); + } + /** * This method resets the numbers in this Matrix to the original values, which are * the values that a newly constructed Matrix would have. diff --git a/src/main/java/org/sejda/sambox/util/filetypedetector/FileTypeDetector.java b/src/main/java/org/sejda/sambox/util/filetypedetector/FileTypeDetector.java index 125e8b44d8abca3cc0d8a2b2efa13811ccb65701..0c07e937366e50c7f34d86cc176c8648e4d498ec 100644 --- a/src/main/java/org/sejda/sambox/util/filetypedetector/FileTypeDetector.java +++ b/src/main/java/org/sejda/sambox/util/filetypedetector/FileTypeDetector.java @@ -63,8 +63,8 @@ public final class FileTypeDetector ROOT.addPath(FileType.PCX, new byte[] { 0x0A, 0x05, 0x01 }); ROOT.addPath(FileType.RIFF, "RIFF".getBytes(StandardCharsets.ISO_8859_1)); - ROOT.addPath(FileType.ARW, "II".getBytes(StandardCharsets.ISO_8859_1), - new byte[] { 0x2a, 0x00, 0x08, 0x00 }); + // https://github.com/drewnoakes/metadata-extractor/issues/217 + // root.addPath(FileType.ARW, "II".getBytes(Charsets.ISO_8859_1), new byte[]{0x2a, 0x00, 0x08, 0x00}) ROOT.addPath(FileType.CRW, "II".getBytes(StandardCharsets.ISO_8859_1), new byte[] { 0x1a, 0x00, 0x00, 0x00 }, "HEAPCCDR".getBytes(StandardCharsets.ISO_8859_1));