diff --git a/NOTICES.txt b/NOTICES.txt index b31cbe758..6f4b7a7f2 100644 --- a/NOTICES.txt +++ b/NOTICES.txt @@ -10,4 +10,12 @@ Includes the Adobe Glyph List Copyright 1997, 1998, 2002, 2007, 2010 Adobe Systems Incorporated. Includes the Zapf Dingbats Glyph List -Copyright 2002, 2010 Adobe Systems Incorporated. \ No newline at end of file +Copyright 2002, 2010 Adobe Systems Incorporated. + +------------------------------------------------------------------ + +Apache JBig2 ImageIO Plugin +Copyright 2017 The Apache Software Foundation + +Based on source code originally developed in the JBig2 project. +Copyright (C) 1995-2017 levigo holding GmbH \ No newline at end of file diff --git a/src/UglyToad.PdfPig.Tests/Filters/Jbig2/ArithmeticDecoderTest.cs b/src/UglyToad.PdfPig.Tests/Filters/Jbig2/ArithmeticDecoderTest.cs new file mode 100644 index 000000000..c38f7692d --- /dev/null +++ b/src/UglyToad.PdfPig.Tests/Filters/Jbig2/ArithmeticDecoderTest.cs @@ -0,0 +1,167 @@ +namespace UglyToad.PdfPig.Tests.Filters.Jbig2 +{ + using UglyToad.PdfPig.Filters.Jbig2; + using UglyToad.PdfPig.Tests.Images; + using Xunit; + + public class ArithmeticDecoderTest + { + private static readonly long[][] Tracedata = new[]{ + new []{ 0L, 0x8000L, 0x42638000L, 0x3D9C0000L }, new []{ 0L, 0xAC02L, 0x84C70000L, 0x273A0000L }, + new []{ 0L, 0xF002L, 0xA18C7600L, 0x4E758800L }, new []{ 0L, 0xD801L, 0x898B7600L, 0x4E758800L }, + new []{ 0L, 0xC000L, 0x718A7600L, 0x4E758800L }, new []{ 0L, 0xA7FFL, 0x59897600L, 0x4E758800L }, + new []{ 0L, 0x8FFEL, 0x41887600L, 0x4E758800L }, new []{ 0L, 0xEFFAL, 0x530EEC00L, 0x9CEB1000L }, + new []{ 0L, 0xE539L, 0x484DEC00L, 0x9CEB1000L }, new []{ 0L, 0xDA78L, 0x3D8CEC00L, 0x9CEB1000L }, + new []{ 0L, 0xCFB7L, 0x32CBEC00L, 0x9CEB1000L }, new []{ 0L, 0xC4F6L, 0x280AEC00L, 0x9CEB1000L }, + new []{ 0L, 0xBA35L, 0x1D49EC00L, 0x9CEB1000L }, new []{ 0L, 0xAF74L, 0x1288EC00L, 0x9CEB1000L }, + new []{ 1L, 0xA4B3L, 0x07C7EC00L, 0x9CEB1000L }, new []{ 0L, 0xAC10L, 0x7C7EC000L, 
0x2F910000L }, + new []{ 0L, 0x900FL, 0x607DC000L, 0x2F910000L }, new []{ 0L, 0xE81CL, 0x88F98000L, 0x5F220000L }, + new []{ 0L, 0xD21BL, 0x72F88000L, 0x5F220000L }, new []{ 0L, 0xBC1AL, 0x5CF78000L, 0x5F220000L }, + new []{ 0L, 0xA619L, 0x46F68000L, 0x5F220000L }, new []{ 0L, 0x9018L, 0x30F58000L, 0x5F220000L }, + new []{ 0L, 0xF42EL, 0x35E90000L, 0xBE440000L }, new []{ 0L, 0xE32DL, 0x24E80000L, 0xBE440000L }, + new []{ 0L, 0xD22CL, 0x13E70000L, 0xBE440000L }, new []{ 1L, 0xC12BL, 0x02E60000L, 0xBE440000L }, + new []{ 0L, 0x8808L, 0x1737E000L, 0x70D01800L }, new []{ 1L, 0xE80EL, 0x066DC000L, 0xE1A03000L }, + new []{ 0L, 0x9008L, 0x336E0000L, 0x5C998000L }, new []{ 0L, 0xF40EL, 0x3ADA0000L, 0xB9330000L }, + new []{ 0L, 0xE00DL, 0x26D90000L, 0xB9330000L }, new []{ 1L, 0xCC0CL, 0x12D80000L, 0xB9330000L }, + new []{ 0L, 0xA008L, 0x96C70800L, 0x0940F000L }, new []{ 0L, 0x8807L, 0x7EC60800L, 0x0940F000L }, + new []{ 0L, 0xE00CL, 0xCD8A1000L, 0x1281E000L }, new []{ 0L, 0xCA0BL, 0xB7891000L, 0x1281E000L }, + new []{ 0L, 0xB40AL, 0xA1881000L, 0x1281E000L }, new []{ 0L, 0x9E09L, 0x8B871000L, 0x1281E000L }, + new []{ 0L, 0x8808L, 0x75861000L, 0x1281E000L }, new []{ 0L, 0xE40EL, 0xBF0A2000L, 0x2503C000L }, + new []{ 0L, 0xD00DL, 0xAB092000L, 0x2503C000L }, new []{ 0L, 0xBC0CL, 0x97082000L, 0x2503C000L }, + new []{ 0L, 0xA80BL, 0x83072000L, 0x2503C000L }, new []{ 0L, 0x940AL, 0x6F062000L, 0x2503C000L }, + new []{ 0L, 0x8009L, 0x5B052000L, 0x2503C000L }, new []{ 0L, 0xD810L, 0x8E084000L, 0x4A078000L }, + new []{ 0L, 0xC60FL, 0x7C074000L, 0x4A078000L }, new []{ 0L, 0xB40EL, 0x6A064000L, 0x4A078000L }, + new []{ 0L, 0xA20DL, 0x58054000L, 0x4A078000L }, new []{ 0L, 0x900CL, 0x46044000L, 0x4A078000L }, + new []{ 0L, 0xFC16L, 0x68068000L, 0x940F0000L }, new []{ 0L, 0xEB15L, 0x57058000L, 0x940F0000L }, + new []{ 0L, 0xDA14L, 0x46048000L, 0x940F0000L }, new []{ 0L, 0xC913L, 0x35038000L, 0x940F0000L }, + new []{ 0L, 0xB812L, 0x24028000L, 0x940F0000L }, new []{ 0L, 0xA711L, 0x13018000L, 
0x940F0000L }, + new []{ 1L, 0x9610L, 0x02008000L, 0x940F0000L }, new []{ 1L, 0x8808L, 0x10068400L, 0x78017800L }, + new []{ 0L, 0xA008L, 0x80342000L, 0x1FD3C000L }, new []{ 0L, 0x8807L, 0x68332000L, 0x1FD3C000L }, + new []{ 0L, 0xE00CL, 0xA0644000L, 0x3FA78000L }, new []{ 0L, 0xCA0BL, 0x8A634000L, 0x3FA78000L }, + new []{ 0L, 0xB40AL, 0x74624000L, 0x3FA78000L }, new []{ 0L, 0x9E09L, 0x5E614000L, 0x3FA78000L }, + new []{ 0L, 0x8808L, 0x48604000L, 0x3FA78000L }, new []{ 0L, 0xE40EL, 0x64BE8000L, 0x7F4F0000L }, + new []{ 0L, 0xD00DL, 0x50BD8000L, 0x7F4F0000L }, new []{ 0L, 0xBC0CL, 0x3CBC8000L, 0x7F4F0000L }, + new []{ 0L, 0xA80BL, 0x28BB8000L, 0x7F4F0000L }, new []{ 0L, 0x940AL, 0x14BA8000L, 0x7F4F0000L }, + new []{ 1L, 0x8009L, 0x00B98000L, 0x7F4F0000L }, new []{ 1L, 0xA008L, 0x05CD0C00L, 0x9A3AF000L }, + new []{ 0L, 0xC008L, 0x2E686000L, 0x919F8000L }, new []{ 1L, 0x9E07L, 0x0C676000L, 0x919F8000L }, + new []{ 0L, 0x8804L, 0x319D8000L, 0x56660000L }, new []{ 1L, 0xC006L, 0x13390000L, 0xACCC0000L }, + new []{ 0L, 0x9004L, 0x4CE41000L, 0x431FEC00L }, new []{ 0L, 0xC006L, 0x39C62000L, 0x863FD800L }, + new []{ 1L, 0x9805L, 0x11C52000L, 0x863FD800L }, new []{ 0L, 0xA004L, 0x47148000L, 0x58EF6000L }, + new []{ 1L, 0xD806L, 0x26270000L, 0xB1DEC000L }, new []{ 0L, 0xC004L, 0x989C0000L, 0x27670000L }, + new []{ 0L, 0x8C03L, 0x649B0000L, 0x27670000L }, new []{ 0L, 0xB004L, 0x61340400L, 0x4ECFFA00L }, + new []{ 0L, 0x8003L, 0x31330400L, 0x4ECFFA00L }, new []{ 1L, 0xA004L, 0x02640800L, 0x9D9FF400L }, + new []{ 1L, 0xA004L, 0x09902000L, 0x9673D000L }, new []{ 1L, 0xD004L, 0x26408000L, 0xA9C34000L }, + new []{ 0L, 0xE004L, 0x99020000L, 0x47010000L }, new []{ 0L, 0x9803L, 0x51010000L, 0x47010000L }, + new []{ 1L, 0xA004L, 0x12004000L, 0x8E03BE00L }, new []{ 0L, 0xE004L, 0x48010000L, 0x9802F800L }, + new []{ 1L, 0x9803L, 0x00000000L, 0x9802F800L }, new []{ 0L, 0x9002L, 0x00000000L, 0x9001F000L }, + new []{ 1L, 0xA202L, 0x00000000L, 0xA201E000L }, new []{ 0L, 0x9002L, 0x00000000L, 
0x9001C000L }, + new []{ 1L, 0xA202L, 0x00000000L, 0xA2018000L }, new []{ 0L, 0x9002L, 0x00000000L, 0x90010000L }, + new []{ 1L, 0xA202L, 0x00000000L, 0xA201FE00L }, new []{ 0L, 0x9002L, 0x00000000L, 0x9001FC00L }, + new []{ 1L, 0xA202L, 0x00000000L, 0xA201F800L }, new []{ 0L, 0x9002L, 0x00000000L, 0x9001F000L }, + new []{ 1L, 0xA202L, 0x00000000L, 0xA201E000L }, new []{ 0L, 0x9002L, 0x00000000L, 0x9001C000L }, + new []{ 1L, 0xA202L, 0x00000000L, 0xA2018000L }, new []{ 0L, 0x9002L, 0x00000000L, 0x90010000L }, + new []{ 1L, 0xA202L, 0x00000000L, 0xA201FE00L }, new []{ 0L, 0x9002L, 0x00000000L, 0x9001FC00L }, + new []{ 1L, 0xA202L, 0x00000000L, 0xA201F800L }, new []{ 0L, 0x9002L, 0x00000000L, 0x9001F000L }, + new []{ 1L, 0xA202L, 0x00000000L, 0xA201E000L }, new []{ 0L, 0x9002L, 0x00000000L, 0x9001C000L }, + new []{ 1L, 0xA202L, 0x00000000L, 0xA2018000L }, new []{ 0L, 0x9002L, 0x00000000L, 0x90010000L }, + new []{ 1L, 0xA202L, 0x00008200L, 0xA2017C00L }, new []{ 0L, 0x9002L, 0x00010400L, 0x9000F800L }, + new []{ 1L, 0xA202L, 0x00020800L, 0xA1FFF000L }, new []{ 0L, 0x9002L, 0x00041000L, 0x8FFDE000L }, + new []{ 1L, 0xA202L, 0x00082000L, 0xA1F9C000L }, new []{ 0L, 0x9002L, 0x00104000L, 0x8FF18000L }, + new []{ 1L, 0xA202L, 0x00208000L, 0xA1E10000L }, new []{ 0L, 0x9002L, 0x00410000L, 0x8FC00000L }, + new []{ 1L, 0xA202L, 0x00821A00L, 0xA17FE400L }, new []{ 0L, 0x9002L, 0x01043400L, 0x8EFDC800L }, + new []{ 1L, 0xA202L, 0x02086800L, 0x9FF99000L }, new []{ 0L, 0x9002L, 0x0410D000L, 0x8BF12000L }, + new []{ 1L, 0xA202L, 0x0821A000L, 0x99E04000L }, new []{ 0L, 0x9002L, 0x10434000L, 0x7FBE8000L }, + new []{ 1L, 0xA202L, 0x20868000L, 0x817B0000L }, new []{ 0L, 0x9002L, 0x410D0000L, 0x4EF40000L }, + new []{ 0L, 0xA202L, 0x821B7600L, 0x1FE68800L }, new []{ 0L, 0xB402L, 0x7434EC00L, 0x3FCD1000L }, + new []{ 0L, 0xF802L, 0x7867D800L, 0x7F9A2000L }, new []{ 0L, 0xC401L, 0x4466D800L, 0x7F9A2000L }, + new []{ 1L, 0x9000L, 0x1065D800L, 0x7F9A2000L }, new []{ 0L, 0xD004L, 0x41976000L, 
0x8E6C8000L }, + new []{ 1L, 0x9803L, 0x09966000L, 0x8E6C8000L }, new []{ 1L, 0xE004L, 0x26598000L, 0xB9AA0000L }, + new []{ 0L, 0x9002L, 0x4CB30000L, 0x434E0000L }, new []{ 0L, 0xA202L, 0x99670C00L, 0x089AF200L }, + new []{ 0L, 0xB402L, 0xA2CC1800L, 0x1135E400L }, new []{ 0L, 0xF802L, 0xD5963000L, 0x226BC800L }, + new []{ 0L, 0xC401L, 0xA1953000L, 0x226BC800L }, new []{ 0L, 0x9000L, 0x6D943000L, 0x226BC800L }, + new []{ 0L, 0xB7FEL, 0x73266000L, 0x44D79000L }, new []{ 0L, 0x87FDL, 0x43256000L, 0x44D79000L }, + new []{ 1L, 0xAFF8L, 0x2648C000L, 0x89AF2000L }, new []{ 0L, 0xA004L, 0x99230000L, 0x06E08000L }, + new []{ 0L, 0xD806L, 0xCA440000L, 0x0DC10000L }, new []{ 0L, 0xA805L, 0x9A430000L, 0x0DC10000L }, + new []{ 0L, 0xF008L, 0xD485E800L, 0x1B821600L }, new []{ 0L, 0xC807L, 0xAC84E800L, 0x1B821600L }, + new []{ 0L, 0xA006L, 0x8483E800L, 0x1B821600L }, new []{ 0L, 0xF00AL, 0xB905D000L, 0x37042C00L }, + new []{ 0L, 0xCC09L, 0x9504D000L, 0x37042C00L }, new []{ 0L, 0xA808L, 0x7103D000L, 0x37042C00L }, + new []{ 0L, 0x8407L, 0x4D02D000L, 0x37042C00L }, new []{ 0L, 0xC00CL, 0x5203A000L, 0x6E085800L }, + new []{ 0L, 0x9E0BL, 0x3002A000L, 0x6E085800L }, new []{ 0L, 0xF814L, 0x1C034000L, 0xDC10B000L }, + new []{ 1L, 0xDC13L, 0x00024000L, 0xDC10B000L }, new []{ 1L, 0xE008L, 0x00120000L, 0xDFF58000L }, + new []{ 1L, 0x9004L, 0x00486200L, 0x8FBB9C00L }, new []{ 1L, 0xC004L, 0x01218800L, 0xBEE27000L }, + new []{ 1L, 0xD004L, 0x04862000L, 0xCB7DC000L }, new []{ 1L, 0xE004L, 0x12188000L, 0xCDEB0000L }, + new []{ 0L, 0x9002L, 0x24310000L, 0x6BD00000L }, new []{ 0L, 0xA202L, 0x4862FE00L, 0x599F0000L }, + new []{ 1L, 0xB402L, 0x00C3FC00L, 0xB33E0000L }, new []{ 1L, 0xE004L, 0x030FF000L, 0xDCF40000L }, + new []{ 0L, 0x9002L, 0x061FE000L, 0x89E20000L }, new []{ 1L, 0xA202L, 0x0C3FC000L, 0x95C20000L }, + new []{ 0L, 0x9002L, 0x187F8000L, 0x77820000L }, new []{ 1L, 0xA202L, 0x30FF0000L, 0x71020000L }, + new []{ 1L, 0x9002L, 0x61FFFE00L, 0x2E020000L }, new []{ 1L, 0xFC04L, 0x43FBF800L, 
0xB8080000L }, + new []{ 1L, 0xA802L, 0x87F7F000L, 0x200A0000L }, new []{ 0L, 0xA402L, 0x63EDE000L, 0x40140000L }, + new []{ 0L, 0x9C02L, 0x1BD9C000L, 0x80280000L }, new []{ 1L, 0xAC02L, 0x37B38000L, 0x744E0000L }, + new []{ 1L, 0xA802L, 0x6F670000L, 0x389A0000L }, new []{ 1L, 0xA402L, 0x32CE2000L, 0x7133DC00L }, + new []{ 1L, 0xAC02L, 0x659C4000L, 0x4665B800L }, new []{ 0L, 0xB002L, 0x23368000L, 0x8CCB7000L }, + new []{ 1L, 0xA202L, 0x466D0000L, 0x5B94E000L }, new []{ 1L, 0xA802L, 0x8CDA0000L, 0x1B27C000L }, + new []{ 1L, 0xAE02L, 0x77B20000L, 0x364F8000L }, new []{ 1L, 0xCC02L, 0x5F620000L, 0x6C9F0000L }, + new []{ 0L, 0x9401L, 0x27610000L, 0x6C9F0000L }, new []{ 1L, 0xE004L, 0x9D87FC00L, 0x427C0000L }, + new []{ 1L, 0x9803L, 0x5586FC00L, 0x427C0000L }, new []{ 0L, 0xA004L, 0x1B0BF800L, 0x84F80000L }, + new []{ 1L, 0xE004L, 0x6C2FE000L, 0x73D40000L }, new []{ 0L, 0x9803L, 0x242EE000L, 0x73D40000L }, + new []{ 1L, 0x9002L, 0x485DC000L, 0x47A40000L }, new []{ 1L, 0xA202L, 0x90BB8000L, 0x11460000L }, + new []{ 1L, 0xB402L, 0x91750000L, 0x228C0000L }, new []{ 1L, 0xF802L, 0xB2E8DC00L, 0x45192000L }, + new []{ 1L, 0xC401L, 0x7EE7DC00L, 0x45192000L }, new []{ 1L, 0x9000L, 0x4AE6DC00L, 0x45192000L }, + new []{ 0L, 0xB7FEL, 0x2DCBB800L, 0x8A324000L }, new []{ 1L, 0xC004L, 0xB72EE000L, 0x08D50000L }, + new []{ 1L, 0x8C03L, 0x832DE000L, 0x08D50000L }, new []{ 1L, 0xB004L, 0x9E59C000L, 0x11AA0000L }, + new []{ 1L, 0x8003L, 0x6E58C000L, 0x11AA0000L }, new []{ 1L, 0xA004L, 0x7CAF8000L, 0x23540000L }, + new []{ 1L, 0xF006L, 0xA95D0000L, 0x46A80000L }, new []{ 1L, 0xCC05L, 0x855C0000L, 0x46A80000L }, + new []{ 1L, 0xA804L, 0x615B0000L, 0x46A80000L }, new []{ 1L, 0x8403L, 0x3D5A0000L, 0x46A80000L }, + new []{ 1L, 0xC004L, 0x32B28E00L, 0x8D517000L }, new []{ 0L, 0x9E03L, 0x10B18E00L, 0x8D517000L }, + new []{ 1L, 0x8804L, 0x42C63800L, 0x453DC000L }, new []{ 1L, 0xC006L, 0x358A7000L, 0x8A7B8000L }, + new []{ 0L, 0x9C05L, 0x11897000L, 0x8A7B8000L }, new []{ 1L, 0x9004L, 0x4625C000L, 
0x49DE0000L }, + new []{ 1L, 0xC006L, 0x2C498000L, 0x93BC0000L }, new []{ 0L, 0x9805L, 0x04488000L, 0x93BC0000L }, + new []{ 0L, 0xA004L, 0x11223400L, 0x8EE1CA00L }, new []{ 1L, 0xD004L, 0x4488D000L, 0x8B7B2800L }, + new []{ 0L, 0x9803L, 0x0C87D000L, 0x8B7B2800L }, new []{ 0L, 0xE004L, 0x321F4000L, 0xADE4A000L }, + new []{ 0L, 0x9002L, 0x643E8000L, 0x2BC34000L }, new []{ 0L, 0xFC04L, 0x4CF60000L, 0xAF0D0000L }, + new []{ 0L, 0xA802L, 0x99EDB600L, 0x0E144800L }, new []{ 1L, 0xA402L, 0x87D96C00L, 0x1C289000L }, + new []{ 0L, 0x9C02L, 0x63B0D800L, 0x38512000L }, new []{ 0L, 0x8C02L, 0x1B5FB000L, 0x70A24000L }, + new []{ 1L, 0xAC02L, 0x36BF6000L, 0x75428000L }, new []{ 1L, 0xA802L, 0x6D7EC000L, 0x3A830000L }, + new []{ 1L, 0xA402L, 0x2EFB8000L, 0x75060000L }, new []{ 1L, 0xAC02L, 0x5DF70000L, 0x4E0A0000L }, + new []{ 0L, 0xB002L, 0x13ECD400L, 0x9C152A00L }, new []{ 1L, 0xA202L, 0x27D9A800L, 0x7A285400L }, + new []{ 0L, 0xA802L, 0x4FB35000L, 0x584EA800L }, new []{ 0L, 0xA202L, 0x9F66A000L, 0x029B5000L }, + new []{ 0L, 0x9C02L, 0x96CB4000L, 0x0536A000L }, new []{ 1L, 0x8C02L, 0x81948000L, 0x0A6D4000L }, + new []{ 1L, 0xD804L, 0xAE4E0000L, 0x29B50000L }, new []{ 0L, 0x8203L, 0x584D0000L, 0x29B50000L }, + new []{ 1L, 0xB008L, 0x09337C00L, 0xA6D48000L }, new []{ 0L, 0xAC02L, 0x1266F800L, 0x999B0000L }, + new []{ 1L, 0xAC02L, 0x24CDF000L, 0x87340000L }, new []{ 0L, 0xAC02L, 0x499BE000L, 0x62660000L }, + new []{ 0L, 0xAC02L, 0x9337C000L, 0x18CA0000L }, new []{ 0L, 0xAC02L, 0x7A6D8000L, 0x31940000L }, + new []{ 1L, 0xB002L, 0x4CD90000L, 0x63280000L }, new []{ 1L, 0xA202L, 0x99B3FE00L, 0x084E0000L }, + new []{ 1L, 0x9C02L, 0x8B65FC00L, 0x109C0000L }, new []{ 0L, 0x8C02L, 0x6AC9F800L, 0x21380000L }, + new []{ 1L, 0xD804L, 0x5323E000L, 0x84E00000L }, new []{ 1L, 0xAC02L, 0xA647C000L, 0x05BA0000L }, + new []{ 1L, 0xAC02L, 0xA08D8000L, 0x0B740000L }, new []{ 1L, 0xB002L, 0x99190000L, 0x16E80000L }, + new []{ 1L, 0xBE02L, 0x9031FE00L, 0x2DD00000L }, new []{ 1L, 0xEC02L, 0x9061FC00L, 
0x5BA00000L }}; + + [Fact] + public void DecodeTest() + { + var iis = new ImageInputStream(ImageHelpers.LoadFileBytes("arith-encoded-testsequence.bin")); + var decoder = new ArithmeticDecoder(iis); + + var cx = new CX(1, 0); + for (int i = 0; i < 257; i++) + { + decoder.Decode(cx); + } + } + + [Fact] + public void DecodeTestWithTracadataComparison() + { + var iis = new ImageInputStream(ImageHelpers.LoadFileBytes("arith-encoded-testsequence.bin")); + var decoder = new ArithmeticDecoder(iis); + var cx = new CX(1, 0); + + for (int i = 0; i < 255; i++) + { + Assert.Equal(Tracedata[i][0], decoder.Decode(cx)); + Assert.Equal(Tracedata[i + 1][1], decoder.A); + Assert.Equal(Tracedata[i + 1][2], decoder.C); + } + } + } +} diff --git a/src/UglyToad.PdfPig.Tests/Filters/Jbig2/ArithmeticIntegerDecoderTest.cs b/src/UglyToad.PdfPig.Tests/Filters/Jbig2/ArithmeticIntegerDecoderTest.cs new file mode 100644 index 000000000..a97217550 --- /dev/null +++ b/src/UglyToad.PdfPig.Tests/Filters/Jbig2/ArithmeticIntegerDecoderTest.cs @@ -0,0 +1,21 @@ +namespace UglyToad.PdfPig.Tests.Filters.Jbig2 +{ + using Xunit; + using UglyToad.PdfPig.Filters.Jbig2; + using UglyToad.PdfPig.Tests.Images; + + public class ArithmeticIntegerDecoderTest + { + [Fact] + public void DecodeTest() + { + var iis = new ImageInputStream(ImageHelpers.LoadFileBytes("arith-encoded-testsequence.bin")); + var ad = new ArithmeticDecoder(iis); + var aid = new ArithmeticIntegerDecoder(ad); + + long result = aid.Decode(null); + + Assert.Equal(1, result); + } + } +} diff --git a/src/UglyToad.PdfPig.Tests/Filters/Jbig2/BitmapTest.cs b/src/UglyToad.PdfPig.Tests/Filters/Jbig2/BitmapTest.cs new file mode 100644 index 000000000..72c8153cc --- /dev/null +++ b/src/UglyToad.PdfPig.Tests/Filters/Jbig2/BitmapTest.cs @@ -0,0 +1,86 @@ +namespace UglyToad.PdfPig.Tests.Filters.Jbig2 +{ + using System; + using Xunit; + using UglyToad.PdfPig.Filters.Jbig2; + + public class BitmapTest + { + [Fact] + public void GetPixelAndSetPixelTest() + { + 
var bitmap = new Jbig2Bitmap(37, 49); + Assert.Equal(0, bitmap.GetPixel(3, 19)); + + bitmap.SetPixel(3, 19, (byte)1); + + Assert.Equal(1, bitmap.GetPixel(3, 19)); + } + + [Fact] + public void GetByteAndSetByteTest() + { + var bitmap = new Jbig2Bitmap(16, 16); + + byte value = (byte)4; + bitmap.SetByte(0, value); + bitmap.SetByte(31, value); + + Assert.Equal(value, bitmap.GetByte(0)); + Assert.Equal(value, bitmap.GetByte(31)); + } + + [Fact] + public void GetByteThrowsExceptionTest() + { + var bitmap = new Jbig2Bitmap(16, 16); + + Action action = () => bitmap.GetByte(32); + + Assert.Throws(action); + } + + [Fact] + public void SetByteThrowsExceptionTest() + { + var bitmap = new Jbig2Bitmap(16, 16); + + Action action = () => bitmap.SetByte(32, (byte)0); + + Assert.Throws(action); + } + + [Fact] + public void GetByteAsIntegerTest() + { + var bitmap = new Jbig2Bitmap(16, 16); + + var byteValue = (byte)4; + int integerValue = byteValue; + bitmap.SetByte(0, byteValue); + bitmap.SetByte(31, byteValue); + + Assert.Equal(integerValue, bitmap.GetByteAsInteger(0)); + Assert.Equal(integerValue, bitmap.GetByteAsInteger(31)); + + } + + [Fact] + public void GetHeightTest() + { + int height = 16; + var bitmap = new Jbig2Bitmap(1, height); + + Assert.Equal(height, bitmap.Height); + } + + [Fact] + public void GetWidthTest() + { + int width = 16; + var bitmap = new Jbig2Bitmap(width, 1); + + Assert.Equal(width, bitmap.Width); + } + } +} diff --git a/src/UglyToad.PdfPig.Tests/Filters/Jbig2/ChecksumTest.cs b/src/UglyToad.PdfPig.Tests/Filters/Jbig2/ChecksumTest.cs new file mode 100644 index 000000000..f6ebb01e9 --- /dev/null +++ b/src/UglyToad.PdfPig.Tests/Filters/Jbig2/ChecksumTest.cs @@ -0,0 +1,94 @@ +namespace UglyToad.PdfPig.Tests.Filters.Jbig2 +{ + using System.Collections.Generic; + using System.Security.Cryptography; + using System.Text; + using UglyToad.PdfPig.Filters.Jbig2; + using UglyToad.PdfPig.Tests.Images; + using Xunit; + + public class ChecksumTest + { + // See: 
https://jbig2dec.com/tests/index.html + public static readonly IEnumerable Data = + new object[][] { + new[] { "042_1.jb2", "69-26-6629-1793-107941058147-58-79-37-31-79" }, + new[] { "042_2.jb2", "69-26-6629-1793-107941058147-58-79-37-31-79" }, + new[] { "042_3.jb2", "69-26-6629-1793-107941058147-58-79-37-31-79" }, + new[] { "042_4.jb2", "69-26-6629-1793-107941058147-58-79-37-31-79" }, + new[] { "042_5.jb2", "69-26-6629-1793-107941058147-58-79-37-31-79" }, + new[] { "042_6.jb2", "69-26-6629-1793-107941058147-58-79-37-31-79" }, + new[] { "042_7.jb2", "69-26-6629-1793-107941058147-58-79-37-31-79" }, + new[] { "042_8.jb2", "69-26-6629-1793-107941058147-58-79-37-31-79" }, + new[] { "042_9.jb2", "69-26-6629-1793-107941058147-58-79-37-31-79" }, + new[] { "042_10.jb2", "69-26-6629-1793-107941058147-58-79-37-31-79" }, + new[] { "042_11.jb2", "69-26-6629-1793-107941058147-58-79-37-31-79" }, + new[] { "042_12.jb2", "69-26-6629-1793-107941058147-58-79-37-31-79" }, + // NOTE: These two can neither we nor PDFBox decode without error :-( + // new[] { "042_13.jb2", "69-26-6629-1793-107941058147-58-79-37-31-79" }, + // new[] { "042_14.jb2", "69-26-6629-1793-107941058147-58-79-37-31-79" }, + new[] { "042_15.jb2", "69-26-6629-1793-107941058147-58-79-37-31-79" }, + new[] { "042_16.jb2", "69-26-6629-1793-107941058147-58-79-37-31-79" }, + new[] { "042_17.jb2", "69-26-6629-1793-107941058147-58-79-37-31-79" }, + new[] { "042_18.jb2", "69-26-6629-1793-107941058147-58-79-37-31-79" }, + new[] { "042_19.jb2", "69-26-6629-1793-107941058147-58-79-37-31-79" }, + new[] { "042_20.jb2", "69-26-6629-1793-107941058147-58-79-37-31-79" }, + new[] { "042_21.jb2", "69-26-6629-1793-107941058147-58-79-37-31-79" }, + new[] { "042_22.jb2", "69-26-6629-1793-107941058147-58-79-37-31-79" }, + new[] { "042_23.jb2", "69-26-6629-1793-107941058147-58-79-37-31-79" }, + new[] { "042_24.jb2", "69-26-6629-1793-107941058147-58-79-37-31-79" }, + new[] { "042_25.jb2", "69-26-6629-1793-107941058147-58-79-37-31-79" }, + 
new[] { "amb_1.jb2", "58311272494-318210035-125100-344625-126-79" }, + new[] { "amb_2.jb2", "58311272494-318210035-125100-344625-126-79" }, + new[] { "002.jb2", "-12713-4587-92-651657111-57121-1582564895" }, + new[] { "003.jb2", "-37-108-89-33-78-5019-966-96-124-9675-1-108-24" }, + new[] { "004.jb2", "-10709436-24-59-48-217114-37-85-3126-24" }, + new[] { "005.jb2", "712610586-1224021396100112-102-77-1177851" }, + new[] { "006.jb2", "-8719-116-83-83-35-3425-64-528667602154-25" }, + new[] { "007.jb2", "6171-125-109-20-128-71925295955793-127-41-122" }, + //new[] { "sampledata.jb2", "104-68-555325117-4757-48527676-9775-8432" }, + //new[] { "sampledata_page1.jb2", "104-68-555325117-4757-48527676-9775-8432" }, + //new[] { "sampledata_page2.jb2", "104-68-555325117-4757-48527676-9775-8432" }, + //new[] { "sampledata_page3.jb2", "-7825-56-41-30-19-719536-3678580-61-2586" }, + new[] { "20123110001.jb2", "60-96-101-2458-3335024-5468-5-11068-78-80" }, + new[] { "20123110002.jb2", "-28-921048181-117-48-96126-110-9-2865611113" }, + new[] { "20123110003.jb2", "-3942-239351-28-56-729169-5839122-439231" }, + new[] { "20123110004.jb2", "-49-101-28-20-57-4-24-17-9352104-106-118-122-122" }, + new[] { "20123110005.jb2", "-48221261779-94-838820-127-114110-2-88-80-106" }, + new[] { "20123110006.jb2", "81-11870-63-30124-1614-45838-53-123-41639" }, + new[] { "20123110007.jb2", "12183-49124728346-29-124-9-10775-63-44116103" }, + new[] { "20123110008.jb2", "15-74-49-45958458-67-2545-96-119-122-60100-35" }, + new[] { "20123110009.jb2", "36115-114-28-123-3-70-87-113-4197-8512396113-65" }, + new[] { "20123110010.jb2", "-109-1069-61-1576-67-43122406037-75-1091115" }, + new[] { "PDFjs_8145_p55.jb2", "-116-2412448-85-120-68-48-36-77-1236891-493365" }, + new[] { "unitized_page_ii.jb2", "-12766-66-96123-7212470-3-116-72114-6462-112" }, + }; + + [Theory] + [MemberData(nameof(Data))] + public void CompareChecksum(string filename, string checksum) + { + var iis = new 
ImageInputStream(ImageHelpers.LoadFileBytes(filename)); + var doc = new Jbig2Document(iis); + + Jbig2Bitmap b = doc.GetPage(1).GetBitmap(); + byte[] digest = Hash(b.GetByteArray()); + + var stringBuilder = new StringBuilder(); + foreach (byte toAppend in digest) + { + stringBuilder.Append((sbyte)toAppend); + } + + Assert.Equal(checksum, stringBuilder.ToString()); + } + + private static byte[] Hash(byte[] input) + { + using (var md5 = MD5.Create()) + { + return md5.ComputeHash(input); + } + } + } +} diff --git a/src/UglyToad.PdfPig.Tests/Filters/Jbig2/GenericRegionTest.cs b/src/UglyToad.PdfPig.Tests/Filters/Jbig2/GenericRegionTest.cs new file mode 100644 index 000000000..ff4a1f85e --- /dev/null +++ b/src/UglyToad.PdfPig.Tests/Filters/Jbig2/GenericRegionTest.cs @@ -0,0 +1,41 @@ +namespace UglyToad.PdfPig.Tests.Filters.Jbig2 +{ + using Xunit; + using UglyToad.PdfPig.Filters.Jbig2; + using UglyToad.PdfPig.Tests.Images; + + public class GenericRegionTest + { + [Fact(Skip = "File removed")] + public void ParseHeaderTest() + { + var iis = new ImageInputStream(ImageHelpers.LoadFileBytes("sampledata.jb2")); + + // Twelfth Segment (number 11) + var sis = new SubInputStream(iis, 523, 35); + var gr = new GenericRegion(); + gr.Init(null, sis); + + Assert.Equal(54, gr.RegionInfo.BitmapWidth); + Assert.Equal(44, gr.RegionInfo.BitmapHeight); + Assert.Equal(4, gr.RegionInfo.X); + Assert.Equal(11, gr.RegionInfo.Y); + Assert.Equal(CombinationOperator.OR, gr.RegionInfo.CombinationOperator); + Assert.False(gr.UseExtTemplates); + Assert.False(gr.IsMMREncoded); + Assert.Equal(0, gr.GbTemplate); + Assert.True(gr.IsTPGDon); + + short[] gbAtX = gr.GbAtX; + short[] gbAtY = gr.GbAtY; + Assert.Equal(3, gbAtX[0]); + Assert.Equal(-1, gbAtY[0]); + Assert.Equal(-3, gbAtX[1]); + Assert.Equal(-1, gbAtY[1]); + Assert.Equal(2, gbAtX[2]); + Assert.Equal(-2, gbAtY[2]); + Assert.Equal(-2, gbAtX[3]); + Assert.Equal(-2, gbAtY[3]); + } + } +} diff --git 
a/src/UglyToad.PdfPig.Tests/Filters/Jbig2/HalftoneRegionTest.cs b/src/UglyToad.PdfPig.Tests/Filters/Jbig2/HalftoneRegionTest.cs new file mode 100644 index 000000000..212614ae1 --- /dev/null +++ b/src/UglyToad.PdfPig.Tests/Filters/Jbig2/HalftoneRegionTest.cs @@ -0,0 +1,32 @@ +namespace UglyToad.PdfPig.Tests.Filters.Jbig2 +{ + using Xunit; + using UglyToad.PdfPig.Filters.Jbig2; + using UglyToad.PdfPig.Tests.Images; + + public class HalftoneRegionTest + { + [Fact(Skip = "File removed")] + public void ParseHeaderTest() + { + var iis = new ImageInputStream(ImageHelpers.LoadFileBytes("sampledata.jb2")); + // Seventh Segment (number 6) + var sis = new SubInputStream(iis, 302, 87); + var hr = new HalftoneRegion(sis); + hr.Init(null, sis); + + Assert.True(hr.IsMMREncoded); + Assert.Equal(0, hr.HTemplate); + Assert.False(hr.HSkipEnabled); + Assert.Equal(CombinationOperator.OR, hr.HCombinationOperator); + Assert.Equal(0, hr.HDefaultPixel); + Assert.Equal(8, hr.HGridWidth); + Assert.Equal(9, hr.HGridHeight); + Assert.Equal(0, hr.HGridX); + Assert.Equal(0, hr.HGridY); + Assert.Equal(1024, hr.HRegionX); + Assert.Equal(0, hr.HRegionY); + } + } + +} diff --git a/src/UglyToad.PdfPig.Tests/Filters/Jbig2/MMRDecompressorTest.cs b/src/UglyToad.PdfPig.Tests/Filters/Jbig2/MMRDecompressorTest.cs new file mode 100644 index 000000000..9f239fbda --- /dev/null +++ b/src/UglyToad.PdfPig.Tests/Filters/Jbig2/MMRDecompressorTest.cs @@ -0,0 +1,29 @@ +namespace UglyToad.PdfPig.Tests.Filters.Jbig2 +{ + using Xunit; + using UglyToad.PdfPig.Filters.Jbig2; + using UglyToad.PdfPig.Tests.Images; + + public class MMRDecompressorTest + { + [Fact(Skip = "File removed")] + public void MmrDecodingTest() + { + var expected = new byte[] + { + 0, 0, 2, 34, 38, 102, 239, 255, 2, 102, 102, + 238, 238, 239, 255, 255, 0, 2, 102, 102, 127, + 255, 255, 255, 0, 0, 0, 4, 68, 102, 102, 127 + }; + + var iis = new ImageInputStream(ImageHelpers.LoadFileBytes("sampledata.jb2")); + // Sixth Segment (number 5) + var sis = new 
SubInputStream(iis, 252, 38); + var mmrd = new MMRDecompressor(16 * 4, 4, sis); + Jbig2Bitmap b = mmrd.Uncompress(); + byte[] actual = b.GetByteArray(); + + Assert.Equal(expected, actual); + } + } +} diff --git a/src/UglyToad.PdfPig.Tests/Filters/Jbig2/PatternDictionaryTest.cs b/src/UglyToad.PdfPig.Tests/Filters/Jbig2/PatternDictionaryTest.cs new file mode 100644 index 000000000..7870dff1b --- /dev/null +++ b/src/UglyToad.PdfPig.Tests/Filters/Jbig2/PatternDictionaryTest.cs @@ -0,0 +1,25 @@ +namespace UglyToad.PdfPig.Tests.Filters.Jbig2 +{ + using Xunit; + using UglyToad.PdfPig.Filters.Jbig2; + using UglyToad.PdfPig.Tests.Images; + + public class PatternDictionaryTest + { + [Fact(Skip = "File removed")] + public void ParseHeaderTest() + { + var iis = new ImageInputStream(ImageHelpers.LoadFileBytes("sampledata.jb2")); + // Sixth Segment (number 5) + var sis = new SubInputStream(iis, 245, 45); + var pd = new PatternDictionary(); + pd.Init(null, sis); + + Assert.True(pd.IsMMREncoded); + Assert.Equal(0, pd.HdTemplate); + Assert.Equal(4, pd.HdpWidth); + Assert.Equal(4, pd.HdpHeight); + Assert.Equal(15, pd.GrayMax); + } + } +} diff --git a/src/UglyToad.PdfPig.Tests/Filters/Jbig2/RegionSegmentInformationTest.cs b/src/UglyToad.PdfPig.Tests/Filters/Jbig2/RegionSegmentInformationTest.cs new file mode 100644 index 000000000..8cbbd8243 --- /dev/null +++ b/src/UglyToad.PdfPig.Tests/Filters/Jbig2/RegionSegmentInformationTest.cs @@ -0,0 +1,24 @@ +namespace UglyToad.PdfPig.Tests.Filters.Jbig2 +{ + using UglyToad.PdfPig.Filters.Jbig2; + using UglyToad.PdfPig.Tests.Images; + using Xunit; + + public class RegionSegmentInformationTest + { + [Fact(Skip = "File removed")] + public void ParseHeaderTest() + { + var iis = new ImageInputStream(ImageHelpers.LoadFileBytes("sampledata.jb2")); + var sis = new SubInputStream(iis, 130, 49); + var rsi = new RegionSegmentInformation(sis); + rsi.ParseHeader(); + + Assert.Equal(37, rsi.BitmapWidth); + Assert.Equal(8, rsi.BitmapHeight); + 
Assert.Equal(4, rsi.X); + Assert.Equal(1, rsi.Y); + Assert.Equal(CombinationOperator.OR, rsi.CombinationOperator); + } + } +} diff --git a/src/UglyToad.PdfPig.Tests/Filters/Jbig2DecodeFilterTests.cs b/src/UglyToad.PdfPig.Tests/Filters/Jbig2DecodeFilterTests.cs new file mode 100644 index 000000000..acd3c8673 --- /dev/null +++ b/src/UglyToad.PdfPig.Tests/Filters/Jbig2DecodeFilterTests.cs @@ -0,0 +1,50 @@ +using System.Collections.Generic; +using UglyToad.PdfPig.Filters; +using UglyToad.PdfPig.Tests.Images; +using UglyToad.PdfPig.Tokens; +using Xunit; + +namespace UglyToad.PdfPig.Tests.Filters +{ + public class Jbig2DecodeFilterTests + { + [Fact(Skip = "File removed")] + public void CanDecodeJbig2CompressedImageData_WithoutGlobalSegments() + { + var encodedImageBytes = ImageHelpers.LoadFileBytes("sampledata_page1.jb2"); + + var filter = new Jbig2DecodeFilter(); + var dictionary = new Dictionary() + { + { NameToken.Filter, NameToken.Jbig2Decode } + }; + + var expectedBytes = ImageHelpers.LoadFileBytes("sampledata_page1.jb2-decoded.bin"); + var decodedBytes = filter.Decode(encodedImageBytes, new DictionaryToken(dictionary), 0); + Assert.Equal(expectedBytes, decodedBytes); + } + + [Fact] + public void CanDecodeJbig2CompressedImageData_WithGlobalSegments() + { + var encodedGlobalsBytes = ImageHelpers.LoadFileBytes("globals.jb2"); + var encodedImageBytes = ImageHelpers.LoadFileBytes("img-refs-globals.jb2"); + + var filter = new Jbig2DecodeFilter(); + var dictionary = new Dictionary + { + { NameToken.Filter, NameToken.Jbig2Decode }, + { NameToken.DecodeParms, new DictionaryToken(new Dictionary + { + { NameToken.Jbig2Globals, new StreamToken(new DictionaryToken(new Dictionary()), encodedGlobalsBytes) } + }) + }, + { NameToken.ImageMask, BooleanToken.True } + }; + + var expectedBytes = ImageHelpers.LoadFileBytes("img-refs-globals-decoded.bin", isCompressed: true); + var decodedBytes = filter.Decode(encodedImageBytes, new DictionaryToken(dictionary), 0); + 
Assert.Equal(expectedBytes, decodedBytes); + } + } +} diff --git a/src/UglyToad.PdfPig.Tests/Images/Files/001.jb2 b/src/UglyToad.PdfPig.Tests/Images/Files/001.jb2 new file mode 100644 index 000000000..aad904f85 Binary files /dev/null and b/src/UglyToad.PdfPig.Tests/Images/Files/001.jb2 differ diff --git a/src/UglyToad.PdfPig.Tests/Images/Files/002.jb2 b/src/UglyToad.PdfPig.Tests/Images/Files/002.jb2 new file mode 100644 index 000000000..0b703aa60 Binary files /dev/null and b/src/UglyToad.PdfPig.Tests/Images/Files/002.jb2 differ diff --git a/src/UglyToad.PdfPig.Tests/Images/Files/003.jb2 b/src/UglyToad.PdfPig.Tests/Images/Files/003.jb2 new file mode 100644 index 000000000..f7c91a0f7 Binary files /dev/null and b/src/UglyToad.PdfPig.Tests/Images/Files/003.jb2 differ diff --git a/src/UglyToad.PdfPig.Tests/Images/Files/004.jb2 b/src/UglyToad.PdfPig.Tests/Images/Files/004.jb2 new file mode 100644 index 000000000..00704736a Binary files /dev/null and b/src/UglyToad.PdfPig.Tests/Images/Files/004.jb2 differ diff --git a/src/UglyToad.PdfPig.Tests/Images/Files/005.jb2 b/src/UglyToad.PdfPig.Tests/Images/Files/005.jb2 new file mode 100644 index 000000000..dff905909 Binary files /dev/null and b/src/UglyToad.PdfPig.Tests/Images/Files/005.jb2 differ diff --git a/src/UglyToad.PdfPig.Tests/Images/Files/006.jb2 b/src/UglyToad.PdfPig.Tests/Images/Files/006.jb2 new file mode 100644 index 000000000..b13273561 Binary files /dev/null and b/src/UglyToad.PdfPig.Tests/Images/Files/006.jb2 differ diff --git a/src/UglyToad.PdfPig.Tests/Images/Files/007.jb2 b/src/UglyToad.PdfPig.Tests/Images/Files/007.jb2 new file mode 100644 index 000000000..e1abdd38d Binary files /dev/null and b/src/UglyToad.PdfPig.Tests/Images/Files/007.jb2 differ diff --git a/src/UglyToad.PdfPig.Tests/Images/Files/042_1.jb2 b/src/UglyToad.PdfPig.Tests/Images/Files/042_1.jb2 new file mode 100644 index 000000000..52bb1d393 Binary files /dev/null and b/src/UglyToad.PdfPig.Tests/Images/Files/042_1.jb2 differ diff --git 
a/src/UglyToad.PdfPig.Tests/Images/Files/042_10.jb2 b/src/UglyToad.PdfPig.Tests/Images/Files/042_10.jb2 new file mode 100644 index 000000000..cf6a96c5f Binary files /dev/null and b/src/UglyToad.PdfPig.Tests/Images/Files/042_10.jb2 differ diff --git a/src/UglyToad.PdfPig.Tests/Images/Files/042_11.jb2 b/src/UglyToad.PdfPig.Tests/Images/Files/042_11.jb2 new file mode 100644 index 000000000..b33a729a5 Binary files /dev/null and b/src/UglyToad.PdfPig.Tests/Images/Files/042_11.jb2 differ diff --git a/src/UglyToad.PdfPig.Tests/Images/Files/042_12.jb2 b/src/UglyToad.PdfPig.Tests/Images/Files/042_12.jb2 new file mode 100644 index 000000000..cee8bd1ab Binary files /dev/null and b/src/UglyToad.PdfPig.Tests/Images/Files/042_12.jb2 differ diff --git a/src/UglyToad.PdfPig.Tests/Images/Files/042_13.jb2 b/src/UglyToad.PdfPig.Tests/Images/Files/042_13.jb2 new file mode 100644 index 000000000..f9e1b6f69 Binary files /dev/null and b/src/UglyToad.PdfPig.Tests/Images/Files/042_13.jb2 differ diff --git a/src/UglyToad.PdfPig.Tests/Images/Files/042_14.jb2 b/src/UglyToad.PdfPig.Tests/Images/Files/042_14.jb2 new file mode 100644 index 000000000..30ae6581a Binary files /dev/null and b/src/UglyToad.PdfPig.Tests/Images/Files/042_14.jb2 differ diff --git a/src/UglyToad.PdfPig.Tests/Images/Files/042_15.jb2 b/src/UglyToad.PdfPig.Tests/Images/Files/042_15.jb2 new file mode 100644 index 000000000..13f744ff6 Binary files /dev/null and b/src/UglyToad.PdfPig.Tests/Images/Files/042_15.jb2 differ diff --git a/src/UglyToad.PdfPig.Tests/Images/Files/042_16.jb2 b/src/UglyToad.PdfPig.Tests/Images/Files/042_16.jb2 new file mode 100644 index 000000000..a2acc787d Binary files /dev/null and b/src/UglyToad.PdfPig.Tests/Images/Files/042_16.jb2 differ diff --git a/src/UglyToad.PdfPig.Tests/Images/Files/042_17.jb2 b/src/UglyToad.PdfPig.Tests/Images/Files/042_17.jb2 new file mode 100644 index 000000000..59fda75f0 Binary files /dev/null and b/src/UglyToad.PdfPig.Tests/Images/Files/042_17.jb2 differ diff --git 
a/src/UglyToad.PdfPig.Tests/Images/Files/042_18.jb2 b/src/UglyToad.PdfPig.Tests/Images/Files/042_18.jb2 new file mode 100644 index 000000000..4eb018405 Binary files /dev/null and b/src/UglyToad.PdfPig.Tests/Images/Files/042_18.jb2 differ diff --git a/src/UglyToad.PdfPig.Tests/Images/Files/042_19.jb2 b/src/UglyToad.PdfPig.Tests/Images/Files/042_19.jb2 new file mode 100644 index 000000000..34f376bc9 Binary files /dev/null and b/src/UglyToad.PdfPig.Tests/Images/Files/042_19.jb2 differ diff --git a/src/UglyToad.PdfPig.Tests/Images/Files/042_2.jb2 b/src/UglyToad.PdfPig.Tests/Images/Files/042_2.jb2 new file mode 100644 index 000000000..fabf17778 Binary files /dev/null and b/src/UglyToad.PdfPig.Tests/Images/Files/042_2.jb2 differ diff --git a/src/UglyToad.PdfPig.Tests/Images/Files/042_20.jb2 b/src/UglyToad.PdfPig.Tests/Images/Files/042_20.jb2 new file mode 100644 index 000000000..9c7f76409 Binary files /dev/null and b/src/UglyToad.PdfPig.Tests/Images/Files/042_20.jb2 differ diff --git a/src/UglyToad.PdfPig.Tests/Images/Files/042_21.jb2 b/src/UglyToad.PdfPig.Tests/Images/Files/042_21.jb2 new file mode 100644 index 000000000..e5f933c97 Binary files /dev/null and b/src/UglyToad.PdfPig.Tests/Images/Files/042_21.jb2 differ diff --git a/src/UglyToad.PdfPig.Tests/Images/Files/042_22.jb2 b/src/UglyToad.PdfPig.Tests/Images/Files/042_22.jb2 new file mode 100644 index 000000000..54c6f047f Binary files /dev/null and b/src/UglyToad.PdfPig.Tests/Images/Files/042_22.jb2 differ diff --git a/src/UglyToad.PdfPig.Tests/Images/Files/042_23.jb2 b/src/UglyToad.PdfPig.Tests/Images/Files/042_23.jb2 new file mode 100644 index 000000000..238dda769 Binary files /dev/null and b/src/UglyToad.PdfPig.Tests/Images/Files/042_23.jb2 differ diff --git a/src/UglyToad.PdfPig.Tests/Images/Files/042_24.jb2 b/src/UglyToad.PdfPig.Tests/Images/Files/042_24.jb2 new file mode 100644 index 000000000..a4ccbe642 Binary files /dev/null and b/src/UglyToad.PdfPig.Tests/Images/Files/042_24.jb2 differ diff --git 
a/src/UglyToad.PdfPig.Tests/Images/Files/042_25.jb2 b/src/UglyToad.PdfPig.Tests/Images/Files/042_25.jb2 new file mode 100644 index 000000000..ac24fa3f2 Binary files /dev/null and b/src/UglyToad.PdfPig.Tests/Images/Files/042_25.jb2 differ diff --git a/src/UglyToad.PdfPig.Tests/Images/Files/042_3.jb2 b/src/UglyToad.PdfPig.Tests/Images/Files/042_3.jb2 new file mode 100644 index 000000000..d31d9ce1c Binary files /dev/null and b/src/UglyToad.PdfPig.Tests/Images/Files/042_3.jb2 differ diff --git a/src/UglyToad.PdfPig.Tests/Images/Files/042_4.jb2 b/src/UglyToad.PdfPig.Tests/Images/Files/042_4.jb2 new file mode 100644 index 000000000..12eccee00 Binary files /dev/null and b/src/UglyToad.PdfPig.Tests/Images/Files/042_4.jb2 differ diff --git a/src/UglyToad.PdfPig.Tests/Images/Files/042_5.jb2 b/src/UglyToad.PdfPig.Tests/Images/Files/042_5.jb2 new file mode 100644 index 000000000..e2f5965b4 Binary files /dev/null and b/src/UglyToad.PdfPig.Tests/Images/Files/042_5.jb2 differ diff --git a/src/UglyToad.PdfPig.Tests/Images/Files/042_6.jb2 b/src/UglyToad.PdfPig.Tests/Images/Files/042_6.jb2 new file mode 100644 index 000000000..b49201170 Binary files /dev/null and b/src/UglyToad.PdfPig.Tests/Images/Files/042_6.jb2 differ diff --git a/src/UglyToad.PdfPig.Tests/Images/Files/042_7.jb2 b/src/UglyToad.PdfPig.Tests/Images/Files/042_7.jb2 new file mode 100644 index 000000000..5490c6024 Binary files /dev/null and b/src/UglyToad.PdfPig.Tests/Images/Files/042_7.jb2 differ diff --git a/src/UglyToad.PdfPig.Tests/Images/Files/042_8.jb2 b/src/UglyToad.PdfPig.Tests/Images/Files/042_8.jb2 new file mode 100644 index 000000000..edebd798a Binary files /dev/null and b/src/UglyToad.PdfPig.Tests/Images/Files/042_8.jb2 differ diff --git a/src/UglyToad.PdfPig.Tests/Images/Files/042_9.jb2 b/src/UglyToad.PdfPig.Tests/Images/Files/042_9.jb2 new file mode 100644 index 000000000..fa4d74dbe Binary files /dev/null and b/src/UglyToad.PdfPig.Tests/Images/Files/042_9.jb2 differ diff --git 
a/src/UglyToad.PdfPig.Tests/Images/Files/20123110001.jb2 b/src/UglyToad.PdfPig.Tests/Images/Files/20123110001.jb2 new file mode 100644 index 000000000..553bd630e Binary files /dev/null and b/src/UglyToad.PdfPig.Tests/Images/Files/20123110001.jb2 differ diff --git a/src/UglyToad.PdfPig.Tests/Images/Files/20123110002.jb2 b/src/UglyToad.PdfPig.Tests/Images/Files/20123110002.jb2 new file mode 100644 index 000000000..c83efc516 Binary files /dev/null and b/src/UglyToad.PdfPig.Tests/Images/Files/20123110002.jb2 differ diff --git a/src/UglyToad.PdfPig.Tests/Images/Files/20123110003.jb2 b/src/UglyToad.PdfPig.Tests/Images/Files/20123110003.jb2 new file mode 100644 index 000000000..7d9f8bc46 Binary files /dev/null and b/src/UglyToad.PdfPig.Tests/Images/Files/20123110003.jb2 differ diff --git a/src/UglyToad.PdfPig.Tests/Images/Files/20123110004.jb2 b/src/UglyToad.PdfPig.Tests/Images/Files/20123110004.jb2 new file mode 100644 index 000000000..37dfe83c3 Binary files /dev/null and b/src/UglyToad.PdfPig.Tests/Images/Files/20123110004.jb2 differ diff --git a/src/UglyToad.PdfPig.Tests/Images/Files/20123110005.jb2 b/src/UglyToad.PdfPig.Tests/Images/Files/20123110005.jb2 new file mode 100644 index 000000000..f4ef987f8 Binary files /dev/null and b/src/UglyToad.PdfPig.Tests/Images/Files/20123110005.jb2 differ diff --git a/src/UglyToad.PdfPig.Tests/Images/Files/20123110006.jb2 b/src/UglyToad.PdfPig.Tests/Images/Files/20123110006.jb2 new file mode 100644 index 000000000..0354d8ec0 Binary files /dev/null and b/src/UglyToad.PdfPig.Tests/Images/Files/20123110006.jb2 differ diff --git a/src/UglyToad.PdfPig.Tests/Images/Files/20123110007.jb2 b/src/UglyToad.PdfPig.Tests/Images/Files/20123110007.jb2 new file mode 100644 index 000000000..31bb6e658 Binary files /dev/null and b/src/UglyToad.PdfPig.Tests/Images/Files/20123110007.jb2 differ diff --git a/src/UglyToad.PdfPig.Tests/Images/Files/20123110008.jb2 b/src/UglyToad.PdfPig.Tests/Images/Files/20123110008.jb2 new file mode 100644 index 
000000000..6362f76a0 Binary files /dev/null and b/src/UglyToad.PdfPig.Tests/Images/Files/20123110008.jb2 differ diff --git a/src/UglyToad.PdfPig.Tests/Images/Files/20123110009.jb2 b/src/UglyToad.PdfPig.Tests/Images/Files/20123110009.jb2 new file mode 100644 index 000000000..a5eb42003 Binary files /dev/null and b/src/UglyToad.PdfPig.Tests/Images/Files/20123110009.jb2 differ diff --git a/src/UglyToad.PdfPig.Tests/Images/Files/20123110010.jb2 b/src/UglyToad.PdfPig.Tests/Images/Files/20123110010.jb2 new file mode 100644 index 000000000..a26c605c1 Binary files /dev/null and b/src/UglyToad.PdfPig.Tests/Images/Files/20123110010.jb2 differ diff --git a/src/UglyToad.PdfPig.Tests/Images/Files/PDFjs_8145_p55.jb2 b/src/UglyToad.PdfPig.Tests/Images/Files/PDFjs_8145_p55.jb2 new file mode 100755 index 000000000..14b4c9bd9 Binary files /dev/null and b/src/UglyToad.PdfPig.Tests/Images/Files/PDFjs_8145_p55.jb2 differ diff --git a/src/UglyToad.PdfPig.Tests/Images/Files/amb_1.jb2 b/src/UglyToad.PdfPig.Tests/Images/Files/amb_1.jb2 new file mode 100644 index 000000000..75e57fcbe Binary files /dev/null and b/src/UglyToad.PdfPig.Tests/Images/Files/amb_1.jb2 differ diff --git a/src/UglyToad.PdfPig.Tests/Images/Files/amb_2.jb2 b/src/UglyToad.PdfPig.Tests/Images/Files/amb_2.jb2 new file mode 100644 index 000000000..3b5bc2150 Binary files /dev/null and b/src/UglyToad.PdfPig.Tests/Images/Files/amb_2.jb2 differ diff --git a/src/UglyToad.PdfPig.Tests/Images/Files/arith-decoded-testsequence.bin b/src/UglyToad.PdfPig.Tests/Images/Files/arith-decoded-testsequence.bin new file mode 100644 index 000000000..45c482749 Binary files /dev/null and b/src/UglyToad.PdfPig.Tests/Images/Files/arith-decoded-testsequence.bin differ diff --git a/src/UglyToad.PdfPig.Tests/Images/Files/arith-encoded-testsequence.bin b/src/UglyToad.PdfPig.Tests/Images/Files/arith-encoded-testsequence.bin new file mode 100644 index 000000000..8ccf8b473 Binary files /dev/null and 
b/src/UglyToad.PdfPig.Tests/Images/Files/arith-encoded-testsequence.bin differ diff --git a/src/UglyToad.PdfPig.Tests/Images/Files/globals.jb2 b/src/UglyToad.PdfPig.Tests/Images/Files/globals.jb2 new file mode 100644 index 000000000..8c5027f77 Binary files /dev/null and b/src/UglyToad.PdfPig.Tests/Images/Files/globals.jb2 differ diff --git a/src/UglyToad.PdfPig.Tests/Images/Files/img-refs-globals-decoded.bin b/src/UglyToad.PdfPig.Tests/Images/Files/img-refs-globals-decoded.bin new file mode 100644 index 000000000..57921e856 Binary files /dev/null and b/src/UglyToad.PdfPig.Tests/Images/Files/img-refs-globals-decoded.bin differ diff --git a/src/UglyToad.PdfPig.Tests/Images/Files/img-refs-globals.jb2 b/src/UglyToad.PdfPig.Tests/Images/Files/img-refs-globals.jb2 new file mode 100644 index 000000000..753066405 Binary files /dev/null and b/src/UglyToad.PdfPig.Tests/Images/Files/img-refs-globals.jb2 differ diff --git a/src/UglyToad.PdfPig.Tests/Images/Files/unitized_page_ii.jb2 b/src/UglyToad.PdfPig.Tests/Images/Files/unitized_page_ii.jb2 new file mode 100755 index 000000000..74ff41ca3 Binary files /dev/null and b/src/UglyToad.PdfPig.Tests/Images/Files/unitized_page_ii.jb2 differ diff --git a/src/UglyToad.PdfPig.Tests/Images/PngFromPdfImageFactoryTests.cs b/src/UglyToad.PdfPig.Tests/Images/PngFromPdfImageFactoryTests.cs index 927c0fd56..c2e8bc0b7 100644 --- a/src/UglyToad.PdfPig.Tests/Images/PngFromPdfImageFactoryTests.cs +++ b/src/UglyToad.PdfPig.Tests/Images/PngFromPdfImageFactoryTests.cs @@ -258,6 +258,23 @@ public void CanGeneratePngFromCalGrayImageData() Assert.True(ImageHelpers.ImagesAreEqual(LoadImage("calgray.png"), bytes)); } + [Fact(Skip = "File removed")] + public void CanGeneratePngFromJbig2DecodedImageData() + { + var decodedBytes = ImageHelpers.LoadFileBytes("sampledata_page1.jb2-decoded.bin"); + var image = new TestPdfImage + { + ColorSpaceDetails = DeviceGrayColorSpaceDetails.Instance, + DecodedBytes = decodedBytes, + WidthInSamples = 64, + HeightInSamples 
= 56, + BitsPerComponent = 1 + }; + + Assert.True(PngFromPdfImageFactory.TryGenerate(image, out var bytes)); + Assert.True(ImageHelpers.ImagesAreEqual(LoadImage("sampledata_page1.jb2.png"), bytes)); + } + private static byte[] LoadImage(string name) { return ImageHelpers.LoadFileBytes(name); diff --git a/src/UglyToad.PdfPig.Tests/UglyToad.PdfPig.Tests.csproj b/src/UglyToad.PdfPig.Tests/UglyToad.PdfPig.Tests.csproj index 795e75a0a..62bdfbe4e 100644 --- a/src/UglyToad.PdfPig.Tests/UglyToad.PdfPig.Tests.csproj +++ b/src/UglyToad.PdfPig.Tests/UglyToad.PdfPig.Tests.csproj @@ -10,12 +10,16 @@ ..\pdfpig.snk + + 4 + + @@ -133,4 +137,10 @@ PreserveNewest + + + + + + diff --git a/src/UglyToad.PdfPig/Filters/Jbig2/AbstractImageInputStream.cs b/src/UglyToad.PdfPig/Filters/Jbig2/AbstractImageInputStream.cs new file mode 100644 index 000000000..2385e6f32 --- /dev/null +++ b/src/UglyToad.PdfPig/Filters/Jbig2/AbstractImageInputStream.cs @@ -0,0 +1,166 @@ +namespace UglyToad.PdfPig.Filters.Jbig2 +{ + using System; + using System.Collections.Generic; + using System.IO; + using System.Linq; + + internal abstract class AbstractImageInputStream : IImageInputStream + { + private readonly Stack<(long streamPos, int bitOffset)> markedPositions = new Stack<(long, int)>(); + + private int bitOffset; + + /// + public abstract long Length { get; } + + /// + public abstract long Position { get; } + + /// + public abstract void Seek(long pos); + + /// + public abstract int Read(); + + /// + public abstract int Read(byte[] b, int off, int len); + + /// + public int Read(byte[] b) + { + return Read(b, 0, b.Length); + } + + /// + public int ReadBit() + { + var savedBitOffset = bitOffset; + var b = ReadByte(); + SetBitOffset(savedBitOffset); + + var bit = (b & (1 << 7 - bitOffset)) != 0; + + bitOffset = (bitOffset + 1) % 8; + + // Rewind if we're still processing the byte + if (bitOffset > 0) + { + Seek(Position - 1); + } + + return (byte)(bit ? 
1 : 0); + } + + /// + public long ReadBits(int numBits) + { + if (numBits > 32) + { + throw new ArgumentOutOfRangeException(nameof(numBits)); + } + + long accum = 0L; + for (int i = 0; i < numBits; i++) + { + accum <<= 1; // Shift left one bit to make room + var bit = (long)ReadBit(); + accum |= bit; + } + + return accum; + } + + /// + public byte ReadByte() + { + var value = Read(); + if (value == -1) + { + throw new EndOfStreamException(); + } + + return (byte)value; + } + + /// + public uint ReadUnsignedInt() + { + var buffer = new byte[4]; + Read(buffer); + + return BitConverter.ToUInt32(buffer.Reverse().ToArray(), 0); + } + + /// + public void Mark() + { + markedPositions.Push((Position, bitOffset)); + } + + /// + public void Reset() + { + if (markedPositions.Count > 0) + { + var position = markedPositions.Pop(); + Seek(position.streamPos); + bitOffset = position.bitOffset; + } + } + + /// + public long SkipBytes(int n) + { + var desiredPosition = Position + n; + if (desiredPosition > Length) + { + Seek(Length); + return n - (desiredPosition - Length); // bytes actually skipped: the request minus the overshoot past the end + } + else + { + Seek(desiredPosition); + return n; + } + } + + /// + public void SkipBits() + { + if (bitOffset != 0) + { + bitOffset = 0; + Seek(Position + 1); + } + } + + /// + public virtual void Dispose() + { + } + + /// + /// Sets the bit offset to an integer between 0 and 7, inclusive. The byte offset + /// within the stream, as returned by Position, is left unchanged. + /// A value of 0 indicates the most-significant bit, and a value of 7 indicates + /// the least significant bit, of the byte being read. + /// + /// the desired offset, as an int between 0 and 7, inclusive. + /// thrown if bitOffset is not between 0 and 7, inclusive. 
+ protected void SetBitOffset(int bitOffset) + { + if (bitOffset < 0 || bitOffset > 7) + { + throw new ArgumentOutOfRangeException(nameof(bitOffset), "must be between 0 and 7!"); + } + + this.bitOffset = bitOffset; + } + + protected bool IsAtEnd() + { + return Position == Length; + } + } +} \ No newline at end of file diff --git a/src/UglyToad.PdfPig/Filters/Jbig2/ArithmeticDecoder.cs b/src/UglyToad.PdfPig/Filters/Jbig2/ArithmeticDecoder.cs new file mode 100644 index 000000000..35708429a --- /dev/null +++ b/src/UglyToad.PdfPig/Filters/Jbig2/ArithmeticDecoder.cs @@ -0,0 +1,186 @@ +namespace UglyToad.PdfPig.Filters.Jbig2 +{ + /// + /// This class represents the arithmetic decoder, described in ISO/IEC 14492:2001 in E.3 + /// + internal sealed class ArithmeticDecoder + { + private static readonly int[][] QE = new[]{ + new []{ 0x5601, 01, 01, 1 }, new []{ 0x3401, 02, 06, 0 }, new []{ 0x1801, 03, 09, 0 }, + new []{ 0x0AC1, 04, 12, 0 }, new []{ 0x0521, 05, 29, 0 }, new []{ 0x0221, 38, 33, 0 }, + new []{ 0x5601, 07, 06, 1 }, new []{ 0x5401, 08, 14, 0 }, new []{ 0x4801, 09, 14, 0 }, + new []{ 0x3801, 10, 14, 0 }, new []{ 0x3001, 11, 17, 0 }, new []{ 0x2401, 12, 18, 0 }, + new []{ 0x1C01, 13, 20, 0 }, new []{ 0x1601, 29, 21, 0 }, new []{ 0x5601, 15, 14, 1 }, + new []{ 0x5401, 16, 14, 0 }, new []{ 0x5101, 17, 15, 0 }, new []{ 0x4801, 18, 16, 0 }, + new []{ 0x3801, 19, 17, 0 }, new []{ 0x3401, 20, 18, 0 }, new []{ 0x3001, 21, 19, 0 }, + new []{ 0x2801, 22, 19, 0 }, new []{ 0x2401, 23, 20, 0 }, new []{ 0x2201, 24, 21, 0 }, + new []{ 0x1C01, 25, 22, 0 }, new []{ 0x1801, 26, 23, 0 }, new []{ 0x1601, 27, 24, 0 }, + new []{ 0x1401, 28, 25, 0 }, new []{ 0x1201, 29, 26, 0 }, new []{ 0x1101, 30, 27, 0 }, + new []{ 0x0AC1, 31, 28, 0 }, new []{ 0x09C1, 32, 29, 0 }, new []{ 0x08A1, 33, 30, 0 }, + new []{ 0x0521, 34, 31, 0 }, new []{ 0x0441, 35, 32, 0 }, new []{ 0x02A1, 36, 33, 0 }, + new []{ 0x0221, 37, 34, 0 }, new []{ 0x0141, 38, 35, 0 }, new []{ 0x0111, 39, 36, 0 }, + new []{ 0x0085, 
40, 37, 0 }, new []{ 0x0049, 41, 38, 0 }, new []{ 0x0025, 42, 39, 0 }, + new []{ 0x0015, 43, 40, 0 }, new []{ 0x0009, 44, 41, 0 }, new []{ 0x0005, 45, 42, 0 }, + new []{ 0x0001, 45, 43, 0 }, new []{ 0x5601, 46, 46, 0 } }; + + private readonly IImageInputStream iis; + + private int a; + private int b; + private long c; + + private int ct; + private long streamPos0; + + public int A => a; + + public long C => c; + + public ArithmeticDecoder(IImageInputStream iis) + { + this.iis = iis; + Init(); + } + + private void Init() + { + streamPos0 = iis.Position; + b = iis.Read(); + + c = b << 16; + + ByteIn(); + + c <<= 7; + ct -= 7; + a = 0x8000; + } + + public int Decode(CX cx) + { + int d; + int qeValue = QE[cx.Cx][0]; + int icx = cx.Cx; + + a -= qeValue; + + if ((c >> 16) < qeValue) + { + d = LpsExchange(cx, icx, qeValue); + Renormalize(); + } + else + { + c -= (qeValue << 16); + if ((a & 0x8000) == 0) + { + d = MpsExchange(cx, icx); + Renormalize(); + } + else + { + return cx.Mps; + } + } + + return d; + } + + private void ByteIn() + { + if (iis.Position > streamPos0) + { + iis.Seek(iis.Position - 1); + } + + b = iis.Read(); + + if (b == 0xFF) + { + int b1 = iis.Read(); + if (b1 > 0x8f) + { + c += 0xff00; + ct = 8; + iis.Seek(iis.Position - 2); + } + else + { + c += b1 << 9; + ct = 7; + } + } + else + { + b = iis.Read(); + c += b << 8; + ct = 8; + } + + c &= 0xffffffffL; + } + + private void Renormalize() + { + do + { + if (ct == 0) + { + ByteIn(); + } + + a <<= 1; + c <<= 1; + ct--; + + } while ((a & 0x8000) == 0); + + c &= 0xffffffffL; + } + + private int MpsExchange(CX cx, int icx) + { + int mps = cx.Mps; + + if (a < QE[icx][0]) + { + if (QE[icx][3] == 1) + { + cx.ToggleMps(); + } + + cx.Cx = QE[icx][2]; + return 1 - mps; + } + else + { + cx.Cx = QE[icx][1]; + return mps; + } + } + + private int LpsExchange(CX cx, int icx, int qeValue) + { + int mps = cx.Mps; + + if (a < qeValue) + { + cx.Cx = QE[icx][1]; + a = qeValue; + + return mps; + } + else + { + if (QE[icx][3] 
== 1) + { + cx.ToggleMps(); + } + + cx.Cx = QE[icx][2]; + a = qeValue; + return 1 - mps; + } + } + } +} diff --git a/src/UglyToad.PdfPig/Filters/Jbig2/ArithmeticIntegerDecoder.cs b/src/UglyToad.PdfPig/Filters/Jbig2/ArithmeticIntegerDecoder.cs new file mode 100644 index 000000000..f142c4a33 --- /dev/null +++ b/src/UglyToad.PdfPig/Filters/Jbig2/ArithmeticIntegerDecoder.cs @@ -0,0 +1,160 @@ +namespace UglyToad.PdfPig.Filters.Jbig2 +{ + /// + /// This class represents the arithmetic integer decoder, described in ISO/IEC 14492:2001 (Annex A). + /// + internal sealed class ArithmeticIntegerDecoder + { + private readonly ArithmeticDecoder decoder; + + private int prev; + + public ArithmeticIntegerDecoder(ArithmeticDecoder decoder) + { + this.decoder = decoder; + } + + /// + /// Arithmetic Integer Decoding Procedure, Annex A.2. + /// + /// cxIAx to be decoded + /// Decoded value. + public long Decode(CX cxIAx) + { + int v = 0; + int d, s; + + int bitsToRead; + int offset; + + if (cxIAx == null) + { + cxIAx = new CX(512, 1); + } + + prev = 1; + + cxIAx.Index = prev; + s = decoder.Decode(cxIAx); + SetPrev(s); + + cxIAx.Index = prev; + d = decoder.Decode(cxIAx); + SetPrev(d); + + if (d == 1) + { + cxIAx.Index = prev; + d = decoder.Decode(cxIAx); + SetPrev(d); + + if (d == 1) + { + cxIAx.Index = prev; + d = decoder.Decode(cxIAx); + SetPrev(d); + + if (d == 1) + { + cxIAx.Index = prev; + d = decoder.Decode(cxIAx); + SetPrev(d); + + if (d == 1) + { + cxIAx.Index = prev; + d = decoder.Decode(cxIAx); + SetPrev(d); + + if (d == 1) + { + bitsToRead = 32; + offset = 4436; + } + else + { + bitsToRead = 12; + offset = 340; + } + } + else + { + bitsToRead = 8; + offset = 84; + } + } + else + { + bitsToRead = 6; + offset = 20; + } + } + else + { + bitsToRead = 4; + offset = 4; + } + } + else + { + bitsToRead = 2; + offset = 0; + } + + for (int i = 0; i < bitsToRead; i++) + { + cxIAx.Index = prev; + d = decoder.Decode(cxIAx); + SetPrev(d); + v = (v << 1) | d; + } + + v += offset; + + if 
(s == 0) + { + return v; + } + else if (s == 1 && v > 0) + { + return -v; + } + + return long.MaxValue; + } + + /// + /// The IAID decoding procedure, Annex A.3. + /// + /// The contexts and statistics for decoding procedure. + /// Symbol code length + /// The decoded value + public int DecodeIAID(CX cxIAID, long symCodeLen) + { + // A.3 1) + prev = 1; + + // A.3 2) + for (int i = 0; i < symCodeLen; i++) + { + cxIAID.Index = prev; + prev = (prev << 1) | decoder.Decode(cxIAID); + } + + // A.3 3) & 4) + return (prev - (1 << (int)symCodeLen)); + } + + private void SetPrev(int bit) + { + if (prev < 256) + { + prev = ((prev << 1) | bit) & 0x1ff; + } + else + { + prev = ((((prev << 1) | bit) & 511) | 256) & 0x1ff; + } + } + } +} diff --git a/src/UglyToad.PdfPig/Filters/Jbig2/CX.cs b/src/UglyToad.PdfPig/Filters/Jbig2/CX.cs new file mode 100644 index 000000000..b8557a777 --- /dev/null +++ b/src/UglyToad.PdfPig/Filters/Jbig2/CX.cs @@ -0,0 +1,41 @@ +namespace UglyToad.PdfPig.Filters.Jbig2 +{ + /// + /// CX represents the context used by arithmetic decoding and arithmetic integer decoding. It selects the probability + /// estimate and statistics used during decoding procedure. + /// + internal sealed class CX + { + private readonly byte[] cx; + private readonly byte[] mps; + + public int Index { get; set; } + + public int Cx { get => cx[Index] & 0x7f; set => cx[Index] = (byte)(value & 0x7f); } + + /// + /// Returns the decision. Possible values are 0 or 1. + /// + public byte Mps => mps[Index]; + + /// + /// Creates a new instance + /// + /// Number of context values + /// Start index + public CX(int size, int index) + { + Index = index; + cx = new byte[size]; + mps = new byte[size]; + } + + /// + /// Flips the bit in actual "more predictable symbol" array element. 
+ /// + public void ToggleMps() + { + mps[Index] ^= 1; + } + } +} \ No newline at end of file diff --git a/src/UglyToad.PdfPig/Filters/Jbig2/CombinationOperator.cs b/src/UglyToad.PdfPig/Filters/Jbig2/CombinationOperator.cs new file mode 100644 index 000000000..4f7005d89 --- /dev/null +++ b/src/UglyToad.PdfPig/Filters/Jbig2/CombinationOperator.cs @@ -0,0 +1,34 @@ +namespace UglyToad.PdfPig.Filters.Jbig2 +{ + /// + /// This enumeration keeps the available logical operator defined in the JBIG2 ISO standard. + /// + internal enum CombinationOperator + { + OR, AND, XOR, XNOR, REPLACE + } + + internal static class CombinationOperators + { + public static CombinationOperator TranslateOperatorCodeToEnum(short combinationOperatorCode) + { + switch (combinationOperatorCode) + { + case 0: + return CombinationOperator.OR; + + case 1: + return CombinationOperator.AND; + + case 2: + return CombinationOperator.XOR; + + case 3: + return CombinationOperator.XNOR; + + default: + return CombinationOperator.REPLACE; + } + } + } +} diff --git a/src/UglyToad.PdfPig/Filters/Jbig2/EncodedTable.cs b/src/UglyToad.PdfPig/Filters/Jbig2/EncodedTable.cs new file mode 100644 index 000000000..4de73c46a --- /dev/null +++ b/src/UglyToad.PdfPig/Filters/Jbig2/EncodedTable.cs @@ -0,0 +1,65 @@ +namespace UglyToad.PdfPig.Filters.Jbig2 +{ + using System.Collections.Generic; + + /// + /// This class represents a encoded huffman table. 
+ /// + internal sealed class EncodedTable : HuffmanTable + { + private readonly Table table; + + public EncodedTable(Table table) + { + this.table = table; + ParseTable(); + } + + public void ParseTable() + { + var sis = table.getSubInputStream(); + + var codeTable = new List<Code>(); + + int prefLen, rangeLen, rangeLow; + int curRangeLow = table.HtLow; + + // Annex B.2 5) - decode table lines + while (curRangeLow < table.HtHigh) + { + prefLen = (int)sis.ReadBits(table.HtPS); + rangeLen = (int)sis.ReadBits(table.HtRS); + rangeLow = curRangeLow; + + codeTable.Add(new Code(prefLen, rangeLen, rangeLow, false)); + + curRangeLow += 1 << rangeLen; + } + + // Annex B.2 6) + prefLen = (int)sis.ReadBits(table.HtPS); + + // Annex B.2 7) - lower range table line + rangeLen = 32; + rangeLow = table.HtLow - 1; + codeTable.Add(new Code(prefLen, rangeLen, rangeLow, true)); + + // Annex B.2 8) + prefLen = (int)sis.ReadBits(table.HtPS); + + // Annex B.2 9) - upper range table line + rangeLen = 32; + rangeLow = table.HtHigh; + codeTable.Add(new Code(prefLen, rangeLen, rangeLow, false)); + + // Annex B.2 10) - out-of-band table line + if (table.HtOutOfBand == 1) + { + prefLen = (int)sis.ReadBits(table.HtPS); + codeTable.Add(new Code(prefLen, -1, -1, false)); + } + + InitTree(codeTable); + } + } +} \ No newline at end of file diff --git a/src/UglyToad.PdfPig/Filters/Jbig2/EndOfStripe.cs b/src/UglyToad.PdfPig/Filters/Jbig2/EndOfStripe.cs new file mode 100644 index 000000000..f13df7af7 --- /dev/null +++ b/src/UglyToad.PdfPig/Filters/Jbig2/EndOfStripe.cs @@ -0,0 +1,27 @@ +namespace UglyToad.PdfPig.Filters.Jbig2 +{ + /// + /// This segment flags an end of stripe (see JBIG2 ISO standard, 7.4.9). 
+ /// + internal sealed class EndOfStripe : ISegmentData + { + private SubInputStream subInputStream; + private int lineNumber; + + private void ParseHeader() + { + lineNumber = (int)(subInputStream.ReadBits(32) & 0xffffffff); + } + + public void Init(SegmentHeader header, SubInputStream sis) + { + subInputStream = sis; + ParseHeader(); + } + + public int GetLineNumber() + { + return lineNumber; + } + } +} diff --git a/src/UglyToad.PdfPig/Filters/Jbig2/FixedSizeTable.cs b/src/UglyToad.PdfPig/Filters/Jbig2/FixedSizeTable.cs new file mode 100644 index 000000000..1eb73b97d --- /dev/null +++ b/src/UglyToad.PdfPig/Filters/Jbig2/FixedSizeTable.cs @@ -0,0 +1,15 @@ +namespace UglyToad.PdfPig.Filters.Jbig2 +{ + using System.Collections.Generic; + + /// + /// This class represents a fixed size huffman table. + /// + internal sealed class FixedSizeTable : HuffmanTable + { + public FixedSizeTable(List<Code> runCodeTable) + { + InitTree(runCodeTable); + } + } +} diff --git a/src/UglyToad.PdfPig/Filters/Jbig2/GenericRefinementRegion.cs b/src/UglyToad.PdfPig/Filters/Jbig2/GenericRefinementRegion.cs new file mode 100644 index 000000000..053e5d78d --- /dev/null +++ b/src/UglyToad.PdfPig/Filters/Jbig2/GenericRefinementRegion.cs @@ -0,0 +1,882 @@ +namespace UglyToad.PdfPig.Filters.Jbig2 +{ + using System; + + /// + /// This class represents a generic refinement region and implements the procedure described in JBIG2 ISO standard, 6.3 + /// and 7.4.7. 
+ /// + internal sealed class GenericRefinementRegion : IRegion + { + public abstract class Template + { + internal abstract short Form(short c1, short c2, short c3, short c4, short c5); + + internal abstract void SetIndex(CX cx); + } + + private class Template0 : Template + { + internal override sealed short Form(short c1, short c2, short c3, short c4, short c5) + { + return (short)((c1 << 10) | (c2 << 7) | (c3 << 4) | (c4 << 1) | (int)c5); + } + + internal override sealed void SetIndex(CX cx) + { + // Figure 14, page 22 + cx.Index = 0x100; + } + } + + private class Template1 : Template + { + internal override sealed short Form(short c1, short c2, short c3, short c4, short c5) + { + return (short)(((c1 & 0x02) << 8) | (c2 << 6) | ((c3 & 0x03) << 4) | (c4 << 1) | (int)c5); + } + + internal override sealed void SetIndex(CX cx) + { + // Figure 15, page 22 + cx.Index = 0x080; + } + } + + private static readonly Template T0 = new Template0(); + private static readonly Template T1 = new Template1(); + + private SubInputStream subInputStream; + + private SegmentHeader segmentHeader; + + // Region segment information flags, 7.4.1 + public RegionSegmentInformation RegionInfo { get; private set; } + + // Generic refinement region segment flags, 7.4.7.2 + private bool isTPGROn; + private short templateID; + + private Template template; + // Generic refinement region segment AT flags, 7.4.7.3 + private short[] grAtX; + private short[] grAtY; + + // Decoded data as pixel values (use row stride/width to wrap line) + private Jbig2Bitmap regionBitmap; + + // Variables for decoding + private Jbig2Bitmap referenceBitmap; + private int referenceDX; + private int referenceDY; + + private ArithmeticDecoder arithDecoder; + private CX cx; + + // If true, AT pixels are not on their nominal location and have to be overridden. 
+ private bool @override; + private bool[] grAtOverride; + + public GenericRefinementRegion() + { + } + + public GenericRefinementRegion(SubInputStream subInputStream) + { + this.subInputStream = subInputStream; + this.RegionInfo = new RegionSegmentInformation(subInputStream); + } + + public GenericRefinementRegion(SubInputStream subInputStream, + SegmentHeader segmentHeader) + { + this.subInputStream = subInputStream; + this.segmentHeader = segmentHeader; + this.RegionInfo = new RegionSegmentInformation(subInputStream); + } + + /// + /// Parses the flags described in JBIG2 ISO standard: + /// 7.4.7.2 Generic refinement region segment flags. + /// 7.4.7.3 Generic refinement refion segment AT flags. + /// + private void ParseHeader() + { + RegionInfo.ParseHeader(); + + // Bit 2-7 + subInputStream.ReadBits(6); // Dirty read... + + // Bit 1 + if (subInputStream.ReadBit() == 1) + { + isTPGROn = true; + } + + // Bit 0 + templateID = (short)subInputStream.ReadBit(); + + switch (templateID) + { + case 0: + template = T0; + ReadAtPixels(); + break; + + case 1: + template = T1; + break; + } + } + + private void ReadAtPixels() + { + grAtX = new short[2]; + grAtY = new short[2]; + + // Byte 0 + grAtX[0] = (sbyte)subInputStream.ReadByte(); + // Byte 1 + grAtY[0] = (sbyte)subInputStream.ReadByte(); + // Byte 2 + grAtX[1] = (sbyte)subInputStream.ReadByte(); + // Byte 3 + grAtY[1] = (sbyte)subInputStream.ReadByte(); + } + + /// + /// Decode using a template and arithmetic coding, as described in 6.3.5.6 + /// + /// The decoded . 
+ /// if an underlying IO operation fails + /// if a segment header value is invalid + /// if the maximum value limit of an integer is exceeded + public Jbig2Bitmap GetRegionBitmap() + { + if (null == regionBitmap) + { + // 6.3.5.6 - 1) + int isLineTypicalPredicted = 0; + + if (referenceBitmap == null) + { + // Get the reference bitmap, which is the base of refinement process + referenceBitmap = GetGrReference(); + } + + if (arithDecoder == null) + { + arithDecoder = new ArithmeticDecoder(subInputStream); + } + + if (cx == null) + { + cx = new CX(8192, 1); + } + + // 6.3.5.6 - 2) + regionBitmap = new Jbig2Bitmap(RegionInfo.BitmapWidth, RegionInfo.BitmapHeight); + + if (templateID == 0) + { + // AT pixel may only occur in template 0 + UpdateOverride(); + } + + int paddedWidth = (regionBitmap.Width + 7) & -8; + int deltaRefStride = isTPGROn ? -referenceDY * referenceBitmap.RowStride : 0; + int yOffset = deltaRefStride + 1; + + // 6.3.5.6 - 3) + for (int y = 0; y < regionBitmap.Height; y++) + { + // 6.3.5.6 - 3 b) + if (isTPGROn) + { + isLineTypicalPredicted ^= DecodeSLTP(); + } + + if (isLineTypicalPredicted == 0) + { + // 6.3.5.6 - 3 c) + DecodeOptimized(y, regionBitmap.Width, regionBitmap.RowStride, + referenceBitmap.RowStride, paddedWidth, deltaRefStride, yOffset); + } + else + { + // 6.3.5.6 - 3 d) + DecodeTypicalPredictedLine(y, regionBitmap.Width, + regionBitmap.RowStride, referenceBitmap.RowStride, + paddedWidth, deltaRefStride); + } + } + } + // 6.3.5.6 - 4) + return regionBitmap; + } + + private int DecodeSLTP() + { + template.SetIndex(cx); + return arithDecoder.Decode(cx); + } + + private Jbig2Bitmap GetGrReference() + { + SegmentHeader[] segments = segmentHeader.GetRtSegments(); + IRegion region = (IRegion)segments[0].GetSegmentData(); + + return region.GetRegionBitmap(); + } + + private void DecodeOptimized(int lineNumber, int width, int rowStride, + int refRowStride, int paddedWidth, int deltaRefStride, + int lineOffset) + { + + // Offset of the 
reference bitmap with respect to the bitmap being decoded + // For example: if referenceDY = -1, y is 1 HIGHER that currY + int currentLine = lineNumber - referenceDY; + int referenceByteIndex = referenceBitmap.GetByteIndex(Math.Max(0, -referenceDX), + currentLine); + + int byteIndex = regionBitmap.GetByteIndex(Math.Max(0, referenceDX), lineNumber); + + switch (templateID) + { + case 0: + DecodeTemplate(lineNumber, width, rowStride, refRowStride, paddedWidth, deltaRefStride, + lineOffset, byteIndex, currentLine, referenceByteIndex, T0); + break; + case 1: + DecodeTemplate(lineNumber, width, rowStride, refRowStride, paddedWidth, deltaRefStride, + lineOffset, byteIndex, currentLine, referenceByteIndex, T1); + break; + } + + } + + private void DecodeTemplate(int lineNumber, int width, int rowStride, + int refRowStride, int paddedWidth, int deltaRefStride, + int lineOffset, int byteIndex, int currentLine, int refByteIndex, + Template templateFormation) + { + short c1, c2, c3, c4, c5; + + int w1, w2, w3, w4; + w1 = w2 = w3 = w4 = 0; + + if (currentLine >= 1 && (currentLine - 1) < referenceBitmap.Height) + { + w1 = referenceBitmap.GetByteAsInteger(refByteIndex - refRowStride); + } + + if (currentLine >= 0 && currentLine < referenceBitmap.Height) + { + w2 = referenceBitmap.GetByteAsInteger(refByteIndex); + } + + if (currentLine >= -1 && currentLine + 1 < referenceBitmap.Height) + { + w3 = referenceBitmap.GetByteAsInteger(refByteIndex + refRowStride); + } + + refByteIndex++; + + if (lineNumber >= 1) + { + w4 = regionBitmap.GetByteAsInteger(byteIndex - rowStride); + } + + byteIndex++; + + int modReferenceDX = referenceDX % 8; + int shiftOffset = 6 + modReferenceDX; + int modRefByteIdx = refByteIndex % refRowStride; + + if (shiftOffset >= 0) + { + c1 = (short)((shiftOffset >= 8 ? 0 : ((int)((uint)w1 >> shiftOffset))) & 0x07); + c2 = (short)((shiftOffset >= 8 ? 0 : ((int)((uint)w2 >> shiftOffset))) & 0x07); + c3 = (short)((shiftOffset >= 8 ? 
0 : ((int)((uint)w3 >> shiftOffset))) & 0x07); + if (shiftOffset == 6 && modRefByteIdx > 1) + { + if (currentLine >= 1 && (currentLine - 1) < referenceBitmap.Height) + { + c1 = (short)((int)c1 | (referenceBitmap.GetByteAsInteger(refByteIndex - refRowStride - 2) << 2) & 0x04); + } + if (currentLine >= 0 && currentLine < referenceBitmap.Height) + { + c2 = (short)((int)c2 | (referenceBitmap.GetByteAsInteger(refByteIndex - 2) << 2) & 0x04); + } + if (currentLine >= -1 && currentLine + 1 < referenceBitmap.Height) + { + c3 = (short)((int)c3 | (referenceBitmap.GetByteAsInteger(refByteIndex + refRowStride - 2) << 2) & 0x04); + } + } + if (shiftOffset == 0) + { + w1 = w2 = w3 = 0; + if (modRefByteIdx < refRowStride - 1) + { + if (currentLine >= 1 && (currentLine - 1) < referenceBitmap.Height) + { + w1 = referenceBitmap.GetByteAsInteger(refByteIndex - refRowStride); + } + + if (currentLine >= 0 && currentLine < referenceBitmap.Height) + { + w2 = referenceBitmap.GetByteAsInteger(refByteIndex); + } + + if (currentLine >= -1 && currentLine + 1 < referenceBitmap.Height) + { + w3 = referenceBitmap.GetByteAsInteger(refByteIndex + refRowStride); + } + } + refByteIndex++; + } + } + else + { + c1 = (short)((w1 << 1) & 0x07); + c2 = (short)((w2 << 1) & 0x07); + c3 = (short)((w3 << 1) & 0x07); + w1 = w2 = w3 = 0; + if (modRefByteIdx < refRowStride - 1) + { + if (currentLine >= 1 && (currentLine - 1) < referenceBitmap.Height) + { + w1 = referenceBitmap.GetByteAsInteger(refByteIndex - refRowStride); + } + + if (currentLine >= 0 && currentLine < referenceBitmap.Height) + { + w2 = referenceBitmap.GetByteAsInteger(refByteIndex); + } + + if (currentLine >= -1 && currentLine + 1 < referenceBitmap.Height) + { + w3 = referenceBitmap.GetByteAsInteger(refByteIndex + refRowStride); + } + + refByteIndex++; + } + c1 |= (short)((int)((uint)w1 >> 7) & 0x07); + c2 |= (short)((int)((uint)w2 >> 7) & 0x07); + c3 |= (short)((int)((uint)w3 >> 7) & 0x07); + } + + c4 = (short)(int)((uint)w4 >> 6); + c5 = 0; + 
+ int modBitsToTrim = (2 - modReferenceDX) % 8; + w1 <<= modBitsToTrim; + w2 <<= modBitsToTrim; + w3 <<= modBitsToTrim; + + w4 <<= 2; + + for (int x = 0; x < width; x++) + { + int minorX = x & 0x07; + + short tval = templateFormation.Form(c1, c2, c3, c4, c5); + + if (@override) + { + cx.Index = OverrideAtTemplate0(tval, x, lineNumber, + regionBitmap.GetByte(regionBitmap.GetByteIndex(x, lineNumber)), minorX); + } + else + { + cx.Index = tval; + } + int bit = arithDecoder.Decode(cx); + regionBitmap.SetPixel(x, lineNumber, (byte)bit); + + c1 = (short)(((c1 << 1) | 0x01 & ((int)((uint)w1 >> 7))) & 0x07); + c2 = (short)(((c2 << 1) | 0x01 & ((int)((uint)w2 >> 7))) & 0x07); + c3 = (short)(((c3 << 1) | 0x01 & ((int)((uint)w3 >> 7))) & 0x07); + c4 = (short)(((c4 << 1) | 0x01 & ((int)((uint)w4 >> 7))) & 0x07); + c5 = (short)bit; + + if ((x - referenceDX) % 8 == 5) + { + if (((x - referenceDX) / 8) + 1 >= referenceBitmap.RowStride) + { + w1 = w2 = w3 = 0; + } + else + { + if (currentLine >= 1 && (currentLine - 1 < referenceBitmap.Height)) + { + w1 = referenceBitmap.GetByteAsInteger(refByteIndex - refRowStride); + } + else + { + w1 = 0; + } + if (currentLine >= 0 && currentLine < referenceBitmap.Height) + { + w2 = referenceBitmap.GetByteAsInteger(refByteIndex); + } + else + { + w2 = 0; + } + if (currentLine >= -1 && (currentLine + 1) < referenceBitmap.Height) + { + w3 = referenceBitmap.GetByteAsInteger(refByteIndex + refRowStride); + } + else + { + w3 = 0; + } + } + refByteIndex++; + } + else + { + w1 <<= 1; + w2 <<= 1; + w3 <<= 1; + } + + if (minorX == 5 && lineNumber >= 1) + { + if ((x >> 3) + 1 >= regionBitmap.RowStride) + { + w4 = 0; + } + else + { + w4 = regionBitmap.GetByteAsInteger(byteIndex - rowStride); + } + byteIndex++; + } + else + { + w4 <<= 1; + } + } + } + + private void UpdateOverride() + { + if (grAtX == null || grAtY == null) + { + return; + } + + if (grAtX.Length != grAtY.Length) + { + return; + } + + grAtOverride = new bool[grAtX.Length]; + + switch 
(templateID) + { + case 0: // AT pixels are treated as nominal when located at (-1, -1) + if (grAtX[0] != -1 || grAtY[0] != -1) // off-nominal if EITHER coordinate differs (matches GenericRegion.UpdateOverrideFlags) + { + grAtOverride[0] = true; + @override = true; + } + + if (grAtX[1] != -1 || grAtY[1] != -1) // same rule for the second AT pixel + { + grAtOverride[1] = true; + @override = true; + } + break; + case 1: + @override = false; + break; + } + } + + private void DecodeTypicalPredictedLine(int lineNumber, int width, + int rowStride, int refRowStride, int paddedWidth, + int deltaRefStride) + { + // Offset of the reference bitmap with respect to the bitmap being + // decoded + // For example: if grReferenceDY = -1, y is 1 HIGHER than currY + int currentLine = lineNumber - referenceDY; + int refByteIndex = referenceBitmap.GetByteIndex(0, currentLine); + + int byteIndex = regionBitmap.GetByteIndex(0, lineNumber); + + switch (templateID) + { + case 0: + DecodeTypicalPredictedLineTemplate0(lineNumber, width, rowStride, refRowStride, + paddedWidth, deltaRefStride, byteIndex, currentLine, refByteIndex); + break; + case 1: + DecodeTypicalPredictedLineTemplate1(lineNumber, width, rowStride, refRowStride, + paddedWidth, deltaRefStride, byteIndex, currentLine, refByteIndex); + break; + } + } + + private void DecodeTypicalPredictedLineTemplate0(int lineNumber, int width, + int rowStride, int refRowStride, int paddedWidth, + int deltaRefStride, int byteIndex, int currentLine, int refByteIndex) + { + int context; + int overriddenContext; + + int previousLine; + int previousReferenceLine; + int currentReferenceLine; + int nextReferenceLine; + + if (lineNumber > 0) + { + previousLine = regionBitmap.GetByteAsInteger(byteIndex - rowStride); + } + else + { + previousLine = 0; + } + + if (currentLine > 0 && currentLine <= referenceBitmap.Height) + { + previousReferenceLine = referenceBitmap + .GetByteAsInteger(refByteIndex - refRowStride + deltaRefStride) << 4; + } + else + { + previousReferenceLine = 0; + } + + if (currentLine >= 0 && currentLine < referenceBitmap.Height) + { + currentReferenceLine = referenceBitmap + .GetByteAsInteger(refByteIndex +
deltaRefStride) << 1; + } + else + { + currentReferenceLine = 0; + } + + if (currentLine > -2 && currentLine < (referenceBitmap.Height - 1)) + { + nextReferenceLine = referenceBitmap + .GetByteAsInteger(refByteIndex + refRowStride + deltaRefStride); + } + else + { + nextReferenceLine = 0; + } + + context = ((previousLine >> 5) & 0x6) | ((nextReferenceLine >> 2) & 0x30) + | (currentReferenceLine & 0x180) | (previousReferenceLine & 0xc00); + + int nextByte; + for (int x = 0; x < paddedWidth; x = nextByte) + { + byte result = 0; + nextByte = x + 8; + int minorWidth = width - x > 8 ? 8 : width - x; + bool readNextByte = nextByte < width; + bool refReadNextByte = nextByte < referenceBitmap.Width; + + int yOffset = deltaRefStride + 1; + + if (lineNumber > 0) + { + previousLine = (previousLine << 8) | (readNextByte + ? regionBitmap.GetByteAsInteger(byteIndex - rowStride + 1) : 0); + } + + if (currentLine > 0 && currentLine <= referenceBitmap.Height) + { + previousReferenceLine = (previousReferenceLine << 8) + | (refReadNextByte ? referenceBitmap + .GetByteAsInteger(refByteIndex - refRowStride + yOffset) << 4 : 0); + } + + if (currentLine >= 0 && currentLine < referenceBitmap.Height) + { + currentReferenceLine = (currentReferenceLine << 8) | (refReadNextByte + ? referenceBitmap.GetByteAsInteger(refByteIndex + yOffset) << 1 : 0); + } + + if (currentLine > -2 && currentLine < (referenceBitmap.Height - 1)) + { + nextReferenceLine = (nextReferenceLine << 8) | (refReadNextByte + ? 
referenceBitmap.GetByteAsInteger(refByteIndex + refRowStride + yOffset) + : 0); + } + + for (int minorX = 0; minorX < minorWidth; minorX++) + { + bool isPixelTypicalPredicted = false; + int bit = 0; + + // i) + int bitmapValue = (context >> 4) & 0x1FF; + + if (bitmapValue == 0x1ff) + { + isPixelTypicalPredicted = true; + bit = 1; + } + else if (bitmapValue == 0x00) + { + isPixelTypicalPredicted = true; + bit = 0; + } + + if (!isPixelTypicalPredicted) + { + // iii) - is like 3 c) but for one pixel only + if (@override) + { + overriddenContext = OverrideAtTemplate0(context, x + minorX, lineNumber, + result, minorX); + cx.Index = overriddenContext; + } + else + { + cx.Index = context; + } + bit = arithDecoder.Decode(cx); + } + + int toShift = 7 - minorX; + result = (byte)(result | bit << toShift); + + context = ((context & 0xdb6) << 1) | bit | ((previousLine >> toShift + 5) & 0x002) + | ((nextReferenceLine >> toShift + 2) & 0x010) + | ((currentReferenceLine >> toShift) & 0x080) + | ((previousReferenceLine >> toShift) & 0x400); + } + regionBitmap.SetByte(byteIndex++, result); + refByteIndex++; + } + } + + private void DecodeTypicalPredictedLineTemplate1(int lineNumber, int width, + int rowStride, int refRowStride, int paddedWidth, + int deltaRefStride, int byteIndex, int currentLine, int refByteIndex) + { + int context; + int grReferenceValue; + + int previousLine; + int previousReferenceLine; + int currentReferenceLine; + int nextReferenceLine; + + if (lineNumber > 0) + { + previousLine = regionBitmap.GetByteAsInteger(byteIndex - rowStride); + } + else + { + previousLine = 0; + } + + if (currentLine > 0 && currentLine <= referenceBitmap.Height) + { + previousReferenceLine = referenceBitmap + .GetByteAsInteger(refByteIndex - refRowStride + deltaRefStride) << 2; // index the reference bitmap with refByteIndex, as the loop below and Template0 do + } + else + { + previousReferenceLine = 0; + } + + if (currentLine >= 0 && currentLine < referenceBitmap.Height) + { + currentReferenceLine = referenceBitmap.GetByteAsInteger(refByteIndex + deltaRefStride); // was byteIndex (region bitmap index) + } +
else + { + currentReferenceLine = 0; + } + + if (currentLine > -2 && currentLine < (referenceBitmap.Height - 1)) + { + nextReferenceLine = referenceBitmap + .GetByteAsInteger(refByteIndex + refRowStride + deltaRefStride); // was byteIndex (region bitmap index) + } + else + { + nextReferenceLine = 0; + } + + context = ((previousLine >> 5) & 0x6) | ((nextReferenceLine >> 2) & 0x30) + | (currentReferenceLine & 0xc0) | (previousReferenceLine & 0x200); + + grReferenceValue = ((nextReferenceLine >> 2) & 0x70) | (currentReferenceLine & 0xc0) + | (previousReferenceLine & 0x700); + + int nextByte; + for (int x = 0; x < paddedWidth; x = nextByte) + { + byte result = 0; + nextByte = x + 8; + int minorWidth = width - x > 8 ? 8 : width - x; + bool readNextByte = nextByte < width; + bool refReadNextByte = nextByte < referenceBitmap.Width; + + int yOffset = deltaRefStride + 1; + + if (lineNumber > 0) + { + previousLine = (previousLine << 8) | (readNextByte + ? regionBitmap.GetByteAsInteger(byteIndex - rowStride + 1) : 0); + } + + if (currentLine > 0 && currentLine <= referenceBitmap.Height) + { + previousReferenceLine = (previousReferenceLine << 8) + | (refReadNextByte ? referenceBitmap + .GetByteAsInteger(refByteIndex - refRowStride + yOffset) << 2 : 0); + } + + if (currentLine >= 0 && currentLine < referenceBitmap.Height) + { + currentReferenceLine = (currentReferenceLine << 8) | (refReadNextByte + ? referenceBitmap.GetByteAsInteger(refByteIndex + yOffset) : 0); + } + + if (currentLine > -2 && currentLine < (referenceBitmap.Height - 1)) + { + nextReferenceLine = (nextReferenceLine << 8) | (refReadNextByte + ?
referenceBitmap.GetByteAsInteger(refByteIndex + refRowStride + yOffset) + : 0); + } + + for (int minorX = 0; minorX < minorWidth; minorX++) + { + int bit; + + // i) + int bitmapValue = (grReferenceValue >> 4) & 0x1ff; + + if (bitmapValue == 0x1ff) + { + bit = 1; + } + else if (bitmapValue == 0x00) + { + bit = 0; + } + else + { + cx.Index = context; + bit = arithDecoder.Decode(cx); + } + + int toShift = 7 - minorX; + result = (byte)(result | bit << toShift); + + context = ((context & 0x0d6) << 1) | bit | ((previousLine >> toShift + 5) & 0x002) + | ((nextReferenceLine >> toShift + 2) & 0x010) + | ((currentReferenceLine >> toShift) & 0x040) + | ((previousReferenceLine >> toShift) & 0x200); + + grReferenceValue = ((grReferenceValue & 0x0db) << 1) + | ((nextReferenceLine >> toShift + 2) & 0x010) + | ((currentReferenceLine >> toShift) & 0x080) + | ((previousReferenceLine >> toShift) & 0x400); + } + regionBitmap.SetByte(byteIndex++, result); + refByteIndex++; + } + } + + private int OverrideAtTemplate0(int context, int x, int y, int result, int minorX) + { + if (grAtOverride[0]) + { + context &= 0xfff7; + if (grAtY[0] == 0 && grAtX[0] >= -minorX) + { + context |= (result >> (7 - (minorX + grAtX[0])) & 0x1) << 3; + } + else + { + context |= GetPixel(regionBitmap, x + grAtX[0], y + grAtY[0]) << 3; + } + } + + if (grAtOverride[1]) + { + context &= 0xefff; + if (grAtY[1] == 0 && grAtX[1] >= -minorX) + { + context |= (result >> (7 - (minorX + grAtX[1])) & 0x1) << 12; + } + else + { + context |= GetPixel(referenceBitmap, x + grAtX[1] + referenceDX, + y + grAtY[1] + referenceDY) << 12; + } + } + return context; + } + + private static byte GetPixel(Jbig2Bitmap b, int x, int y) + { + if (x < 0 || x >= b.Width) + { + return 0; + } + if (y < 0 || y >= b.Height) + { + return 0; + } + + return b.GetPixel(x, y); + } + + public void Init(SegmentHeader header, SubInputStream sis) + { + segmentHeader = header; + subInputStream = sis; + RegionInfo = new 
RegionSegmentInformation(subInputStream); + ParseHeader(); + } + + internal void SetParameters(CX cx, ArithmeticDecoder arithmeticDecoder, + short grTemplate, int regionWidth, int regionHeight, + Jbig2Bitmap grReference, int grReferenceDX, int grReferenceDY, + bool isTPGRon, short[] grAtX, short[] grAtY) + { + + if (null != cx) + { + this.cx = cx; + } + + if (null != arithmeticDecoder) + { + arithDecoder = arithmeticDecoder; + } + + templateID = grTemplate; + + RegionInfo.BitmapWidth = regionWidth; + RegionInfo.BitmapHeight = regionHeight; + + referenceBitmap = grReference; + referenceDX = grReferenceDX; + referenceDY = grReferenceDY; + + isTPGROn = isTPGRon; + + this.grAtX = grAtX; + this.grAtY = grAtY; + + regionBitmap = null; + } + } +} diff --git a/src/UglyToad.PdfPig/Filters/Jbig2/GenericRegion.cs b/src/UglyToad.PdfPig/Filters/Jbig2/GenericRegion.cs new file mode 100644 index 000000000..26dfe8659 --- /dev/null +++ b/src/UglyToad.PdfPig/Filters/Jbig2/GenericRegion.cs @@ -0,0 +1,1107 @@ +namespace UglyToad.PdfPig.Filters.Jbig2 +{ + /// + /// This class represents a generic region segment. + /// Parsing is done as described in 7.4.5. + /// Decoding procedure is done as described in 6.2.5.7 and 7.4.6.4. 
+ /// + internal sealed class GenericRegion : IRegion + { + private SubInputStream subInputStream; + private long dataHeaderOffset = 0; + private long dataHeaderLength; + private long dataOffset; + private long dataLength; + + // Generic region segment flags, 7.4.6.2 + public bool UseExtTemplates { get; private set; } + public bool IsTPGDon { get; private set; } + public byte GbTemplate { get; private set; } + public bool IsMMREncoded { get; private set; } + + // Generic region segment AT flags, 7.4.6.3 + public short[] GbAtX { get; private set; } + public short[] GbAtY { get; private set; } + private bool[] gbAtOverride; + + // If true, AT pixels are not on their nominal location and have to be overridden + private bool @override; + + // Decoded data as pixel values (use row stride/width to wrap line) + private Jbig2Bitmap regionBitmap; + + private ArithmeticDecoder arithDecoder; + private CX cx; + + private MMRDecompressor mmrDecompressor; + + // Region segment information field, 7.4.1 + public RegionSegmentInformation RegionInfo { get; private set; } + + public GenericRegion() + { + } + + public GenericRegion(SubInputStream subInputStream) + { + this.subInputStream = subInputStream; + this.RegionInfo = new RegionSegmentInformation(subInputStream); + } + + private void ParseHeader() + { + RegionInfo.ParseHeader(); + + // Bit 5-7 + subInputStream.ReadBits(3); // Dirty read... 
+ + // Bit 4 + if (subInputStream.ReadBit() == 1) + { + UseExtTemplates = true; + } + + // Bit 3 + if (subInputStream.ReadBit() == 1) + { + IsTPGDon = true; + } + + // Bit 1-2 + GbTemplate = (byte)(subInputStream.ReadBits(2) & 0xf); + + // Bit 0 + if (subInputStream.ReadBit() == 1) + { + IsMMREncoded = true; + } + + if (!IsMMREncoded) + { + int amountOfGbAt; + if (GbTemplate == 0) + { + if (UseExtTemplates) + { + amountOfGbAt = 12; + } + else + { + amountOfGbAt = 4; + } + } + else + { + amountOfGbAt = 1; + } + + ReadGbAtPixels(amountOfGbAt); + } + + ComputeSegmentDataStructure(); + } + + private void ReadGbAtPixels(int amountOfGbAt) + { + GbAtX = new short[amountOfGbAt]; + GbAtY = new short[amountOfGbAt]; + + for (int i = 0; i < amountOfGbAt; i++) + { + GbAtX[i] = (sbyte)subInputStream.ReadByte(); + GbAtY[i] = (sbyte)subInputStream.ReadByte(); + } + } + + private void ComputeSegmentDataStructure() + { + dataOffset = subInputStream.Position; + dataHeaderLength = dataOffset - dataHeaderOffset; + dataLength = subInputStream.Length - dataHeaderLength; + } + + /// + /// The procedure is described in 6.2.5.7, page 17. + /// + /// The decoded . 
+ public Jbig2Bitmap GetRegionBitmap() + { + if (null == regionBitmap) + { + if (IsMMREncoded) + { + // MMR DECODER CALL + if (null == mmrDecompressor) + { + mmrDecompressor = new MMRDecompressor(RegionInfo.BitmapWidth, + RegionInfo.BitmapHeight, + new SubInputStream(subInputStream, dataOffset, dataLength)); + } + + // 6.2.6 + regionBitmap = mmrDecompressor.Uncompress(); + + } + else + { + // ARITHMETIC DECODER PROCEDURE for generic region segments + UpdateOverrideFlags(); + + // 6.2.5.7 - 1) + int ltp = 0; + + if (arithDecoder == null) + { + arithDecoder = new ArithmeticDecoder(subInputStream); + } + if (cx == null) + { + cx = new CX(65536, 1); + } + + // 6.2.5.7 - 2) + regionBitmap = new Jbig2Bitmap(RegionInfo.BitmapWidth, + RegionInfo.BitmapHeight); + + int paddedWidth = (regionBitmap.Width + 7) & -8; + + // 6.2.5.7 - 3 + for (int line = 0; line < regionBitmap.Height; line++) + { + // 6.2.5.7 - 3 b) + if (IsTPGDon) + { + ltp ^= DecodeSLTP(); + } + + // 6.2.5.7 - 3 c) + if (ltp == 1) + { + if (line > 0) + { + CopyLineAbove(line); + } + } + else + { + // 3 d) + // NOT USED ATM - If corresponding pixel of SKIP bitmap is 0, set + // current pixel to 0. 
Something like that: + // if (useSkip) { + // for (int i = 1; i < rowstride; i++) { + // if (skip[pixel] == 1) { + // gbReg[pixel] = 0; + // } + // pixel++; + // } + // } else { + DecodeLine(line, regionBitmap.Width, regionBitmap.RowStride, + paddedWidth); + // } + } + } + } + } + + // 4 + return regionBitmap; + } + + private int DecodeSLTP() + { + switch (GbTemplate) + { + case 0: + cx.Index = 0x9b25; + break; + case 1: + cx.Index = 0x795; + break; + case 2: + cx.Index = 0xe5; + break; + case 3: + cx.Index = 0x195; + break; + } + return arithDecoder.Decode(cx); + } + + private void DecodeLine(int lineNumber, int width, int rowStride, + int paddedWidth) + { + int byteIndex = regionBitmap.GetByteIndex(0, lineNumber); + int idx = byteIndex - rowStride; + + switch (GbTemplate) + { + case 0: + if (!UseExtTemplates) + { + DecodeTemplate0a(lineNumber, width, rowStride, paddedWidth, byteIndex, idx); + } + else + { + DecodeTemplate0b(lineNumber, width, rowStride, paddedWidth, byteIndex, idx); + } + break; + case 1: + DecodeTemplate1(lineNumber, width, rowStride, paddedWidth, byteIndex, idx); + break; + case 2: + DecodeTemplate2(lineNumber, width, rowStride, paddedWidth, byteIndex, idx); + break; + case 3: + DecodeTemplate3(lineNumber, width, rowStride, paddedWidth, byteIndex, idx); + break; + } + } + + /// + /// Each pixel gets the value from the corresponding pixel of the row above. Line 0 cannot get copied values (source + /// will be -1, doesn't exist). + /// + /// Coordinate of the row that should be set. 
+ private void CopyLineAbove(int lineNumber) + { + int targetByteIndex = lineNumber * regionBitmap.RowStride; + int sourceByteIndex = targetByteIndex - regionBitmap.RowStride; + + for (int i = 0; i < regionBitmap.RowStride; i++) + { + // Get the byte that should be copied and put it into Bitmap + regionBitmap.SetByte(targetByteIndex++, regionBitmap.GetByte(sourceByteIndex++)); + } + } + + private void DecodeTemplate0a(int lineNumber, int width, int rowStride, + int paddedWidth, int byteIndex, int idx) + { + int context; + int overriddenContext; + + int line1 = 0; + int line2 = 0; + + if (lineNumber >= 1) + { + line1 = regionBitmap.GetByteAsInteger(idx); + } + + if (lineNumber >= 2) + { + line2 = regionBitmap.GetByteAsInteger(idx - rowStride) << 6; + } + + context = (line1 & 0xf0) | (line2 & 0x3800); + + int nextByte; + for (int x = 0; x < paddedWidth; x = nextByte) + { + // 6.2.5.7 3d + byte result = 0; + nextByte = x + 8; + int minorWidth = width - x > 8 ? 8 : width - x; + + if (lineNumber > 0) + { + line1 = (line1 << 8) + | (nextByte < width ? regionBitmap.GetByteAsInteger(idx + 1) : 0); + } + + if (lineNumber > 1) + { + line2 = (line2 << 8) | (nextByte < width + ? 
regionBitmap.GetByteAsInteger(idx - rowStride + 1) << 6 : 0); + } + + for (int minorX = 0; minorX < minorWidth; minorX++) + { + int toShift = 7 - minorX; + if (@override) + { + overriddenContext = OverrideAtTemplate0a(context, (x + minorX), lineNumber, + result, minorX, toShift); + cx.Index = overriddenContext; + } + else + { + cx.Index = context; + } + + int bit = arithDecoder.Decode(cx); + + result = (byte)(result | bit << toShift); + + context = ((context & 0x7bf7) << 1) | bit | ((line1 >> toShift) & 0x10) + | ((line2 >> toShift) & 0x800); + } + + regionBitmap.SetByte(byteIndex++, result); + idx++; + } + } + + private void DecodeTemplate0b(int lineNumber, int width, int rowStride, + int paddedWidth, int byteIndex, int idx) + { + int context; + int overriddenContext; + + int line1 = 0; + int line2 = 0; + + if (lineNumber >= 1) + { + line1 = regionBitmap.GetByteAsInteger(idx); + } + + if (lineNumber >= 2) + { + line2 = regionBitmap.GetByteAsInteger(idx - rowStride) << 6; + } + + context = (line1 & 0xf0) | (line2 & 0x3800); + + int nextByte; + for (int x = 0; x < paddedWidth; x = nextByte) + { + // 6.2.5.7 3d + byte result = 0; + nextByte = x + 8; + int minorWidth = width - x > 8 ? 8 : width - x; + + if (lineNumber > 0) + { + line1 = (line1 << 8) + | (nextByte < width ? regionBitmap.GetByteAsInteger(idx + 1) : 0); + } + + if (lineNumber > 1) + { + line2 = (line2 << 8) | (nextByte < width + ? 
regionBitmap.GetByteAsInteger(idx - rowStride + 1) << 6 : 0); + } + + for (int minorX = 0; minorX < minorWidth; minorX++) + { + int toShift = 7 - minorX; + if (@override) + { + overriddenContext = OverrideAtTemplate0b(context, (x + minorX), lineNumber, + result, minorX, toShift); + cx.Index = overriddenContext; + } + else + { + cx.Index = context; + } + + int bit = arithDecoder.Decode(cx); + + result = (byte)(result | bit << toShift); + + context = ((context & 0x7bf7) << 1) | bit | ((line1 >> toShift) & 0x10) + | ((line2 >> toShift) & 0x800); + } + + regionBitmap.SetByte(byteIndex++, result); + idx++; + } + } + + private void DecodeTemplate1(int lineNumber, int width, int rowStride, + int paddedWidth, int byteIndex, int idx) + { + int context; + int overriddenContext; + + int line1 = 0; + int line2 = 0; + + if (lineNumber >= 1) + { + line1 = regionBitmap.GetByteAsInteger(idx); + } + + if (lineNumber >= 2) + { + line2 = regionBitmap.GetByteAsInteger(idx - rowStride) << 5; + } + + context = ((line1 >> 1) & 0x1f8) | ((line2 >> 1) & 0x1e00); + + int nextByte; + for (int x = 0; x < paddedWidth; x = nextByte) + { + // 6.2.5.7 3d + byte result = 0; + nextByte = x + 8; + int minorWidth = width - x > 8 ? 8 : width - x; + + if (lineNumber >= 1) + { + line1 = (line1 << 8) + | (nextByte < width ? regionBitmap.GetByteAsInteger(idx + 1) : 0); + } + + if (lineNumber >= 2) + { + line2 = (line2 << 8) | (nextByte < width + ? 
regionBitmap.GetByteAsInteger(idx - rowStride + 1) << 5 : 0); + } + + for (int minorX = 0; minorX < minorWidth; minorX++) + { + if (@override) + { + overriddenContext = OverrideAtTemplate1(context, x + minorX, lineNumber, result, + minorX); + cx.Index = overriddenContext; + } + else + { + cx.Index = context; + } + + int bit = arithDecoder.Decode(cx); + + result = (byte)(result | bit << 7 - minorX); + + int toShift = 8 - minorX; + context = ((context & 0xefb) << 1) | bit | ((line1 >> toShift) & 0x8) + | ((line2 >> toShift) & 0x200); + } + + regionBitmap.SetByte(byteIndex++, result); + idx++; + } + } + + private void DecodeTemplate2(int lineNumber, int width, int rowStride, + int paddedWidth, int byteIndex, int idx) + { + int context; + int overriddenContext; + + int line1 = 0; + int line2 = 0; + + if (lineNumber >= 1) + { + line1 = regionBitmap.GetByteAsInteger(idx); + } + + if (lineNumber >= 2) + { + line2 = regionBitmap.GetByteAsInteger(idx - rowStride) << 4; + } + + context = ((line1 >> 3) & 0x7c) | ((line2 >> 3) & 0x380); + + int nextByte; + for (int x = 0; x < paddedWidth; x = nextByte) + { + // 6.2.5.7 3d + byte result = 0; + nextByte = x + 8; + int minorWidth = width - x > 8 ? 8 : width - x; + + if (lineNumber >= 1) + { + line1 = (line1 << 8) + | (nextByte < width ? regionBitmap.GetByteAsInteger(idx + 1) : 0); + } + + if (lineNumber >= 2) + { + line2 = (line2 << 8) | (nextByte < width + ? 
regionBitmap.GetByteAsInteger(idx - rowStride + 1) << 4 : 0); + } + + for (int minorX = 0; minorX < minorWidth; minorX++) + { + if (@override) + { + overriddenContext = OverrideAtTemplate2(context, x + minorX, lineNumber, result, + minorX); + cx.Index = overriddenContext; + } + else + { + cx.Index = context; + } + + int bit = arithDecoder.Decode(cx); + + result = (byte)(result | bit << (7 - minorX)); + + int toShift = 10 - minorX; + context = ((context & 0x1bd) << 1) | bit | ((line1 >> toShift) & 0x4) + | ((line2 >> toShift) & 0x80); + } + + regionBitmap.SetByte(byteIndex++, result); + idx++; + } + } + + private void DecodeTemplate3(int lineNumber, int width, int rowStride, + int paddedWidth, int byteIndex, int idx) + { + int context; + int overriddenContext; + + int line1 = 0; + + if (lineNumber >= 1) + { + line1 = regionBitmap.GetByteAsInteger(idx); + } + + context = (line1 >> 1) & 0x70; + + int nextByte; + for (int x = 0; x < paddedWidth; x = nextByte) + { + // 6.2.5.7 3d + byte result = 0; + nextByte = x + 8; + int minorWidth = width - x > 8 ? 8 : width - x; + + if (lineNumber >= 1) + { + line1 = (line1 << 8) + | (nextByte < width ? 
regionBitmap.GetByteAsInteger(idx + 1) : 0); + } + + for (int minorX = 0; minorX < minorWidth; minorX++) + { + if (@override) + { + overriddenContext = OverrideAtTemplate3(context, x + minorX, lineNumber, result, + minorX); + cx.Index = overriddenContext; + } + else + { + cx.Index = context; + } + + int bit = arithDecoder.Decode(cx); + + result = (byte)(result | bit << (7 - minorX)); + context = ((context & 0x1f7) << 1) | bit | ((line1 >> (8 - minorX)) & 0x010); + } + + regionBitmap.SetByte(byteIndex++, result); + idx++; + } + } + + private void UpdateOverrideFlags() + { + if (GbAtX == null || GbAtY == null) + { + return; + } + + if (GbAtX.Length != GbAtY.Length) + { + return; + } + + gbAtOverride = new bool[GbAtX.Length]; + + switch (GbTemplate) + { + case 0: + if (!UseExtTemplates) + { + if (GbAtX[0] != 3 || GbAtY[0] != -1) + { + SetOverrideFlag(0); + } + + if (GbAtX[1] != -3 || GbAtY[1] != -1) + { + SetOverrideFlag(1); + } + + if (GbAtX[2] != 2 || GbAtY[2] != -2) + { + SetOverrideFlag(2); + } + + if (GbAtX[3] != -2 || GbAtY[3] != -2) + { + SetOverrideFlag(3); + } + } + else + { + if (GbAtX[0] != -2 || GbAtY[0] != 0) + { + SetOverrideFlag(0); + } + + if (GbAtX[1] != 0 || GbAtY[1] != -2) + { + SetOverrideFlag(1); + } + + if (GbAtX[2] != -2 || GbAtY[2] != -1) + { + SetOverrideFlag(2); + } + + if (GbAtX[3] != -1 || GbAtY[3] != -2) + { + SetOverrideFlag(3); + } + + if (GbAtX[4] != 1 || GbAtY[4] != -2) + { + SetOverrideFlag(4); + } + + if (GbAtX[5] != 2 || GbAtY[5] != -1) + { + SetOverrideFlag(5); + } + + if (GbAtX[6] != -3 || GbAtY[6] != 0) + { + SetOverrideFlag(6); + } + + if (GbAtX[7] != -4 || GbAtY[7] != 0) + { + SetOverrideFlag(7); + } + + if (GbAtX[8] != 2 || GbAtY[8] != -2) + { + SetOverrideFlag(8); + } + + if (GbAtX[9] != 3 || GbAtY[9] != -1) + { + SetOverrideFlag(9); + } + + if (GbAtX[10] != -2 || GbAtY[10] != -2) + { + SetOverrideFlag(10); + } + + if (GbAtX[11] != -3 || GbAtY[11] != -1) + { + SetOverrideFlag(11); + } + } + break; + + case 1: + if (GbAtX[0] 
/// <summary>
/// Records that the adaptive template (AT) pixel at <paramref name="index"/> must be
/// overridden and raises the region-wide override flag.
/// </summary>
/// <param name="index">Index into <c>gbAtOverride</c>.</param>
private void SetOverrideFlag(int index)
{
    gbAtOverride[index] = true;
    @override = true;
}

// Context bit written by each AT pixel of template 0 with 4 AT pixels (index = AT pixel number).
private static readonly int[] Template0aContextBits = { 4, 10, 11, 15 };

// Context bit written by each AT pixel of template 0 with 12 AT pixels (index = AT pixel number).
private static readonly int[] Template0bContextBits = { 1, 13, 9, 14, 12, 5, 2, 3, 11, 4, 15, 10 };

/// <summary>
/// Replaces a single bit of <paramref name="context"/> with the value of one AT pixel.
/// Deriving both the clear mask and the shift from <paramref name="contextBit"/> guarantees
/// that the bit that is cleared is always the bit that is written.
/// </summary>
/// <param name="context">The context value to patch.</param>
/// <param name="contextMask">Mask limiting the context to the width used by the template.</param>
/// <param name="contextBit">The bit position inside the context that the AT pixel occupies.</param>
/// <param name="atIndex">Index into <c>GbAtX</c>/<c>GbAtY</c>.</param>
/// <param name="x">X coordinate of the pixel being decoded.</param>
/// <param name="y">Y coordinate of the pixel being decoded.</param>
/// <param name="result">Bits of the current row that were decoded so far.</param>
/// <param name="minorX">Position of the current pixel inside the current byte.</param>
/// <param name="rowShift">Shift that addresses the current pixel inside <paramref name="result"/>.</param>
/// <returns>The patched context.</returns>
private int ReplaceContextBit(int context, int contextMask, int contextBit, int atIndex,
    int x, int y, int result, int minorX, int rowShift)
{
    int atX = GbAtX[atIndex];
    int atY = GbAtY[atIndex];

    int pixel;
    if (atY == 0 && atX >= -minorX)
    {
        // The AT pixel lies in the part of the current row that is still held in 'result'.
        pixel = (result >> (rowShift - atX)) & 0x1;
    }
    else
    {
        pixel = GetPixel(x + atX, y + atY);
    }

    return (context & contextMask & ~(1 << contextBit)) | (pixel << contextBit);
}

/// <summary>
/// Applies the overridden AT pixels of template 0 (4 AT pixels) to the context.
/// </summary>
/// <returns>The context with every overridden AT bit replaced.</returns>
private int OverrideAtTemplate0a(int context, int x, int y, int result,
    int minorX, int toShift)
{
    for (int i = 0; i < Template0aContextBits.Length; i++)
    {
        if (gbAtOverride[i])
        {
            context = ReplaceContextBit(context, 0xffff, Template0aContextBits[i], i,
                x, y, result, minorX, toShift);
        }
    }

    return context;
}

/// <summary>
/// Applies the overridden AT pixels of template 0 (12 AT pixels) to the context.
/// </summary>
/// <remarks>
/// AT pixel 11 previously cleared bit 9 (mask 0xfdff) while writing bit 10, which left a
/// stale bit 10 in place and wiped the bit of AT pixel 2. Bit 10 is now cleared (0xfbff).
/// </remarks>
/// <returns>The context with every overridden AT bit replaced.</returns>
private int OverrideAtTemplate0b(int context, int x, int y, int result,
    int minorX, int toShift)
{
    for (int i = 0; i < Template0bContextBits.Length; i++)
    {
        if (gbAtOverride[i])
        {
            context = ReplaceContextBit(context, 0xffff, Template0bContextBits[i], i,
                x, y, result, minorX, toShift);
        }
    }

    return context;
}

/// <summary>
/// Replaces context bit 3 of template 1 (13-bit context) with AT pixel 0.
/// </summary>
private int OverrideAtTemplate1(int context, int x, int y, int result,
    int minorX)
{
    // 7 - (minorX + atX) == (7 - minorX) - atX
    return ReplaceContextBit(context, 0x1fff, 3, 0, x, y, result, minorX, 7 - minorX);
}

/// <summary>
/// Replaces context bit 2 of template 2 (10-bit context) with AT pixel 0.
/// </summary>
private int OverrideAtTemplate2(int context, int x, int y, int result,
    int minorX)
{
    return ReplaceContextBit(context, 0x3ff, 2, 0, x, y, result, minorX, 7 - minorX);
}

/// <summary>
/// Replaces context bit 4 of template 3 (10-bit context) with AT pixel 0.
/// </summary>
private int OverrideAtTemplate3(int context, int x, int y, int result,
    int minorX)
{
    return ReplaceContextBit(context, 0x3ff, 4, 0, x, y, result, minorX, 7 - minorX);
}

/// <summary>
/// Reads a pixel of the region bitmap; coordinates outside the bitmap read as 0.
/// </summary>
private byte GetPixel(int x, int y)
{
    if (x < 0 || x >= regionBitmap.Width)
    {
        return 0;
    }

    if (y < 0 || y >= regionBitmap.Height)
    {
        return 0;
    }

    return regionBitmap.GetPixel(x, y);
}
/// <summary>
/// Configures the region with a data window and bitmap size only, then drops any
/// MMR decompressor and previously decoded bitmap.
/// </summary>
/// <param name="isMMREncoded">whether the data is MMR-encoded</param>
/// <param name="dataOffset">the offset</param>
/// <param name="dataLength">the length of the data</param>
/// <param name="gbh">bitmap height</param>
/// <param name="gbw">bitmap width</param>
internal void SetParameters(bool isMMREncoded, long dataOffset,
    long dataLength, int gbh, int gbw)
{
    IsMMREncoded = isMMREncoded;

    this.dataOffset = dataOffset;
    this.dataLength = dataLength;

    RegionInfo.BitmapHeight = gbh;
    RegionInfo.BitmapWidth = gbw;

    mmrDecompressor = null;
    ResetBitmap();
}

/// <summary>
/// Configures the region with template settings and, optionally, a shared context and
/// arithmetic decoder, then drops any MMR decompressor and previously decoded bitmap.
/// </summary>
/// <param name="isMMREncoded">whether the data is MMR-encoded</param>
/// <param name="sdTemplate">sd template</param>
/// <param name="isTPGDon">is TPGDon</param>
/// <param name="useSkip">use skip (not read by this method)</param>
/// <param name="sdATX">x values gbA pixels</param>
/// <param name="sdATY">y values gbA pixels</param>
/// <param name="symWidth">bitmap width</param>
/// <param name="hcHeight">bitmap height</param>
/// <param name="cx">context for the arithmetic decoder; the current one is kept when null</param>
/// <param name="arithmeticDecoder">the arithmetic decoder to be used; the current one is kept when null</param>
internal void SetParameters(bool isMMREncoded, byte sdTemplate,
    bool isTPGDon, bool useSkip, short[] sdATX, short[] sdATY,
    int symWidth, int hcHeight, CX cx,
    ArithmeticDecoder arithmeticDecoder)
{
    IsMMREncoded = isMMREncoded;
    GbTemplate = sdTemplate;
    IsTPGDon = isTPGDon;

    GbAtX = sdATX;
    GbAtY = sdATY;

    RegionInfo.BitmapWidth = symWidth;
    RegionInfo.BitmapHeight = hcHeight;

    // Only replace the decoder state when the caller actually supplies one.
    this.cx = cx ?? this.cx;
    arithDecoder = arithmeticDecoder ?? arithDecoder;

    mmrDecompressor = null;
    ResetBitmap();
}

/// <summary>
/// Configures the region with a data window, a fresh region information object and
/// template settings. Unlike the other overloads it does not reset the bitmap.
/// </summary>
/// <param name="isMMREncoded">whether the data is MMR-encoded</param>
/// <param name="dataOffset">the offset</param>
/// <param name="dataLength">the length of the data</param>
/// <param name="gbh">bitmap height</param>
/// <param name="gbw">bitmap width</param>
/// <param name="gbTemplate">gb template</param>
/// <param name="isTPGDon">is TPGDon</param>
/// <param name="useSkip">use skip (not read by this method)</param>
/// <param name="gbAtX">x values of gbA pixels</param>
/// <param name="gbAtY">y values of gbA pixels</param>
internal void SetParameters(bool isMMREncoded, long dataOffset,
    long dataLength, int gbh, int gbw, byte gbTemplate,
    bool isTPGDon, bool useSkip, short[] gbAtX, short[] gbAtY)
{
    this.dataOffset = dataOffset;
    this.dataLength = dataLength;

    RegionInfo = new RegionSegmentInformation(gbw, gbh);
    GbTemplate = gbTemplate;

    IsMMREncoded = isMMREncoded;
    IsTPGDon = isTPGDon;

    GbAtX = gbAtX;
    GbAtY = gbAtY;
}

/// <summary>
/// Releases the memory-critical bitmap of this region by setting it to null.
/// </summary>
internal void ResetBitmap()
{
    regionBitmap = null;
}

/// <summary>
/// Binds the region to its input stream and parses the segment header.
/// </summary>
/// <param name="header">The segment header (not read by this method).</param>
/// <param name="sis">The stream the region data is read from.</param>
public void Init(SegmentHeader header, SubInputStream sis)
{
    subInputStream = sis;
    RegionInfo = new RegionSegmentInformation(sis);

    ParseHeader();
}
/// <summary>
/// This class represents the data of segment type "Halftone region". Parsing is described in 7.4.5, page 67.
/// Decoding procedure in 6.6.5 and 7.4.5.2.
/// </summary>
internal sealed class HalftoneRegion : IRegion
{
    private SubInputStream subInputStream;
    private SegmentHeader segmentHeader;
    private long dataHeaderOffset = 0;
    private long dataHeaderLength;
    private long dataOffset;
    private long dataLength;

    // Decoded data
    private Jbig2Bitmap halftoneRegionBitmap;

    // Previously decoded data from other regions or dictionaries, stored to use as patterns in this region.
    private List<Jbig2Bitmap> patterns;

    // Region segment information field, 7.4.1
    public RegionSegmentInformation RegionInfo { get; private set; }

    // Halftone segment information field, 7.4.5.1.1
    public byte HDefaultPixel { get; private set; }
    public CombinationOperator HCombinationOperator { get; private set; }
    public bool HSkipEnabled { get; private set; }
    public byte HTemplate { get; private set; }
    public bool IsMMREncoded { get; private set; }

    // Halftone grid position and size, 7.4.5.1.2
    // Width of the gray-scale image, 7.4.5.1.2.1
    public int HGridWidth { get; private set; }
    // Height of the gray-scale image, 7.4.5.1.2.2
    public int HGridHeight { get; private set; }
    // Horizontal offset of the grid, 7.4.5.1.2.3
    public int HGridX { get; private set; }
    // Vertical offset of the grid, 7.4.5.1.2.4
    public int HGridY { get; private set; }

    // Halftone grid vector, 7.4.5.1.3
    // Horizontal coordinate of the halftone grid vector, 7.4.5.1.3.1
    public int HRegionX { get; private set; }
    // Vertical coordinate of the halftone grid vector, 7.4.5.1.3.2
    public int HRegionY { get; private set; }

    public HalftoneRegion()
    {
    }

    public HalftoneRegion(SubInputStream subInputStream)
    {
        this.subInputStream = subInputStream;
        this.RegionInfo = new RegionSegmentInformation(subInputStream);
    }

    public HalftoneRegion(SubInputStream subInputStream, SegmentHeader segmentHeader)
    {
        this.subInputStream = subInputStream;
        this.segmentHeader = segmentHeader;
        this.RegionInfo = new RegionSegmentInformation(subInputStream);
    }

    /// <summary>
    /// Reads the region segment information followed by the halftone-specific header
    /// fields from the input stream, then records where the segment data starts.
    /// </summary>
    private void ParseHeader()
    {
        RegionInfo.ParseHeader();

        // Bit 7
        HDefaultPixel = (byte)subInputStream.ReadBit();

        // Bit 4-6
        HCombinationOperator = CombinationOperators
            .TranslateOperatorCodeToEnum((short)(subInputStream.ReadBits(3) & 0xf));

        // Bit 3
        if (subInputStream.ReadBit() == 1)
        {
            HSkipEnabled = true;
        }

        // Bit 1-2
        HTemplate = (byte)(subInputStream.ReadBits(2) & 0xf);

        // Bit 0
        if (subInputStream.ReadBit() == 1)
        {
            IsMMREncoded = true;
        }

        HGridWidth = (int)(subInputStream.ReadBits(32) & 0xffffffff);
        HGridHeight = (int)(subInputStream.ReadBits(32) & 0xffffffff);

        HGridX = (int)subInputStream.ReadBits(32);
        HGridY = (int)subInputStream.ReadBits(32);

        HRegionX = (int)subInputStream.ReadBits(16) & 0xffff;
        HRegionY = (int)subInputStream.ReadBits(16) & 0xffff;

        ComputeSegmentDataStructure();
    }

    /// <summary>
    /// Derives offset and length of the data part from the current stream position.
    /// </summary>
    private void ComputeSegmentDataStructure()
    {
        dataOffset = subInputStream.Position;
        dataHeaderLength = dataOffset - dataHeaderOffset;
        dataLength = subInputStream.Length - dataHeaderLength;
    }

    /// <summary>
    /// The procedure is described in JBIG2 ISO standard, 6.6.5.
    /// The bitmap is decoded on the first call and cached for later calls.
    /// </summary>
    /// <returns>The decoded <see cref="Jbig2Bitmap"/>.</returns>
    public Jbig2Bitmap GetRegionBitmap()
    {
        if (null == halftoneRegionBitmap)
        {
            // 6.6.5, page 40
            // 1)
            halftoneRegionBitmap = new Jbig2Bitmap(RegionInfo.BitmapWidth,
                RegionInfo.BitmapHeight);

            if (patterns == null)
            {
                patterns = GetPatterns();
            }

            if (HDefaultPixel == 1)
            {
                ArrayHelper.Fill(halftoneRegionBitmap.GetByteArray(), (byte)0xff);
            }

            // 2)
            // 6.6.5.1 Computing hSkip - SKIP is not implemented here; HSkipEnabled is only
            // forwarded to the generic region decoder.

            // 3) ceil(log2(number of patterns)), computed with integers so that no
            // floating-point rounding can add or drop a bit plane.
            int bitsPerValue = BitsNeededFor(patterns.Count);

            // 4)
            int[][] grayScaleValues = GrayScaleDecoding(bitsPerValue);

            // 5), rendering the pattern, described in 6.6.5.2
            RenderPattern(grayScaleValues);
        }
        // 6)
        return halftoneRegionBitmap;
    }

    /// <summary>
    /// Returns the smallest number of bits b for which 2^b is at least
    /// <paramref name="valueCount"/>; 0 for counts of 1 or less.
    /// </summary>
    private static int BitsNeededFor(int valueCount)
    {
        int bits = 0;
        // long arithmetic keeps the comparison valid up to int.MaxValue.
        while ((1L << bits) < valueCount)
        {
            bits++;
        }

        return bits;
    }

    /// <summary>
    /// This method draws the pattern into the region bitmap (htReg), as described in 6.6.5.2, page 42.
    /// </summary>
    private void RenderPattern(int[][] grayScaleValues)
    {
        // 1)
        for (int m = 0; m < HGridHeight; m++)
        {
            // a)
            for (int n = 0; n < HGridWidth; n++)
            {
                // i)
                int x = ComputeX(m, n);
                int y = ComputeY(m, n);

                // ii)
                // NOTE(review): ComputeX/ComputeY already include HGridX/HGridY before the
                // shift by 8; the grid offsets are added a second time (unshifted) here.
                // Kept as is - confirm against 6.6.5.2 before changing.
                Jbig2Bitmap patternBitmap = patterns[grayScaleValues[m][n]];
                Jbig2Bitmaps.Blit(patternBitmap, halftoneRegionBitmap, (x + HGridX), (y + HGridY),
                    HCombinationOperator);
            }
        }
    }

    /// <summary>
    /// Collects the bitmaps of all pattern dictionaries referred to by this segment.
    /// </summary>
    private List<Jbig2Bitmap> GetPatterns()
    {
        var collected = new List<Jbig2Bitmap>();

        foreach (SegmentHeader s in segmentHeader.GetRtSegments())
        {
            PatternDictionary patternDictionary = (PatternDictionary)s.GetSegmentData();
            collected.AddRange(patternDictionary.GetDictionary());
        }

        return collected;
    }

    /// <summary>
    /// Gray-scale image decoding procedure is special for halftone region decoding
    /// and is described in Annex C.5 on page 98.
    /// </summary>
    /// <param name="bitsPerValue">Number of bit planes to decode.</param>
    /// <returns>The gray-scale values, indexed as [row][column].</returns>
    private int[][] GrayScaleDecoding(int bitsPerValue)
    {
        short[] gbAtX = null;
        short[] gbAtY = null;

        if (!IsMMREncoded)
        {
            // Set AT pixel values
            gbAtX = new short[4];
            gbAtY = new short[4];

            gbAtX[0] = (short)(HTemplate <= 1 ? 3 : 2);
            gbAtY[0] = -1;
            gbAtX[1] = -3;
            gbAtY[1] = -1;
            gbAtX[2] = 2;
            gbAtY[2] = -2;
            gbAtX[3] = -2;
            gbAtY[3] = -2;
        }

        var grayScalePlanes = new Jbig2Bitmap[bitsPerValue];

        // 1)
        var genericRegion = new GenericRegion(subInputStream);
        genericRegion.SetParameters(IsMMREncoded, dataOffset, dataLength, HGridHeight, HGridWidth,
            HTemplate, false, HSkipEnabled, gbAtX, gbAtY);

        // 2)
        int j = bitsPerValue - 1;

        grayScalePlanes[j] = genericRegion.GetRegionBitmap();

        while (j > 0)
        {
            j--;
            genericRegion.ResetBitmap();
            // 3) a)
            grayScalePlanes[j] = genericRegion.GetRegionBitmap();
            // 3) b)
            grayScalePlanes = CombineGrayScalePlanes(grayScalePlanes, j);
        }

        // 4)
        return ComputeGrayScaleValues(grayScalePlanes, bitsPerValue);
    }

    /// <summary>
    /// XORs plane <paramref name="j"/> + 1 into plane <paramref name="j"/>, byte by byte.
    /// </summary>
    private Jbig2Bitmap[] CombineGrayScalePlanes(Jbig2Bitmap[] grayScalePlanes, int j)
    {
        int byteIndex = 0;
        for (int y = 0; y < grayScalePlanes[j].Height; y++)
        {
            for (int x = 0; x < grayScalePlanes[j].Width; x += 8)
            {
                byte newValue = grayScalePlanes[j + 1].GetByte(byteIndex);
                byte oldValue = grayScalePlanes[j].GetByte(byteIndex);

                grayScalePlanes[j].SetByte(byteIndex++,
                    Jbig2Bitmaps.CombineBytes(oldValue, newValue, CombinationOperator.XOR));
            }
        }
        return grayScalePlanes;
    }

    /// <summary>
    /// Assembles the gray-scale value of every grid cell from the bit planes;
    /// plane j contributes bit j of the value.
    /// </summary>
    private int[][] ComputeGrayScaleValues(Jbig2Bitmap[] grayScalePlanes, int bitsPerValue)
    {
        // Gray-scale decoding procedure, page 98
        int[][] grayScaleValues = new int[HGridHeight][];
        for (int i = 0; i < grayScaleValues.Length; i++)
        {
            grayScaleValues[i] = new int[HGridWidth];
        }

        // 4)
        for (int y = 0; y < HGridHeight; y++)
        {
            for (int x = 0; x < HGridWidth; x += 8)
            {
                int minorWidth = HGridWidth - x > 8 ? 8 : HGridWidth - x;
                int byteIndex = grayScalePlanes[0].GetByteIndex(x, y);

                for (int minorX = 0; minorX < minorWidth; minorX++)
                {
                    int i = minorX + x;
                    // Bit position of pixel i inside its byte, most significant bit first.
                    int bitShift = (7 - i) & 7;
                    int value = 0;

                    for (int j = 0; j < bitsPerValue; j++)
                    {
                        value += ((grayScalePlanes[j].GetByte(byteIndex) >> bitShift) & 1) * (1 << j);
                    }

                    grayScaleValues[y][i] = value;
                }
            }
        }
        return grayScaleValues;
    }

    private int ComputeX(int m, int n)
    {
        return ShiftAndFill((HGridX + m * HRegionY + n * HRegionX));
    }

    private int ComputeY(int m, int n)
    {
        return ShiftAndFill((HGridY + m * HRegionX - n * HRegionY));
    }

    /// <summary>
    /// Arithmetic shift right by 8 bits.
    /// </summary>
    /// <remarks>
    /// The C# right shift on <see cref="int"/> already propagates the sign bit, so no manual
    /// filling of the upper bits is needed. The previous implementation converted the NaN
    /// produced by the logarithm of a negative number to an integer; the result of that
    /// conversion depends on the runtime and could turn every negative input into -1 or -2.
    /// </remarks>
    private int ShiftAndFill(int value)
    {
        return value >> 8;
    }

    /// <summary>
    /// Binds the region to its segment header and input stream and parses the header.
    /// </summary>
    public void Init(SegmentHeader header, SubInputStream sis)
    {
        segmentHeader = header;
        subInputStream = sis;
        RegionInfo = new RegionSegmentInformation(subInputStream);
        ParseHeader();
    }
}
/// <summary>
/// This inner class represents a code for use in Huffman tables.
/// </summary>
internal class Code
{
    public int PrefixLength { get; }
    public int RangeLength { get; }
    public int RangeLow { get; }
    public bool IsLowerRange { get; }

    // -1 until a code value has been assigned.
    public int Value { get; set; } = -1;

    public Code(int prefixLength, int rangeLength, int rangeLow, bool isLowerRange)
    {
        PrefixLength = prefixLength;
        RangeLength = rangeLength;
        RangeLow = rangeLow;
        IsLowerRange = isLowerRange;
    }

    /// <summary>
    /// Formats the code as "pattern/prefixLength/rangeLength/rangeLow"; the pattern is "?"
    /// while no value is assigned.
    /// </summary>
    public override sealed string ToString()
    {
        var pattern = Value == -1 ? "?" : ValueNode.BitPattern(Value, PrefixLength);

        return pattern + "/" + PrefixLength + "/" + RangeLength + "/" + RangeLow;
    }
}

/// <summary>
/// Assigns the code values and builds the decoding tree from the given code table.
/// </summary>
public void InitTree(List<Code> codeTable)
{
    PreprocessCodes(codeTable);

    for (int i = 0; i < codeTable.Count; i++)
    {
        rootNode.Append(codeTable[i]);
    }
}

/// <summary>
/// Decodes the next value from the stream by walking the tree from its root.
/// </summary>
public long Decode(IImageInputStream iis) => rootNode.Decode(iis);

public override sealed string ToString() => rootNode.ToString() + "\n";

/// <summary>
/// Renders every code of the table on its own line.
/// </summary>
public static string CodeTableToString(List<Code> codeTable)
{
    var text = new StringBuilder();

    foreach (var entry in codeTable)
    {
        text.Append(entry.ToString());
        text.Append("\n");
    }

    return text.ToString();
}

/// <summary>
/// Assigns a code value to every entry with a non-zero prefix length (Annex B.3).
/// </summary>
private void PreprocessCodes(List<Code> codeTable)
{
    // Annex B.3 1) - build the histogram
    int longestPrefix = 0;
    foreach (Code entry in codeTable)
    {
        if (entry.PrefixLength > longestPrefix)
        {
            longestPrefix = entry.PrefixLength;
        }
    }

    var countByLength = new int[longestPrefix + 1];
    foreach (Code entry in codeTable)
    {
        countByLength[entry.PrefixLength]++;
    }

    // Unused codes (prefix length 0) do not take part in the assignment.
    countByLength[0] = 0;

    var firstCodeByLength = new int[countByLength.Length + 1];

    // Annex B.3 3)
    for (int length = 1; length <= countByLength.Length; length++)
    {
        // The sum is shifted as a whole: '+' binds tighter than '<<'.
        int nextCode = (firstCodeByLength[length - 1] + countByLength[length - 1]) << 1;
        firstCodeByLength[length] = nextCode;

        foreach (var entry in codeTable)
        {
            if (entry.PrefixLength != length)
            {
                continue;
            }

            entry.Value = nextCode;
            nextCode++;
        }
    }
}
/// <summary>
/// A seekable input stream that supports reads at byte and at bit granularity.
/// </summary>
internal interface IImageInputStream : IDisposable
{
    /// <summary>
    /// Returns the total length of the stream, if known. Otherwise, -1 is returned.
    /// </summary>
    long Length { get; }

    /// <summary>
    /// Returns the current byte position of the stream.
    /// </summary>
    long Position { get; }

    /// <summary>
    /// Marks a position in the stream to be returned to by a subsequent call to <see cref="Reset"/>.
    /// Calls to mark and reset may be nested arbitrarily, and an arbitrary amount of data may be
    /// read following the call to mark.
    /// The bit position used by <see cref="ReadBits"/> is saved and restored by each pair of calls
    /// to mark and reset.
    /// </summary>
    void Mark();

    /// <summary>
    /// Reads a single byte from the stream and returns it as an integer between 0 and 255.
    /// If the end of the stream is reached, -1 is returned.
    /// The bit offset within the stream is reset to zero before the read occurs.
    /// </summary>
    int Read();

    /// <summary>
    /// Reads up to b.Length bytes from the stream and stores them into <paramref name="b"/>
    /// starting at index 0. The number of bytes read is returned.
    /// If no bytes can be read because the end of the stream has been reached, -1 is returned.
    /// The bit offset within the stream is reset to zero before the read occurs.
    /// </summary>
    int Read(byte[] b);

    /// <summary>
    /// Reads up to <paramref name="len"/> bytes from the stream and stores them into
    /// <paramref name="b"/> starting at index <paramref name="off"/>. The number of bytes read is
    /// returned. If no bytes can be read because the end of the stream has been reached, -1 is returned.
    /// The bit offset within the stream is reset to zero before the read occurs.
    /// </summary>
    int Read(byte[] b, int off, int len);

    /// <summary>
    /// Reads a single bit from the stream and returns it as an int with the value 0 or 1.
    /// The bit offset is advanced by one and reduced modulo 8.
    /// </summary>
    int ReadBit();

    /// <summary>
    /// Reads a bitstring from the stream and returns it as a long, with the first bit read becoming
    /// the most significant bit of the output. The read starts within the byte indicated by the
    /// stream position, at the current bit offset. The bit offset is advanced by
    /// <paramref name="numBits"/> and reduced modulo 8.
    /// The byte order of the stream has no effect on this method. The return value is constructed as
    /// though the bits were read one at a time and shifted into the right side of the return value.
    /// </summary>
    long ReadBits(int numBits);

    /// <summary>
    /// Reads a byte from the stream and returns it as a byte value (0 to 255).
    /// The bit offset within the stream is reset to zero before the read occurs.
    /// </summary>
    byte ReadByte();

    /// <summary>
    /// Reads 4 bytes from the stream, (conceptually) concatenates them according to the current
    /// byte order and returns the result as an unsigned value.
    /// The bit offset within the stream is reset to zero before the read occurs.
    /// </summary>
    uint ReadUnsignedInt();

    /// <summary>
    /// Returns the stream pointer to its previous position, including the bit offset, at the time
    /// of the most recent unmatched call to <see cref="Mark"/>.
    /// Calls to reset without a corresponding call to mark have no effect.
    /// </summary>
    void Reset();

    /// <summary>
    /// Sets the current stream position to the desired location. The next read will occur at this
    /// location. The bit offset is set to 0.
    /// It is legal to seek past the end of the file; an exception will be thrown only if a read is
    /// performed.
    /// </summary>
    void Seek(long pos);

    /// <summary>
    /// Skips remaining bits in the current byte.
    /// </summary>
    void SkipBits();

    /// <summary>
    /// Moves the stream position forward by a given number of bytes. It is possible that this method
    /// will only be able to skip forward by a smaller number of bytes than requested, for example if
    /// the end of the stream is reached. In all cases, the actual number of bytes skipped is returned.
    /// The bit offset is set to zero prior to advancing the position.
    /// </summary>
    long SkipBytes(int n);
}
/// <summary>
/// An image input stream that reads its data from a wrapped <see cref="Stream"/>.
/// </summary>
internal sealed class ImageInputStream : AbstractImageInputStream
{
    private readonly Stream inner;

    /// <inheritdoc />
    public override sealed long Length => inner.Length;

    /// <inheritdoc />
    public override sealed long Position => inner.Position;

    /// <summary>
    /// Constructs an <see cref="ImageInputStream"/> that will read the image data
    /// from a given byte array.
    /// </summary>
    /// <param name="bytes">The image data.</param>
    /// <exception cref="ArgumentNullException">if <paramref name="bytes"/> is null.</exception>
    public ImageInputStream(byte[] bytes)
        : this(new MemoryStream(bytes ??
            throw new ArgumentNullException(nameof(bytes))))
    {
    }

    /// <summary>
    /// Constructs an <see cref="ImageInputStream"/> that will read the image data
    /// from a given <see cref="Stream"/>.
    /// </summary>
    /// <param name="input">the <see cref="Stream"/> to read the image data from.</param>
    /// <exception cref="ArgumentNullException">if <paramref name="input"/> is null.</exception>
    public ImageInputStream(Stream input)
    {
        inner = input ?? throw new ArgumentNullException(nameof(input));
    }

    /// <inheritdoc />
    public override sealed void Seek(long pos)
    {
        SetBitOffset(0);
        inner.Position = pos;
    }

    /// <inheritdoc />
    public override sealed int Read()
    {
        if (IsAtEnd())
        {
            return -1;
        }

        SetBitOffset(0);
        return inner.ReadByte();
    }

    /// <summary>
    /// Reads up to <paramref name="len"/> bytes into <paramref name="b"/>, starting at index
    /// <paramref name="off"/>, and returns the number of bytes read.
    /// </summary>
    /// <exception cref="EndOfStreamException">if the stream is already at its end.</exception>
    public override sealed int Read(byte[] b, int off, int len)
    {
        if (IsAtEnd())
        {
            throw new EndOfStreamException();
        }

        SetBitOffset(0);

        // Honour the destination offset; it was previously ignored and data always
        // landed at index 0 of the buffer.
        return inner.Read(b, off, len);
    }

    /// <inheritdoc />
    public override sealed void Dispose()
    {
        inner.Dispose();
    }
}
/// <summary>
/// Inserts the code into the subtree below this node. The bit of the code value that
/// belongs to this depth selects the child; the last bit of the prefix creates a leaf,
/// every earlier bit descends into (and creates, if needed) an internal node.
/// </summary>
/// <param name="c">The code to insert. Codes with a prefix length of 0 are ignored.</param>
/// <exception cref="ArgumentException">if the prefix is shorter than the depth of this node.</exception>
/// <exception cref="InvalidOperationException">if the leaf position is already occupied.</exception>
public void Append(Code c)
{
    // ignore unused codes
    if (c.PrefixLength == 0)
    {
        return;
    }

    int remaining = c.PrefixLength - 1 - depth;
    if (remaining < 0)
    {
        throw new ArgumentException("Negative shifting is not possible.");
    }

    bool takeOne = ((c.Value >> remaining) & 1) == 1;

    if (remaining != 0)
    {
        // the child will be an InternalNode
        if (takeOne)
        {
            if (one == null)
            {
                one = new InternalNode(depth + 1);
            }

            ((InternalNode)one).Append(c);
        }
        else
        {
            if (zero == null)
            {
                zero = new InternalNode(depth + 1);
            }

            ((InternalNode)zero).Append(c);
        }

        return;
    }

    // the child will be a leaf: OutOfBand for a range length of -1, ValueNode otherwise
    bool outOfBand = c.RangeLength == -1;

    Node occupant = takeOne ? one : zero;
    if (occupant != null)
    {
        throw new InvalidOperationException(
            (outOfBand ? "already have a OOB for " : "already have a ValueNode for ") + c);
    }

    Node leaf;
    if (outOfBand)
    {
        leaf = new OutOfBandNode(c);
    }
    else
    {
        leaf = new ValueNode(c);
    }

    if (takeOne)
    {
        one = leaf;
    }
    else
    {
        zero = leaf;
    }
}
/// <summary>
/// This class represents a bi-level image that is organized like a bitmap.
/// </summary>
internal sealed class Jbig2Bitmap
{
    // 8 pixels per byte, 0 for white, 1 for black
    private readonly byte[] bitmap;

    /// <summary>
    /// The height of the bitmap in pixels.
    /// </summary>
    public int Height { get; }

    /// <summary>
    /// The width of the bitmap in pixels.
    /// </summary>
    public int Width { get; }

    /// <summary>
    /// The amount of bytes used per row.
    /// </summary>
    public int RowStride { get; }

    /// <summary>
    /// Creates a blank image. Every pixel takes one bit, so a byte holds 8 pixels;
    /// 0 stands for white and 1 for black. The row stride is the width rounded up
    /// to whole bytes, the pad bits start out as 0.
    /// </summary>
    /// <param name="width">The width of the bitmap in pixels.</param>
    /// <param name="height">The height of the bitmap in pixels.</param>
    public Jbig2Bitmap(int width, int height)
    {
        Width = width;
        Height = height;
        RowStride = (width + 7) >> 3;

        bitmap = new byte[Height * RowStride];
    }

    /// <summary>
    /// Returns the value of the pixel at the given coordinates in the rightmost bit
    /// of the result: 0 for a white pixel, 1 for a black pixel.
    /// </summary>
    /// <param name="x">The x-coordinate of the pixel.</param>
    /// <param name="y">The y-coordinate of the pixel.</param>
    /// <returns>The value of a pixel.</returns>
    public byte GetPixel(int x, int y)
    {
        int packed = GetByte(GetByteIndex(x, y));
        int distanceFromLsb = 7 - GetBitOffset(x);

        return (byte)((packed >> distanceFromLsb) & 0x01);
    }

    /// <summary>
    /// ORs the given pixel value into the bitmap at the given coordinates.
    /// Because the value is combined with OR, a pixel that is already 1 stays 1.
    /// </summary>
    /// <param name="x">The x-coordinate of the pixel.</param>
    /// <param name="y">The y-coordinate of the pixel.</param>
    /// <param name="pixelValue">The value to OR into the pixel.</param>
    public void SetPixel(int x, int y, byte pixelValue)
    {
        int index = GetByteIndex(x, y);
        int distanceFromLsb = 7 - GetBitOffset(x);

        bitmap[index] = (byte)(bitmap[index] | (pixelValue << distanceFromLsb));
    }

    /// <summary>
    /// Returns the index of the byte that contains the pixel, specified by the
    /// pixel's x and y coordinates.
    /// </summary>
    /// <param name="x">The x-coordinate of the pixel.</param>
    /// <param name="y">The y-coordinate of the pixel.</param>
    /// <returns>The index of the byte that contains the specified pixel.</returns>
    public int GetByteIndex(int x, int y) => y * RowStride + (x >> 3);

    /// <summary>
    /// Returns the backing byte array of this bitmap (not a copy).
    /// </summary>
    /// <returns>The byte array of this bitmap.</returns>
    public byte[] GetByteArray() => bitmap;

    /// <summary>
    /// Returns a byte from the bitmap byte array.
    /// </summary>
    /// <param name="index">The array index that specifies the position of the wanted byte.</param>
    /// <returns>The byte at the requested index.</returns>
    public byte GetByte(int index) => bitmap[index];

    /// <summary>
    /// Sets the given value at the given array index position.
    /// </summary>
    /// <param name="index">The array index that specifies the position of a byte.</param>
    /// <param name="value">The byte that should be set.</param>
    public void SetByte(int index, byte value)
    {
        bitmap[index] = value;
    }

    /// <summary>
    /// Returns the byte at the specified index widened to an integer (0 to 255).
    /// </summary>
    /// <param name="index">The array index that specifies the position of the wanted byte.</param>
    /// <returns>The byte at the requested index as an integer.</returns>
    public int GetByteAsInteger(int index) => bitmap[index] & 0xff;

    /// <summary>
    /// Computes the offset of the given x coordinate in its byte.
    /// </summary>
    /// <param name="x">The x coordinate of a pixel.</param>
    /// <returns>The bit offset of a pixel in its byte.</returns>
    public int GetBitOffset(int x)
    {
        // Keeping the lowest three bits is the same as x % 8 for non-negative x.
        return x & 0x07;
    }
}
+        public static Jbig2Bitmap Extract(Jbig2Rectangle roi, Jbig2Bitmap src)
+        {
+            var dst = new Jbig2Bitmap(roi.Width, roi.Height);
+
+            // Bit position of the first requested pixel inside its source byte, and the complementary
+            // shift used to pull the remaining bits out of the following byte.
+            int upShift = roi.X & 0x07;
+            int downShift = 8 - upShift;
+            int dstLineStartIdx = 0;
+
+            // Subtraction binds tighter than '&' in C#, so this is (8 - dst.Width) & 0x07:
+            // the number of unused bits in the last byte of a destination row.
+            int padding = (8 - dst.Width & 0x07);
+            int srcLineStartIdx = src.GetByteIndex(roi.X, roi.Y);
+            int srcLineEndIdx = src.GetByteIndex(roi.X + roi.Width - 1, roi.Y);
+
+            // True when a destination row has exactly as many bytes as the touched source byte range.
+            bool usePadding = dst.RowStride == srcLineEndIdx + 1 - srcLineStartIdx;
+
+            // NOTE(review): GetMaxY is not among the Jbig2Rectangle members visible in this patch;
+            // presumably it is declared elsewhere and returns Y + Height - confirm.
+            int maxY = roi.GetMaxY();
+            for (int y = roi.Y; y < maxY; y++)
+            {
+                int srcIdx = srcLineStartIdx;
+                int dstIdx = dstLineStartIdx;
+
+                if (srcLineStartIdx == srcLineEndIdx)
+                {
+                    // The whole row comes from a single source byte.
+                    var pixels = (byte)(src.GetByte(srcIdx) << upShift);
+                    dst.SetByte(dstIdx, Unpad(padding, pixels));
+                }
+                else if (upShift == 0)
+                {
+                    // Byte aligned: copy the bytes and only clean the pad bits of the last one.
+                    for (int x = srcLineStartIdx; x <= srcLineEndIdx; x++)
+                    {
+                        byte value = src.GetByte(srcIdx++);
+
+                        if (x == srcLineEndIdx && usePadding)
+                        {
+                            value = Unpad(padding, value);
+                        }
+
+                        dst.SetByte(dstIdx++, value);
+                    }
+                }
+                else
+                {
+                    // Unaligned: every destination byte is assembled from two neighbouring source bytes.
+                    CopyLine(src, dst, upShift, downShift, padding, srcLineStartIdx, srcLineEndIdx,
+                        usePadding, srcIdx, dstIdx);
+                }
+
+                // Advance all three row cursors to the next line.
+                srcLineStartIdx += src.RowStride;
+                srcLineEndIdx += src.RowStride;
+                dstLineStartIdx += dst.RowStride;
+            }
+
+            return dst;
+        }
+
+        /// <summary>
+        /// Combines two given bytes with a logical operator.
+        /// The JBIG2 Standard specifies 5 possible combinations of bytes.
+        /// Hint: Please take a look at ISO/IEC 14492:2001 (E) for detailed definition
+        /// and description of the operators.
+        /// </summary>
+        /// <param name="value1">The value that should be combined with value2.</param>
+        /// <param name="value2">The value that should be combined with value1.</param>
+        /// <param name="op">The specified combination operator.</param>
+        /// <returns>The combination result.</returns>
+        public static byte CombineBytes(byte value1, byte value2, CombinationOperator op)
+        {
+            switch (op)
+            {
+                case CombinationOperator.OR:
+                    return (byte)(value2 | value1);
+
+                case CombinationOperator.AND:
+                    return (byte)(value2 & value1);
+
+                case CombinationOperator.XOR:
+                    return (byte)(value2 ^ value1);
+
+                case CombinationOperator.XNOR:
+                    // Bitwise complement of XOR; the cast truncates the promoted int back to 8 bits.
+                    return (byte)~(value1 ^ value2);
+
+                case CombinationOperator.REPLACE:
+                default:
+                    // Old value is replaced by new value. Unknown operators fall back to the same behaviour.
+                    return value2;
+            }
+        }
+
+        /// <summary>
+        /// Combines the source bitmap into the destination bitmap, byte by byte.
+        /// Parts of the bitmap to blit that are outside of the target bitmap will be ignored.
+        /// </summary>
+        /// <param name="src">The bitmap that should be combined into the destination.</param>
+        /// <param name="dst">The destination bitmap.</param>
+        /// <param name="x">The x coordinate where the upper left corner of the bitmap to blit should be positioned.</param>
+        /// <param name="y">The y coordinate where the upper left corner of the bitmap to blit should be positioned.</param>
+        /// <param name="combinationOperator">The combination operator for combining two pixels.</param>
+        public static void Blit(Jbig2Bitmap src, Jbig2Bitmap dst, int x, int y, CombinationOperator combinationOperator)
+        {
+            // First source row to copy, and the first/last byte index of that row in the source array.
+            int startLine = 0;
+            int srcStartIdx = 0;
+            int srcEndIdx = (src.RowStride - 1);
+
+            // Ignore those parts of the source bitmap which would be placed outside the target bitmap.
+            if (x < 0)
+            {
+                // NOTE(review): x is a pixel offset, while srcStartIdx is later used as a byte index into
+                // the source array - confirm that skipping -x bytes (not -x pixels) is intended.
+                srcStartIdx = -x;
+                x = 0;
+            }
+            else if (x + src.Width > dst.Width)
+            {
+                // NOTE(review): the overhang is measured in pixels but subtracted from a byte index - confirm.
+                srcEndIdx -= (src.Width + x - dst.Width);
+            }
+
+            if (y < 0)
+            {
+                startLine = -y;
+                y = 0;
+                // NOTE(review): the indices advance by exactly one row stride, independent of how many
+                // rows (-y) are skipped - confirm.
+                srcStartIdx += src.RowStride;
+                srcEndIdx += src.RowStride;
+            }
+            else if (y + src.Height > dst.Height)
+            {
+                // NOTE(review): startLine becomes the number of overhanging rows, so the loops start that
+                // many rows into the source while the byte indices still point at row 0 - confirm.
+                startLine = src.Height + y - dst.Height;
+            }
+
+            // Bit position of the target x inside its destination byte and the complementary shift.
+            int shiftVal1 = x & 0x07;
+            int shiftVal2 = 8 - shiftVal1;
+
+            // Number of pixels in the last, partially used byte of a source row (0 if the width is a multiple of 8).
+            int padding = src.Width & 0x07;
+            int toShift = shiftVal2 - padding;
+
+            // shiftVal2 is 8 exactly when x is byte aligned; only then can bytes be copied without shifting.
+            bool useShift = (shiftVal2 & 0x07) != 0;
+            bool specialCase = src.Width <= ((srcEndIdx - srcStartIdx) << 3) + shiftVal2;
+
+            int dstStartIdx = dst.GetByteIndex(x, y);
+
+            int lastLine = Math.Min(src.Height, startLine + dst.Height);
+
+            if (!useShift)
+            {
+                BlitUnshifted(src, dst, startLine, lastLine, dstStartIdx, srcStartIdx, srcEndIdx,
+                    combinationOperator);
+            }
+            else if (specialCase)
+            {
+                BlitSpecialShifted(src, dst, startLine, lastLine, dstStartIdx, srcStartIdx, srcEndIdx,
+                    toShift, shiftVal1, shiftVal2, combinationOperator);
+            }
+            else
+            {
+                BlitShifted(src, dst, startLine, lastLine, dstStartIdx, srcStartIdx, srcEndIdx, toShift,
+                    shiftVal1, shiftVal2, combinationOperator, padding);
+            }
+        }
+
+        // Copies one row of a region that does not start on a byte boundary: each destination byte is
+        // built from the low bits of one source byte and the high bits of the next one.
+        private static void CopyLine(Jbig2Bitmap src, Jbig2Bitmap dst, int sourceUpShift, int sourceDownShift,
+            int padding, int firstSourceByteOfLine, int lastSourceByteOfLine, bool usePadding,
+            int sourceOffset, int targetOffset)
+        {
+            for (int x = firstSourceByteOfLine; x < lastSourceByteOfLine; x++)
+            {
+
+                // Only combine with the following byte when there is one left in the source array.
+                if (sourceOffset + 1 < src.GetByteArray().Length)
+                {
+                    bool isLastByte = x + 1 == lastSourceByteOfLine;
+
+                    // The (int)(uint) cast keeps the right shift free of sign extension.
+                    var value = (byte)(src.GetByte(sourceOffset++) << sourceUpShift
+                        | ((int)(uint)(src.GetByte(sourceOffset) & 0xff) >> sourceDownShift));
+
+                    if (isLastByte && !usePadding)
+                    {
+                        value = Unpad(padding, value);
+                    }
+
+                    dst.SetByte(targetOffset++, value);
+
+                    if (isLastByte && usePadding)
+                    {
+                        // One more destination byte is needed for the bits left over in the last source byte.
+                        value = Unpad(padding,
+                            (byte)((src.GetByte(sourceOffset) & 0xff) << sourceUpShift));
+
dst.SetByte(targetOffset, value);
+                    }
+
+                }
+                else
+                {
+                    // Last byte of the source array: there is no following byte to borrow bits from.
+                    var value = (byte)(src.GetByte(sourceOffset++) << sourceUpShift & 0xff);
+                    dst.SetByte(targetOffset++, value);
+                }
+            }
+        }
+
+        /// <summary>
+        /// Removes unnecessary bits from a byte by clearing its lowest <paramref name="padding"/> bits.
+        /// </summary>
+        /// <param name="padding">The amount of unnecessary bits.</param>
+        /// <param name="value">The byte that should be cleaned up.</param>
+        /// <returns>A cleaned byte.</returns>
+        private static byte Unpad(int padding, byte value)
+        {
+            // Shifting right and back left drops the low bits; value is promoted to int for the shifts.
+            return (byte)(value >> padding << padding);
+        }
+
+        // Byte-aligned blit: combines every source byte of each row directly with the destination byte.
+        private static void BlitUnshifted(Jbig2Bitmap src, Jbig2Bitmap dst, int startLine, int lastLine,
+            int dstStartIdx, int srcStartIdx, int srcEndIdx, CombinationOperator op)
+        {
+
+            // The loop header also moves all three row cursors on by one row stride per line.
+            for (int dstLine = startLine; dstLine < lastLine; dstLine++, dstStartIdx += dst
+                .RowStride, srcStartIdx += src.RowStride, srcEndIdx += src.RowStride)
+            {
+                int dstIdx = dstStartIdx;
+
+                // Go through the bytes in a line of the Symbol
+                for (int srcIdx = srcStartIdx; srcIdx <= srcEndIdx; srcIdx++)
+                {
+                    byte oldByte = dst.GetByte(dstIdx);
+                    byte newByte = src.GetByte(srcIdx);
+                    dst.SetByte(dstIdx++, CombineBytes(oldByte, newByte, op));
+                }
+            }
+        }
+
+        // Shifted blit for the case Blit classifies as "special": a 16-bit register carries the bits
+        // that spill over from one source byte into the next destination byte.
+        private static void BlitSpecialShifted(Jbig2Bitmap src, Jbig2Bitmap dst, int startLine, int lastLine,
+            int dstStartIdx, int srcStartIdx, int srcEndIdx, int toShift, int shiftVal1,
+            int shiftVal2, CombinationOperator op)
+        {
+
+            for (int dstLine = startLine; dstLine < lastLine; dstLine++, dstStartIdx += dst
+                .RowStride, srcStartIdx += src.RowStride, srcEndIdx += src.RowStride)
+            {
+                short register = 0;
+                int dstIdx = dstStartIdx;
+
+                // Go through the bytes in a line of the Symbol
+                for (int srcIdx = srcStartIdx; srcIdx <= srcEndIdx; srcIdx++)
+                {
+                    byte oldByte = dst.GetByte(dstIdx);
+
+                    // Merge the next source byte into the register; its high byte is the next output byte.
+                    register = (short)(((int)register | src.GetByteAsInteger(srcIdx)) << shiftVal2);
+                    byte newByte = (byte)(register >> 8);
+
+                    if (srcIdx == srcEndIdx)
+                    {
+                        // Clean the unused low bits of the final byte of the row.
+                        newByte = Unpad(toShift, newByte);
+                    }
+
+                    dst.SetByte(dstIdx++, CombineBytes(oldByte, newByte, op));
+                    register <<= shiftVal1;
+                }
+            }
+        }
+
+        private static
void BlitShifted(Jbig2Bitmap src, Jbig2Bitmap dst, int startLine, int lastLine,
+            int dstStartIdx, int srcStartIdx, int srcEndIdx, int toShift, int shiftVal1,
+            int shiftVal2, CombinationOperator op, int padding)
+        {
+            // General shifted blit: like BlitSpecialShifted, but after the last source byte of a row the
+            // bits remaining in the register are written to one additional destination byte.
+
+            for (int dstLine = startLine; dstLine < lastLine; dstLine++, dstStartIdx += dst
+                .RowStride, srcStartIdx += src.RowStride, srcEndIdx += src.RowStride)
+            {
+                short register = 0;
+                int dstIdx = dstStartIdx;
+
+                // Go through the bytes in a line of the symbol
+                for (int srcIdx = srcStartIdx; srcIdx <= srcEndIdx; srcIdx++)
+                {
+                    byte oldByte = dst.GetByte(dstIdx);
+                    register = (short)(((int)register | src.GetByteAsInteger(srcIdx)) << shiftVal2);
+
+                    // The high byte of the register is the next complete output byte.
+                    byte newByte = (byte)(register >> 8);
+                    dst.SetByte(dstIdx++, CombineBytes(oldByte, newByte, op));
+
+                    register <<= shiftVal1;
+
+                    if (srcIdx == srcEndIdx)
+                    {
+                        // Flush the leftover bits into the destination byte that follows.
+                        newByte = (byte)(register >> (8 - shiftVal2));
+
+                        if (padding != 0)
+                        {
+                            newByte = Unpad(8 + toShift, newByte);
+                        }
+
+                        // dstIdx is not advanced here; the next row restarts from dstStartIdx.
+                        oldByte = dst.GetByte(dstIdx);
+                        dst.SetByte(dstIdx, CombineBytes(oldByte, newByte, op));
+                    }
+                }
+            }
+        }
+    }
+}
diff --git a/src/UglyToad.PdfPig/Filters/Jbig2/Jbig2Document.cs b/src/UglyToad.PdfPig/Filters/Jbig2/Jbig2Document.cs
new file mode 100644
index 000000000..d48b5c0a4
--- /dev/null
+++ b/src/UglyToad.PdfPig/Filters/Jbig2/Jbig2Document.cs
@@ -0,0 +1,298 @@
+namespace UglyToad.PdfPig.Filters.Jbig2
+{
+    using System;
+    using System.Collections.Generic;
+    using System.IO;
+
+    /// <summary>
+    /// This class represents the document structure with its pages and global segments.
+    /// </summary>
+    internal sealed class Jbig2Document : IDisposable
+    {
+        // ID string in file header, see ISO/IEC 14492:2001, D.4.1
+        private static readonly int[] FILE_HEADER_ID = { 0x97, 0x4A, 0x42, 0x32, 0x0D, 0x0A, 0x1A, 0x0A };
+
+        // This map contains all pages of this document. The key is the number of the page.
+        private readonly Dictionary<int, Jbig2Page> pages = new Dictionary<int, Jbig2Page>();
+
+        // The length of the file header if exists
+        private short fileHeaderLength = 9;
+
+        /// <summary>
+        /// According to D.4.2 - File header bit 0
+        /// This flag contains information about the file organization:
+        /// 1: for sequential
+        /// 0: for random access
+        /// You can use the constants <see cref="RANDOM"/> and <see cref="SEQUENTIAL"/>.
+        /// </summary>
+        private short organisationType = (short)SEQUENTIAL;
+
+        public static readonly int RANDOM = 0;
+        public static readonly int SEQUENTIAL = 1;
+
+        /// <summary>
+        /// According to D.4.2 - Bit 1
+        /// true: if amount of pages is unknown, amount of pages field is not present.
+        /// false: if there is a field in the file header where the amount of pages can be read
+        /// </summary>
+        public bool IsNumberOfPageUnknown { get; private set; } = true;
+
+        /// <summary>
+        /// According to D.4.3 - Number of pages field (4 bytes).
+        /// Only present if <see cref="IsNumberOfPageUnknown"/> is false.
+        /// </summary>
+        public int NumberOfPages { get; private set; }
+
+        // Defines whether extended Template is used.
+        private bool gbUseExtTemplate;
+
+        // This is the source data stream wrapped into a SubInputStream.
+        private readonly SubInputStream subInputStream;
+
+        // Holds a load of segments, that aren't associated with a page.
+        private Jbig2Globals globalSegments;
+
+        /// <summary>
+        /// Creates a document from the given stream without externally supplied global segments.
+        /// </summary>
+        /// <param name="input">The stream that holds the JBIG2 data.</param>
+        public Jbig2Document(IImageInputStream input)
+            : this(input, null)
+        {
+        }
+
+        /// <summary>
+        /// Creates a document from the given stream and immediately maps all of its segments.
+        /// </summary>
+        /// <param name="input">The stream that holds the JBIG2 data; must not be null.</param>
+        /// <param name="globals">Global segments supplied from outside the stream; may be null.</param>
+        /// <exception cref="ArgumentNullException">If <paramref name="input"/> is null.</exception>
+        public Jbig2Document(IImageInputStream input, Jbig2Globals globals)
+        {
+            if (input == null)
+            {
+                throw new ArgumentNullException(nameof(input), " must not be null");
+            }
+
+            subInputStream = new SubInputStream(input, 0, long.MaxValue);
+            globalSegments = globals;
+
+            MapStream();
+        }
+
+        /// <summary>
+        /// Retrieves the segment with the given segment number considering only segments
+        /// that aren't associated with a page.
+        /// </summary>
+        /// <param name="segmentNumber">The number of the requested segment.</param>
+        /// <returns>The requested <see cref="SegmentHeader"/>.</returns>
+        internal SegmentHeader GetGlobalSegment(int segmentNumber)
+        {
+            if (null != globalSegments)
+            {
+                return globalSegments.GetSegment(segmentNumber);
+            }
+            return null;
+        }
+
+        /// <summary>
+        /// Retrieves the <see cref="Jbig2Page"/> specified by the given page number.
+        /// </summary>
+        /// <param name="pageNumber">The page number of the requested <see cref="Jbig2Page"/>.</param>
+        /// <returns>The requested <see cref="Jbig2Page"/>, or null if no such page has been mapped.</returns>
+        public Jbig2Page GetPage(int pageNumber)
+        {
+            // A single lookup instead of ContainsKey followed by the indexer.
+            return pages.TryGetValue(pageNumber, out Jbig2Page page) ? page : null;
+        }
+
+        /// <summary>
+        /// Disposes the <see cref="SubInputStream"/> that wraps the supplied input.
+        /// </summary>
+        public void Dispose()
+        {
+            subInputStream.Dispose();
+        }
+
+        /// <summary>
+        /// Retrieves the amount of pages in this JBIG2 document. If the file header does not state the
+        /// amount (or states 0), the number of mapped pages is returned; the stream is mapped again
+        /// if no page has been found so far.
+        /// </summary>
+        /// <returns>The amount of pages in this JBIG2 document.</returns>
+        internal int GetAmountOfPages()
+        {
+            if (IsNumberOfPageUnknown || NumberOfPages == 0)
+            {
+                if (pages.Count == 0)
+                {
+                    MapStream();
+                }
+
+                return pages.Count;
+            }
+            else
+            {
+                return NumberOfPages;
+            }
+        }
+
+
+        /// <summary>
+        /// This method maps the stream and stores all segments.
+        /// </summary>
+        private void MapStream()
+        {
+            var segments = new List<SegmentHeader>();
+
+            long offset = 0;
+            int segmentType = 0;
+
+            // Parse the file header if there is one.
+            if (IsFileHeaderPresent())
+            {
+                ParseFileHeader();
+                offset += fileHeaderLength;
+            }
+
+            if (globalSegments == null)
+            {
+                globalSegments = new Jbig2Globals();
+            }
+
+            Jbig2Page page;
+
+            // If organisation type is random-access: walk through the segment headers until EOF segment
+            // appears (specified with segment type 51)
+            while (segmentType != 51 && !ReachedEndOfStream(offset))
+            {
+                var segment = new SegmentHeader(this, subInputStream, offset,
+                    organisationType);
+
+                int associatedPage = segment.PageAssociation;
+                segmentType = segment.SegmentType;
+
+                if (associatedPage != 0)
+                {
+                    // Create the page lazily the first time one of its segments shows up.
+                    page = GetPage(associatedPage);
+                    if (page == null)
+                    {
+                        page = new Jbig2Page(this, associatedPage);
+                        pages[associatedPage] = page;
+                    }
+                    page.Add(segment);
+                }
+                else
+                {
+                    // Page association 0 marks a segment that belongs to no page, i.e. a global one.
+                    globalSegments.AddSegment(segment.SegmentNumber, segment);
+                }
+                segments.Add(segment);
+
+                offset = subInputStream.Position;
+
+                // Sequential organization skips data part and sets the offset
+                if (organisationType == SEQUENTIAL)
+                {
+                    offset += segment.SegmentDataLength;
+                }
+            }
+
+            // Random organization: segment headers are finished. Data part starts and the offset can be set.
+            DetermineRandomDataOffsets(segments, offset);
+        }
+
+        // Checks whether the stream starts with the 8-byte JBIG2 file header ID. The stream position is
+        // restored to the mark afterwards in either case.
+        private bool IsFileHeaderPresent()
+        {
+            subInputStream.Mark();
+
+            foreach (int magicByte in FILE_HEADER_ID)
+            {
+                if (magicByte != subInputStream.Read())
+                {
+                    subInputStream.Reset();
+                    return false;
+                }
+            }
+
+            subInputStream.Reset();
+            return true;
+        }
+
+        /// <summary>
+        /// Determines the start of the data parts and sets the offset.
+        /// Only has an effect for random-access organisation, where the data parts follow the
+        /// segment headers in header order.
+        /// </summary>
+        private void DetermineRandomDataOffsets(List<SegmentHeader> segments, long offset)
+        {
+            if (organisationType == RANDOM)
+            {
+                foreach (SegmentHeader s in segments)
+                {
+                    s.SegmentDataStartOffset = offset;
+                    offset += s.SegmentDataLength;
+                }
+            }
+        }
+
+        ///
+        /// This method reads the stream and sets variables for information about organization type and length etc.
+        ///
+        private void ParseFileHeader()
+        {
+            subInputStream.Seek(0);
+
+            // D.4.1 - the 8-byte ID string carries no information and is skipped.
+            subInputStream.SkipBytes(8);
+
+            // D.4.2 - header flags (1 byte), read from the most significant bit downwards.
+
+            // Bits 3-7 are reserved and must be 0.
+            subInputStream.ReadBits(5);
+
+            // Bit 2 - extended templates in use.
+            bool usesExtendedTemplates = subInputStream.ReadBit() == 1;
+            if (usesExtendedTemplates)
+            {
+                gbUseExtTemplate = true;
+            }
+
+            // Bit 1 - set when the amount of pages is unknown.
+            bool pageCountIsKnown = subInputStream.ReadBit() != 1;
+            if (pageCountIsKnown)
+            {
+                IsNumberOfPageUnknown = false;
+            }
+
+            // Bit 0 - file organisation type.
+            organisationType = (short)subInputStream.ReadBit();
+
+            // D.4.3 - the number-of-pages field only exists when the amount is known.
+            if (IsNumberOfPageUnknown)
+            {
+                return;
+            }
+
+            NumberOfPages = (int)subInputStream.ReadUnsignedInt();
+            fileHeaderLength = 13;
+        }
+
+        /// <summary>
+        /// Probes whether at least 32 more bits can be read at the given offset, so that
+        /// the caller does not run into an <see cref="EndOfStreamException"/> later.
+        /// </summary>
+        /// <param name="offset">The stream position to probe.</param>
+        /// <returns>true, if the end of the stream is reached. false, if there are more bytes to read.</returns>
+        private bool ReachedEndOfStream(long offset)
+        {
+            bool atEnd = false;
+
+            try
+            {
+                subInputStream.Seek(offset);
+                subInputStream.ReadBits(32);
+            }
+            catch (EndOfStreamException)
+            {
+                atEnd = true;
+            }
+
+            return atEnd;
+        }
+
+        internal Jbig2Globals GetGlobalSegments() => globalSegments;
+
+        internal bool IsAmountOfPagesUnknown() => IsNumberOfPageUnknown;
+
+        internal bool IsGbUseExtTemplate() => gbUseExtTemplate;
+    }
+}
diff --git a/src/UglyToad.PdfPig/Filters/Jbig2/Jbig2Exception.cs b/src/UglyToad.PdfPig/Filters/Jbig2/Jbig2Exception.cs
new file mode 100644
index 000000000..388407182
--- /dev/null
+++ b/src/UglyToad.PdfPig/Filters/Jbig2/Jbig2Exception.cs
@@ -0,0 +1,22 @@
+namespace UglyToad.PdfPig.Filters.Jbig2
+{
+    using System;
+
+    internal class Jbig2Exception : Exception
+    {
+        public Jbig2Exception()
+        {
+        }
+
+        public Jbig2Exception(string message)
+            : base(message)
+        {
+        }
+
+        public Jbig2Exception(string message, Exception
innerException)
+            : base(message, innerException)
+        {
+        }
+    }
+
+}
diff --git a/src/UglyToad.PdfPig/Filters/Jbig2/Jbig2Globals.cs b/src/UglyToad.PdfPig/Filters/Jbig2/Jbig2Globals.cs
new file mode 100644
index 000000000..76abb8f4d
--- /dev/null
+++ b/src/UglyToad.PdfPig/Filters/Jbig2/Jbig2Globals.cs
@@ -0,0 +1,26 @@
+namespace UglyToad.PdfPig.Filters.Jbig2
+{
+    using System.Collections.Generic;
+
+    /// <summary>
+    /// This class stores segments that aren't associated with a page.
+    /// If the data is embedded in another format, for example PDF, these segments might be stored separately in the file.
+    /// These segments will be decoded on demand and all results are stored in the document object and can be retrieved from
+    /// there.
+    /// </summary>
+    internal sealed class Jbig2Globals
+    {
+        // This map contains all segments, that are not associated with a page. The key is the segment number.
+        private readonly Dictionary<int, SegmentHeader> globalSegments = new Dictionary<int, SegmentHeader>();
+
+        /// <summary>
+        /// Looks up a global segment by its segment number.
+        /// </summary>
+        /// <param name="segmentNumber">The number of the requested segment.</param>
+        /// <returns>The stored <see cref="SegmentHeader"/>, or null if no segment has that number.</returns>
+        internal SegmentHeader GetSegment(int segmentNumber)
+        {
+            // The dictionary indexer throws KeyNotFoundException for an unknown key, but callers treat
+            // "not found" as null. TryGetValue leaves the out value null in that case.
+            globalSegments.TryGetValue(segmentNumber, out SegmentHeader segment);
+            return segment;
+        }
+
+        /// <summary>
+        /// Stores a global segment under its segment number, replacing any previous entry.
+        /// </summary>
+        /// <param name="segmentNumber">The number of the segment.</param>
+        /// <param name="segment">The segment to store.</param>
+        internal void AddSegment(int segmentNumber, SegmentHeader segment)
+        {
+            globalSegments[segmentNumber] = segment;
+        }
+    }
+}
diff --git a/src/UglyToad.PdfPig/Filters/Jbig2/Jbig2Page.cs b/src/UglyToad.PdfPig/Filters/Jbig2/Jbig2Page.cs
new file mode 100644
index 000000000..e61d21137
--- /dev/null
+++ b/src/UglyToad.PdfPig/Filters/Jbig2/Jbig2Page.cs
@@ -0,0 +1,363 @@
+namespace UglyToad.PdfPig.Filters.Jbig2
+{
+    using System.Collections.Generic;
+    using UglyToad.PdfPig.Util;
+
+    /// <summary>
+    /// This class represents a JBIG2 page.
+    /// </summary>
+    internal sealed class Jbig2Page
+    {
+        // This list contains all segments of this page, sorted by segment number in ascending order.
+        private readonly SortedDictionary<int, SegmentHeader> segments = new SortedDictionary<int, SegmentHeader>();
+
+        // NOTE: page number != segmentList index
+        private readonly int pageNumber;
+
+        // The page bitmap that represents the page buffer
+        private Jbig2Bitmap pageBitmap;
+
+        // Cached page properties; 0 means "not determined yet".
+        private int finalHeight;
+        private int finalWidth;
+        private int resolutionX;
+        private int resolutionY;
+
+        private readonly Jbig2Document document;
+
+        internal Jbig2Page(Jbig2Document document, int pageNumber)
+        {
+            this.document = document;
+            this.pageNumber = pageNumber;
+        }
+
+        /// <summary>
+        /// This method searches for a segment specified by its number, first among the segments of
+        /// this page and then among the global segments of the document.
+        /// </summary>
+        /// <param name="number">Segment number of the segment to search for.</param>
+        /// <returns>The retrieved <see cref="SegmentHeader"/> or null.</returns>
+        internal SegmentHeader GetSegment(int number)
+        {
+            // A single lookup instead of ContainsKey followed by the indexer.
+            if (segments.TryGetValue(number, out SegmentHeader s) && null != s)
+            {
+                return s;
+            }
+
+            if (null != document)
+            {
+                return document.GetGlobalSegment(number);
+            }
+            return null;
+        }
+
+        /// <summary>
+        /// Returns the associated page information segment (segment type 48).
+        /// </summary>
+        /// <returns>The associated <see cref="SegmentHeader"/> or null if not available.</returns>
+        internal SegmentHeader GetPageInformationSegment()
+        {
+            foreach (SegmentHeader s in segments.Values)
+            {
+                if (s.SegmentType == 48)
+                {
+                    return s;
+                }
+            }
+            return null;
+        }
+
+        /// <summary>
+        /// Returns the decoded bitmap if present. Otherwise the page bitmap will be composed before returning
+        /// the result.
+        /// </summary>
+        /// <returns>The result of decoding a page.</returns>
+        public Jbig2Bitmap GetBitmap()
+        {
+            if (null == pageBitmap)
+            {
+                ComposePageBitmap();
+            }
+            return pageBitmap;
+        }
+
+        ///
+        /// This method composes the bitmaps of segments to a page and stores the page as a Jbig2Bitmap.
+        ///
+        ///
+        ///
+        private void ComposePageBitmap()
+        {
+            // Nothing is composed for page numbers below 1; the page bitmap then stays unset.
+            if (pageNumber <= 0)
+            {
+                return;
+            }
+
+            // Page 79, 1) Decoding the page information segment
+            SegmentHeader pageInformationSegment = GetPageInformationSegment();
+            if (pageInformationSegment == null)
+            {
+                // A malformed stream may lack the segment; report that instead of a NullReferenceException.
+                throw new Jbig2Exception("Page " + pageNumber + " has no page information segment.");
+            }
+
+            PageInformation pageInformation = (PageInformation)pageInformationSegment.GetSegmentData();
+            CreatePage(pageInformation);
+            ClearSegmentData();
+        }
+
+        // Chooses between normal and striped composition. The striped path is only taken when the page
+        // is striped and its height is still unknown (-1).
+        private void CreatePage(PageInformation pageInformation)
+        {
+            if (!pageInformation.IsStriped || pageInformation.BitmapHeight != -1)
+            {
+                // Page 79, 4)
+                CreateNormalPage(pageInformation);
+            }
+            else
+            {
+                CreateStripedPage(pageInformation);
+            }
+        }
+
+        // Allocates the page buffer with the dimensions from the page information and combines the
+        // bitmap of every immediate region segment into it.
+        private void CreateNormalPage(PageInformation pageInformation)
+        {
+            pageBitmap = new Jbig2Bitmap(pageInformation.BitmapWidth, pageInformation.BitmapHeight);
+
+            // Page 79, 3)
+            // If default pixel value is not 0, byte will be filled with 0xff
+            if (pageInformation.DefaultPixelValue != 0)
+            {
+                ArrayHelper.Fill(pageBitmap.GetByteArray(), (byte)0xff);
+            }
+
+            foreach (SegmentHeader s in segments.Values)
+            {
+                // Page 79, 5)
+                switch (s.SegmentType)
+                {
+                    case 6: // Immediate text region
+                    case 7: // Immediate lossless text region
+                    case 22: // Immediate halftone region
+                    case 23: // Immediate lossless halftone region
+                    case 38: // Immediate generic region
+                    case 39: // Immediate lossless generic region
+                    case 42: // Immediate generic refinement region
+                    case 43: // Immediate lossless generic refinement region
+                        IRegion r = (IRegion)s.GetSegmentData();
+
+                        Jbig2Bitmap regionBitmap = r.GetRegionBitmap();
+
+                        if (FitsPage(pageInformation, regionBitmap))
+                        {
+                            // A single region covering the whole page can serve as the page bitmap directly.
+                            pageBitmap = regionBitmap;
+                        }
+                        else
+                        {
+                            RegionSegmentInformation regionInfo = r.RegionInfo;
+                            CombinationOperator op = GetCombinationOperator(pageInformation,
+                                regionInfo.CombinationOperator);
+                            Jbig2Bitmaps.Blit(regionBitmap, pageBitmap, regionInfo.X,
+                                regionInfo.Y, op);
+                        }
+
+                        break;
+                }
+            }
+        }
+
+        ///
+        /// Check if we have only one region that forms the complete page.
+        /// If the dimension equals the page's dimension set
+        /// the region's bitmap as the page's bitmap. Otherwise we have to blit the smaller region's bitmap into the page's
+        /// bitmap.
+        ///
+        private bool FitsPage(PageInformation pageInformation, Jbig2Bitmap regionBitmap)
+        {
+            return CountRegions() == 1 && pageInformation.DefaultPixelValue == 0
+                && pageInformation.BitmapWidth == regionBitmap.Width
+                && pageInformation.BitmapHeight == regionBitmap.Height;
+        }
+
+        // Composes a striped page: the final height is taken from the end-of-stripe segments, and every
+        // region is blitted at the line that follows the most recently seen end of stripe.
+        private void CreateStripedPage(PageInformation pageInformation)
+        {
+            // Collecting the stripes also determines finalHeight, which is needed for the page buffer.
+            List<ISegmentData> pageStripes = CollectPageStripes();
+
+            pageBitmap = new Jbig2Bitmap(pageInformation.BitmapWidth, finalHeight);
+
+            int startLine = 0;
+            foreach (ISegmentData sd in pageStripes)
+            {
+                if (sd is EndOfStripe endOfStripe)
+                {
+                    startLine = endOfStripe.GetLineNumber() + 1;
+                }
+                else
+                {
+                    IRegion r = (IRegion)sd;
+                    RegionSegmentInformation regionInfo = r.RegionInfo;
+                    CombinationOperator op = GetCombinationOperator(pageInformation,
+                        regionInfo.CombinationOperator);
+                    Jbig2Bitmaps.Blit(r.GetRegionBitmap(), pageBitmap, regionInfo.X, startLine,
+                        op);
+                }
+            }
+        }
+
+        // Gathers the decoded data of all immediate region and end-of-stripe segments in segment order
+        // and records the page height implied by the last end-of-stripe segment.
+        // NOTE(review): the element type List<ISegmentData> assumes IRegion derives from ISegmentData - confirm.
+        private List<ISegmentData> CollectPageStripes()
+        {
+            List<ISegmentData> pageStripes = new List<ISegmentData>();
+            foreach (SegmentHeader s in segments.Values)
+            {
+                // Page 79, 5)
+                switch (s.SegmentType)
+                {
+                    case 6: // Immediate text region
+                    case 7: // Immediate lossless text region
+                    case 22: // Immediate halftone region
+                    case 23: // Immediate lossless halftone region
+                    case 38: // Immediate generic region
+                    case 39: // Immediate lossless generic region
+                    case 42: // Immediate generic refinement region
+                    case 43: // Immediate lossless generic refinement region
+                        IRegion r = (IRegion)s.GetSegmentData();
+                        pageStripes.Add(r);
+                        break;
+
+                    case 50: // End of stripe
+                        EndOfStripe eos = (EndOfStripe)s.GetSegmentData();
+                        pageStripes.Add(eos);
+                        finalHeight = eos.GetLineNumber() + 1;
+                        break;
+                }
+            }
+
+            return pageStripes;
+        }
+
+        ///
+        /// This method counts the regions segments.
+        /// If there is only one region, the bitmap
+        /// of this segment is equal to the page bitmap and blitting is not necessary.
+        ///
+        /// Number of regions.
+        private int CountRegions()
+        {
+            int total = 0;
+
+            foreach (SegmentHeader header in segments.Values)
+            {
+                int type = header.SegmentType;
+
+                // Immediate (lossless) text, halftone, generic and generic refinement regions.
+                bool isImmediateRegion = type == 6 || type == 7
+                    || type == 22 || type == 23
+                    || type == 38 || type == 39
+                    || type == 42 || type == 43;
+
+                if (isImmediateRegion)
+                {
+                    total++;
+                }
+            }
+
+            return total;
+        }
+
+        /// <summary>
+        /// Decides which combination operator is used for a region: the region's own operator when the
+        /// page allows overriding, otherwise the operator given by the page information.
+        /// </summary>
+        /// <param name="pi">The page information of this page.</param>
+        /// <param name="newOperator">The combination operator, specified by actual segment.</param>
+        /// <returns>The combination operator to use.</returns>
+        private CombinationOperator GetCombinationOperator(PageInformation pi,
+            CombinationOperator newOperator)
+        {
+            return pi.IsCombinationOperatorOverrideAllowed ? newOperator : pi.CombinationOperator;
+        }
+
+        /// <summary>
+        /// Registers a segment with this page under its segment number.
+        /// </summary>
+        /// <param name="segment">The segment to be added.</param>
+        internal void Add(SegmentHeader segment)
+        {
+            segments[segment.SegmentNumber] = segment;
+        }
+
+        /// <summary>
+        /// Asks every segment of this page to clean its segment data, so decoded data is
+        /// not held longer than needed.
+        /// </summary>
+        private void ClearSegmentData()
+        {
+            foreach (SegmentHeader header in segments.Values)
+            {
+                header.CleanSegmentData();
+            }
+        }
+
+        ///
+        /// Reset memory-critical parts of page.
+        ///
+        internal void ClearPageData()
+        {
+            pageBitmap = null;
+        }
+
+        // Returns the page height, resolving and caching it on first use.
+        internal int GetHeight()
+        {
+            if (finalHeight != 0)
+            {
+                return finalHeight;
+            }
+
+            PageInformation info = GetPageInformation();
+            if (info.BitmapHeight != -1)
+            {
+                finalHeight = info.BitmapHeight;
+            }
+            else
+            {
+                // Height unknown up front: composing the page bitmap determines it as a side effect.
+                GetBitmap();
+            }
+
+            return finalHeight;
+        }
+
+        // Returns the page width from the page information, cached after the first non-zero result.
+        internal int GetWidth()
+        {
+            finalWidth = finalWidth != 0 ? finalWidth : GetPageInformation().BitmapWidth;
+            return finalWidth;
+        }
+
+        // Returns the horizontal resolution from the page information, cached after the first non-zero result.
+        internal int GetResolutionX()
+        {
+            resolutionX = resolutionX != 0 ? resolutionX : GetPageInformation().ResolutionX;
+            return resolutionX;
+        }
+
+        // Returns the vertical resolution from the page information, cached after the first non-zero result.
+        internal int GetResolutionY()
+        {
+            resolutionY = resolutionY != 0 ? resolutionY : GetPageInformation().ResolutionY;
+            return resolutionY;
+        }
+
+        // Decodes the page information segment of this page.
+        private PageInformation GetPageInformation()
+        {
+            SegmentHeader header = GetPageInformationSegment();
+            return (PageInformation)header.GetSegmentData();
+        }
+
+        public override sealed string ToString() => GetType().Name + " (Page number: " + pageNumber + ")";
+    }
+}
diff --git a/src/UglyToad.PdfPig/Filters/Jbig2/Jbig2Rectangle.cs b/src/UglyToad.PdfPig/Filters/Jbig2/Jbig2Rectangle.cs
new file mode 100644
index 000000000..526f2c70a
--- /dev/null
+++ b/src/UglyToad.PdfPig/Filters/Jbig2/Jbig2Rectangle.cs
@@ -0,0 +1,40 @@
+namespace UglyToad.PdfPig.Filters.Jbig2
+{
+    internal readonly struct Jbig2Rectangle
+    {
+        /// <summary>
+        /// The x-coordinate of the upper-left corner of the rectangle.
+        /// </summary>
+        public int X { get; }
+
+        /// <summary>
+        /// The y-coordinate of the upper-left corner of the rectangle.
+        /// </summary>
+        public int Y { get; }
+
+        /// <summary>
+        /// The width of the rectangle.
+        /// </summary>
+        public int Width { get; }
+
+        /// <summary>
+        /// The height of the rectangle.
+        /// </summary>
+        public int Height { get; }
+
+        /// <summary>
+        /// Create a new <see cref="Jbig2Rectangle"/>.
+        /// </summary>
+        /// <param name="x">The x-coordinate of the upper-left corner of the rectangle.</param>
+        /// <param name="y">The y-coordinate of the upper-left corner of the rectangle.</param>
+        /// <param name="width">The width of the rectangle.</param>
+        /// <param name="height">The height of the rectangle.</param>
+        public Jbig2Rectangle(int x, int y, int width, int height)
+        {
+            // Plain value holder: the arguments are stored as given, without validation.
+            X = x;
+            Y = y;
+            Width = width;
+            Height = height;
+        }
+    }
+}
diff --git a/src/UglyToad.PdfPig/Filters/Jbig2/MMRConstants.cs b/src/UglyToad.PdfPig/Filters/Jbig2/MMRConstants.cs
new file mode 100644
index 000000000..b3a2c5b8b
--- /dev/null
+++ b/src/UglyToad.PdfPig/Filters/Jbig2/MMRConstants.cs
@@ -0,0 +1,272 @@
+namespace UglyToad.PdfPig.Filters.Jbig2
+{
+    /// <summary>
+    /// Constants for MMR (de)compression.
+    /// </summary>
+    internal static class MMRConstants
+    {
+        public const int COMP_FAXG3 = 0;
+        public const int COMP_FAXG4 = 1;
+        public const int COMP_MMR = 2;
+        public const int COMP_RLE = 3;
+        public const int COMP_FAXG3_2D = 4;
+
+        public const int NOMASK = 0xFFFF;
+
+        // Negative values are special results that cannot collide with the CODE_* values or run lengths.
+        public const int INCOMP = -4;
+        public const int EOF = -3;
+        public const int INVALID = -2;
+        public const int EOL = -1;
+
+        // Result values of the mode code table below.
+        public const int CODE_P = 0;
+        public const int CODE_H = 1;
+        public const int CODE_V0 = 2;
+        public const int CODE_VR1 = 3;
+        public const int CODE_VR2 = 4;
+        public const int CODE_VR3 = 5;
+        public const int CODE_VL1 = 6;
+        public const int CODE_VL2 = 7;
+        public const int CODE_VL3 = 8;
+        public const int CODE_EXT2D = 9;
+        public const int CODE_EXT1D = 10;
+        public const int CODE_EOL = 11;
+        public const int CODE_EOF = 12;
+        public const int CODE_MAX = 12;
+
+        // Each row is { code length in bits, code word value, result }; the trailing comment spells out
+        // the code word as a bit string of that length.
+        // NOTE(review): the last row yields EOL (-1) rather than CODE_EOL (11) - confirm this is intended.
+        public static readonly int[][] ModeCodes = new[]{ new []{ 4, 0x1, CODE_P }, // 0001 pass
+            new []{ 3, 0x1, CODE_H }, // 001 horizontal
+            new []{ 1, 0x1, CODE_V0 }, // 1 vert 0
+            new []{ 3, 0x3, CODE_VR1 }, // 011 vert r 1
+            new []{ 6, 0x3, CODE_VR2 }, // 000011 vert r 2
+            new []{ 7, 0x3, CODE_VR3 }, // 0000011 vert r 3
+            new []{ 3, 0x2, CODE_VL1 }, // 010 vert l 1
+            new []{ 6, 0x2, CODE_VL2 }, // 000010 vert l 2
+            new []{ 7, 0x2, CODE_VL3 }, // 0000010 vert l 3
+            new []{ 10, 0xf, CODE_EXT2D }, // 0000001111
+            new []{ 12, 0xf, CODE_EXT1D }, // 000000001111
+            new []{ 12, 0x1, EOL }, // 000000000001
+        };
+
+        public static readonly int[][] WhiteCodes = new[]{ new []{ 4,
0x07, 2 }, // 0111 + new []{ 4, 0x08, 3 }, // 1000 + new []{ 4, 0x0B, 4 }, // 1011 + new []{ 4, 0x0C, 5 }, // 1100 + new []{ 4, 0x0E, 6 }, // 1110 + new []{ 4, 0x0F, 7 }, // 1111 + new []{ 5, 0x12, 128 }, // 1001 0 + new []{ 5, 0x13, 8 }, // 1001 1 + new []{ 5, 0x14, 9 }, // 1010 0 + new []{ 5, 0x1B, 64 }, // 1101 1 + new []{ 5, 0x07, 10 }, // 0011 1 + new []{ 5, 0x08, 11 }, // 0100 0 + new []{ 6, 0x17, 192 }, // 0101 11 + new []{ 6, 0x18, 1664 }, // 0110 00 + new []{ 6, 0x2A, 16 }, // 1010 10 + new []{ 6, 0x2B, 17 }, // 1010 11 + new []{ 6, 0x03, 13 }, // 0000 11 + new []{ 6, 0x34, 14 }, // 1101 00 + new []{ 6, 0x35, 15 }, // 1101 01 + new []{ 6, 0x07, 1 }, // 0001 11 + new []{ 6, 0x08, 12 }, // 0010 00 + new []{ 7, 0x13, 26 }, // 0010 011 + new []{ 7, 0x17, 21 }, // 0010 111 + new []{ 7, 0x18, 28 }, // 0011 000 + new []{ 7, 0x24, 27 }, // 0100 100 + new []{ 7, 0x27, 18 }, // 0100 111 + new []{ 7, 0x28, 24 }, // 0101 000 + new []{ 7, 0x2B, 25 }, // 0101 011 + new []{ 7, 0x03, 22 }, // 0000 011 + new []{ 7, 0x37, 256 }, // 0110 111 + new []{ 7, 0x04, 23 }, // 0000 100 + new []{ 7, 0x08, 20 }, // 0001 000 + new []{ 7, 0xC, 19 }, // 0001 100 + new []{ 8, 0x12, 33 }, // 0001 0010 + new []{ 8, 0x13, 34 }, // 0001 0011 + new []{ 8, 0x14, 35 }, // 0001 0100 + new []{ 8, 0x15, 36 }, // 0001 0101 + new []{ 8, 0x16, 37 }, // 0001 0110 + new []{ 8, 0x17, 38 }, // 0001 0111 + new []{ 8, 0x1A, 31 }, // 0001 1010 + new []{ 8, 0x1B, 32 }, // 0001 1011 + new []{ 8, 0x02, 29 }, // 0000 0010 + new []{ 8, 0x24, 53 }, // 0010 0100 + new []{ 8, 0x25, 54 }, // 0010 0101 + new []{ 8, 0x28, 39 }, // 0010 1000 + new []{ 8, 0x29, 40 }, // 0010 1001 + new []{ 8, 0x2A, 41 }, // 0010 1010 + new []{ 8, 0x2B, 42 }, // 0010 1011 + new []{ 8, 0x2C, 43 }, // 0010 1100 + new []{ 8, 0x2D, 44 }, // 0010 1101 + new []{ 8, 0x03, 30 }, // 0000 0011 + new []{ 8, 0x32, 61 }, // 0011 0010 + new []{ 8, 0x33, 62 }, // 0011 0011 + new []{ 8, 0x34, 63 }, // 0011 0100 + new []{ 8, 0x35, 0 }, // 0011 0101 + new 
[]{ 8, 0x36, 320 }, // 0011 0110 + new []{ 8, 0x37, 384 }, // 0011 0111 + new []{ 8, 0x04, 45 }, // 0000 0100 + new []{ 8, 0x4A, 59 }, // 0100 1010 + new []{ 8, 0x4B, 60 }, // 0100 1011 + new []{ 8, 0x5, 46 }, // 0000 0101 + new []{ 8, 0x52, 49 }, // 0101 0010 + new []{ 8, 0x53, 50 }, // 0101 0011 + new []{ 8, 0x54, 51 }, // 0101 0100 + new []{ 8, 0x55, 52 }, // 0101 0101 + new []{ 8, 0x58, 55 }, // 0101 1000 + new []{ 8, 0x59, 56 }, // 0101 1001 + new []{ 8, 0x5A, 57 }, // 0101 1010 + new []{ 8, 0x5B, 58 }, // 0101 1011 + new []{ 8, 0x64, 448 }, // 0110 0100 + new []{ 8, 0x65, 512 }, // 0110 0101 + new []{ 8, 0x67, 640 }, // 0110 0111 + new []{ 8, 0x68, 576 }, // 0110 1000 + new []{ 8, 0x0A, 47 }, // 0000 1010 + new []{ 8, 0x0B, 48 }, // 0000 1011 + new []{ 9, 0x01, INVALID }, // 0000 0000 1 + new []{ 9, 0x98, 1472 }, // 0100 1100 0 + new []{ 9, 0x99, 1536 }, // 0100 1100 1 + new []{ 9, 0x9A, 1600 }, // 0100 1101 0 + new []{ 9, 0x9B, 1728 }, // 0100 1101 1 + new []{ 9, 0xCC, 704 }, // 0110 0110 0 + new []{ 9, 0xCD, 768 }, // 0110 0110 1 + new []{ 9, 0xD2, 832 }, // 0110 1001 0 + new []{ 9, 0xD3, 896 }, // 0110 1001 1 + new []{ 9, 0xD4, 960 }, // 0110 1010 0 + new []{ 9, 0xD5, 1024 }, // 0110 1010 1 + new []{ 9, 0xD6, 1088 }, // 0110 1011 0 + new []{ 9, 0xD7, 1152 }, // 0110 1011 1 + new []{ 9, 0xD8, 1216 }, // 0110 1100 0 + new []{ 9, 0xD9, 1280 }, // 0110 1100 1 + new []{ 9, 0xDA, 1344 }, // 0110 1101 0 + new []{ 9, 0xDB, 1408 }, // 0110 1101 1 + new []{ 10, 0x01, INVALID }, // 0000 0000 01 + new []{ 11, 0x01, INVALID }, // 0000 0000 001 + new []{ 11, 0x08, 1792 }, // 0000 0001 000 + new []{ 11, 0x0C, 1856 }, // 0000 0001 100 + new []{ 11, 0x0D, 1920 }, // 0000 0001 101 + new []{ 12, 0x00, EOF }, // 0000 0000 0000 + new []{ 12, 0x01, EOL }, // 0000 0000 0001 + new []{ 12, 0x12, 1984 }, // 0000 0001 0010 + new []{ 12, 0x13, 2048 }, // 0000 0001 0011 + new []{ 12, 0x14, 2112 }, // 0000 0001 0100 + new []{ 12, 0x15, 2176 }, // 0000 0001 0101 + new []{ 12, 0x16, 2240 
}, // 0000 0001 0110 + new []{ 12, 0x17, 2304 }, // 0000 0001 0111 + new []{ 12, 0x1C, 2368 }, // 0000 0001 1100 + new []{ 12, 0x1D, 2432 }, // 0000 0001 1101 + new []{ 12, 0x1E, 2496 }, // 0000 0001 1110 + new []{ 12, 0x1F, 2560 }, // 0000 0001 1111 + }; + public static readonly int MAX_WHITE_RUN = 2560; + + public static readonly int[][] BlackCodes = new[]{ new []{ 2, 0x02, 3 }, // 10 + new []{ 2, 0x03, 2 }, // 11 + new []{ 3, 0x02, 1 }, // 010 + new []{ 3, 0x03, 4 }, // 011 + new []{ 4, 0x02, 6 }, // 0010 + new []{ 4, 0x03, 5 }, // 0011 + new []{ 5, 0x03, 7 }, // 0001 1 + new []{ 6, 0x04, 9 }, // 0001 00 + new []{ 6, 0x05, 8 }, // 0001 01 + new []{ 7, 0x04, 10 }, // 0000 100 + new []{ 7, 0x05, 11 }, // 0000 101 + new []{ 7, 0x07, 12 }, // 0000 111 + new []{ 8, 0x04, 13 }, // 0000 0100 + new []{ 8, 0x07, 14 }, // 0000 0111 + new []{ 9, 0x01, INVALID }, // 0000 0000 1 + new []{ 9, 0x18, 15 }, // 0000 1100 0 + new []{ 10, 0x01, INVALID }, // 0000 0000 01 + new []{ 10, 0x17, 16 }, // 0000 0101 11 + new []{ 10, 0x18, 17 }, // 0000 0110 00 + new []{ 10, 0x37, 0 }, // 0000 1101 11 + new []{ 10, 0x08, 18 }, // 0000 0010 00 + new []{ 10, 0x0F, 64 }, // 0000 0011 11 + new []{ 11, 0x01, INVALID }, // 0000 0000 001 + new []{ 11, 0x17, 24 }, // 0000 0010 111 + new []{ 11, 0x18, 25 }, // 0000 0011 000 + new []{ 11, 0x28, 23 }, // 0000 0101 000 + new []{ 11, 0x37, 22 }, // 0000 0110 111 + new []{ 11, 0x67, 19 }, // 0000 1100 111 + new []{ 11, 0x68, 20 }, // 0000 1101 000 + new []{ 11, 0x6C, 21 }, // 0000 1101 100 + new []{ 11, 0x08, 1792 }, // 0000 0001 000 + new []{ 11, 0x0C, 1856 }, // 0000 0001 100 + new []{ 11, 0x0D, 1920 }, // 0000 0001 101 + new []{ 12, 0x00, EOF }, // 0000 0000 0000 + new []{ 12, 0x01, EOL }, // 0000 0000 0001 + new []{ 12, 0x12, 1984 }, // 0000 0001 0010 + new []{ 12, 0x13, 2048 }, // 0000 0001 0011 + new []{ 12, 0x14, 2112 }, // 0000 0001 0100 + new []{ 12, 0x15, 2176 }, // 0000 0001 0101 + new []{ 12, 0x16, 2240 }, // 0000 0001 0110 + new []{ 12, 
0x17, 2304 }, // 0000 0001 0111 + new []{ 12, 0x1C, 2368 }, // 0000 0001 1100 + new []{ 12, 0x1D, 2432 }, // 0000 0001 1101 + new []{ 12, 0x1E, 2496 }, // 0000 0001 1110 + new []{ 12, 0x1F, 2560 }, // 0000 0001 1111 + new []{ 12, 0x24, 52 }, // 0000 0010 0100 + new []{ 12, 0x27, 55 }, // 0000 0010 0111 + new []{ 12, 0x28, 56 }, // 0000 0010 1000 + new []{ 12, 0x2B, 59 }, // 0000 0010 1011 + new []{ 12, 0x2C, 60 }, // 0000 0010 1100 + new []{ 12, 0x33, 320 }, // 0000 0011 0011 + new []{ 12, 0x34, 384 }, // 0000 0011 0100 + new []{ 12, 0x35, 448 }, // 0000 0011 0101 + new []{ 12, 0x37, 53 }, // 0000 0011 0111 + new []{ 12, 0x38, 54 }, // 0000 0011 1000 + new []{ 12, 0x52, 50 }, // 0000 0101 0010 + new []{ 12, 0x53, 51 }, // 0000 0101 0011 + new []{ 12, 0x54, 44 }, // 0000 0101 0100 + new []{ 12, 0x55, 45 }, // 0000 0101 0101 + new []{ 12, 0x56, 46 }, // 0000 0101 0110 + new []{ 12, 0x57, 47 }, // 0000 0101 0111 + new []{ 12, 0x58, 57 }, // 0000 0101 1000 + new []{ 12, 0x59, 58 }, // 0000 0101 1001 + new []{ 12, 0x5A, 61 }, // 0000 0101 1010 + new []{ 12, 0x5B, 256 }, // 0000 0101 1011 + new []{ 12, 0x64, 48 }, // 0000 0110 0100 + new []{ 12, 0x65, 49 }, // 0000 0110 0101 + new []{ 12, 0x66, 62 }, // 0000 0110 0110 + new []{ 12, 0x67, 63 }, // 0000 0110 0111 + new []{ 12, 0x68, 30 }, // 0000 0110 1000 + new []{ 12, 0x69, 31 }, // 0000 0110 1001 + new []{ 12, 0x6A, 32 }, // 0000 0110 1010 + new []{ 12, 0x6B, 33 }, // 0000 0110 1011 + new []{ 12, 0x6C, 40 }, // 0000 0110 1100 + new []{ 12, 0x6D, 41 }, // 0000 0110 1101 + new []{ 12, 0xC8, 128 }, // 0000 1100 1000 + new []{ 12, 0xC9, 192 }, // 0000 1100 1001 + new []{ 12, 0xCA, 26 }, // 0000 1100 1010 + new []{ 12, 0xCB, 27 }, // 0000 1100 1011 + new []{ 12, 0xCC, 28 }, // 0000 1100 1100 + new []{ 12, 0xCD, 29 }, // 0000 1100 1101 + new []{ 12, 0xD2, 34 }, // 0000 1101 0010 + new []{ 12, 0xD3, 35 }, // 0000 1101 0011 + new []{ 12, 0xD4, 36 }, // 0000 1101 0100 + new []{ 12, 0xD5, 37 }, // 0000 1101 0101 + new []{ 12, 
0xD6, 38 }, // 0000 1101 0110 + new []{ 12, 0xD7, 39 }, // 0000 1101 0111 + new []{ 12, 0xDA, 42 }, // 0000 1101 1010 + new []{ 12, 0xDB, 43 }, // 0000 1101 1011 + new []{ 13, 0x4A, 640 }, // 0000 0010 0101 0 + new []{ 13, 0x4B, 704 }, // 0000 0010 0101 1 + new []{ 13, 0x4C, 768 }, // 0000 0010 0110 0 + new []{ 13, 0x4D, 832 }, // 0000 0010 0110 1 + new []{ 13, 0x52, 1280 }, // 0000 0010 1001 0 + new []{ 13, 0x53, 1344 }, // 0000 0010 1001 1 + new []{ 13, 0x54, 1408 }, // 0000 0010 1010 0 + new []{ 13, 0x55, 1472 }, // 0000 0010 1010 1 + new []{ 13, 0x5A, 1536 }, // 0000 0010 1101 0 + new []{ 13, 0x5B, 1600 }, // 0000 0010 1101 1 + new []{ 13, 0x64, 1664 }, // 0000 0011 0010 0 + new []{ 13, 0x65, 1728 }, // 0000 0011 0010 1 + new []{ 13, 0x6C, 512 }, // 0000 0011 0110 0 + new []{ 13, 0x6D, 576 }, // 0000 0011 0110 1 + new []{ 13, 0x72, 896 }, // 0000 0011 1001 0 + new []{ 13, 0x73, 960 }, // 0000 0011 1001 1 + new []{ 13, 0x74, 1024 }, // 0000 0011 1010 0 + new []{ 13, 0x75, 1088 }, // 0000 0011 1010 1 + new []{ 13, 0x76, 1152 }, // 0000 0011 1011 0 + new []{ 13, 0x77, 1216 }, // 0000 0011 1011 1 + }; + public static readonly int MAX_BLACK_RUN = 2560; + } +} diff --git a/src/UglyToad.PdfPig/Filters/Jbig2/MMRDecompressor.cs b/src/UglyToad.PdfPig/Filters/Jbig2/MMRDecompressor.cs new file mode 100644 index 000000000..1ef168d19 --- /dev/null +++ b/src/UglyToad.PdfPig/Filters/Jbig2/MMRDecompressor.cs @@ -0,0 +1,705 @@ +namespace UglyToad.PdfPig.Filters.Jbig2 +{ + using System; + using System.IO; + using System.Text; + using UglyToad.PdfPig.Util; + + /// + /// A decompressor for MMR compression. + /// + internal sealed class MMRDecompressor + { + private readonly int width; + private readonly int height; + + /// + /// A class encapsulating the compressed raw data. + /// + private class RunData + { + private static readonly int MAX_RUN_DATA_BUFFER = 1024 << 7; // 1024 * 128 + private static readonly int MIN_RUN_DATA_BUFFER = 3; // min. 
/// <summary>
/// Looks up the code word found at the current bit offset in a two-level code table.
/// </summary>
/// <param name="table">First-level table; an entry may carry a second-level table in <c>SubTable</c>.</param>
/// <returns>
/// The matching table entry, or <c>null</c> when the selected first- or second-level slot is empty.
/// </returns>
internal Code UncompressGetCodeLittleEndian(Code[] table)
{
    // Only the low 24 bits of the refilled code word are used for the lookup.
    int bits = UncompressGetNextCodeLittleEndian() & 0xffffff;

    // Additive operators bind tighter than shifts, so the original unparenthesised
    // expressions shift by exactly these amounts.
    int firstLevelShift = CODE_OFFSET - FIRST_LEVEL_TABLE_SIZE;
    Code entry = table[bits >> firstLevelShift];

    if (entry == null || entry.SubTable == null)
    {
        return entry;
    }

    // Second-level lookup: take the bits that follow the first-level prefix.
    int secondLevelShift = firstLevelShift - SECOND_LEVEL_TABLE_SIZE;
    return entry.SubTable[(bits >> secondLevelShift) & SECOND_LEVEL_TABLE_MASK];
}
/// <summary>
/// Brings <c>lastCode</c> up to date for the current <c>Offset</c> and returns it.
/// When the offset moved forward by at most 24 bits since the previous call, only the
/// missing bits are shifted in; otherwise three bytes are re-read at the absolute position.
/// </summary>
/// <returns>The refreshed <c>lastCode</c> value (callers in this class mask it to 24 bits).</returns>
/// <exception cref="IndexOutOfRangeException">
/// An <see cref="IOException"/> was raised while refilling the buffer; it is attached as the inner exception.
/// </exception>
private int UncompressGetNextCodeLittleEndian()
{
    try
    {
        // the number of bits to fill (offset difference)
        int bitsToFill = Offset - lastOffset;

        // check whether we can refill, or need to fill in absolute mode
        if (bitsToFill < 0 || bitsToFill > 24)
        {
            // refill at absolute offset
            int byteOffset = (Offset >> 3) - bufferBase; // offset >> 3 is equivalent to offset / 8

            if (byteOffset >= bufferTop)
            {
                byteOffset += bufferBase;
                FillBuffer(byteOffset);
                byteOffset -= bufferBase;
            }

            lastCode = (buffer[byteOffset] & 0xff) << 16
                | (buffer[byteOffset + 1] & 0xff) << 8
                | (buffer[byteOffset + 2] & 0xff);

            int bitOffset = Offset & 7; // equivalent to offset % 8
            lastCode <<= bitOffset;
        }
        else
        {
            // the offset to the next byte boundary as seen from the last offset
            int bitOffset = lastOffset & 7;
            int avail = 7 - bitOffset;

            // check whether there are enough bits in the "queue"
            if (bitsToFill <= avail)
            {
                lastCode <<= bitsToFill;
            }
            else
            {
                int byteOffset = (lastOffset >> 3) + 3 - bufferBase;

                if (byteOffset >= bufferTop)
                {
                    byteOffset += bufferBase;
                    FillBuffer(byteOffset);
                    byteOffset -= bufferBase;
                }

                // First shift by the remainder of the current byte, then by whole bytes.
                bitOffset = 8 - bitOffset;
                do
                {
                    lastCode <<= bitOffset;
                    lastCode |= buffer[byteOffset] & 0xff;
                    bitsToFill -= bitOffset;
                    byteOffset++;
                    bitOffset = 8;
                } while (bitsToFill >= 8);

                lastCode <<= bitsToFill; // shift the rest
            }
        }

        lastOffset = Offset;

        return lastCode;
    }
    catch (IOException e)
    {
        // Same exception type and message as before; the IOException is now also kept as
        // InnerException so the original failure and its stack trace are not lost.
        throw new IndexOutOfRangeException(
            "Corrupted RLE data caused by an IOException while reading raw data: "
            + e.ToString(),
            e);
    }
}
/// <summary>
/// One code-table entry built from a three-element array: bit length, code word and run length.
/// An entry may also carry a second-level table in <see cref="SubTable"/>.
/// </summary>
private class Code
{
    internal Code(int[] codeData)
    {
        BitLength = codeData[0];
        CodeWord = codeData[1];
        RunLength = codeData[2];
    }

    /// <summary>Second-level table, or <c>null</c> when none was assigned.</summary>
    internal Code[] SubTable { get; set; }

    internal int BitLength { get; }

    internal int CodeWord { get; }

    internal int RunLength { get; }

    /// <summary>Formats the entry as "bitLength/codeWord/runLength".</summary>
    public sealed override string ToString() => $"{BitLength}/{CodeWord}/{RunLength}";

    /// <summary>Two entries are equal when bit length, code word and run length all match.</summary>
    public sealed override bool Equals(object obj)
    {
        return obj is Code other
            && other.BitLength == BitLength
            && other.CodeWord == CodeWord
            && other.RunLength == RunLength;
    }

    public sealed override int GetHashCode() => (BitLength, CodeWord, RunLength).GetHashCode();
}
/// <summary>
/// Decodes one line using mode codes read from <c>ModeTable</c>, positioning runs relative to the
/// offsets of the reference line and writing the resulting run end positions to <paramref name="runOffsets"/>.
/// </summary>
/// <param name="runData">Compressed data; its <c>Offset</c> is advanced by the bit length of every consumed code.</param>
/// <param name="referenceOffsets">Run offsets of the reference line. Four sentinel entries are written after index <paramref name="refRunLength"/>.</param>
/// <param name="refRunLength">Number of valid entries in <paramref name="referenceOffsets"/>.</param>
/// <param name="runOffsets">Receives the run offsets of the decoded line.</param>
/// <param name="width">Line width in pixels.</param>
/// <returns>
/// The number of offsets written to <paramref name="runOffsets"/>; <c>MMRConstants.EOL</c> when decoding
/// stopped with no current code; <c>MMRConstants.EOF</c> when any exception was thrown while decoding;
/// or the result of the third <c>Uncompress1D</c> call in the fallback branch of the EOL/default case.
/// </returns>
private static int Uncompress2D(RunData runData, int[] referenceOffsets, int refRunLength,
    int[] runOffsets, int width)
{

    int referenceBufferOffset = 0;
    int currentBufferOffset = 0;
    int currentLineBitPosition = 0;

    bool whiteRun = true; // Always start with a white run
    Code code = null; // Storage var for current code being processed

    // Terminate the reference line with sentinels (width, width, width + 1, width + 1) so the
    // look-ahead on referenceOffsets below always finds an entry beyond the current position.
    // NOTE(review): these writes happen before the try block, so an out-of-range refRunLength
    // throws to the caller instead of being turned into EOF.
    referenceOffsets[refRunLength] = referenceOffsets[refRunLength + 1] = width;
    referenceOffsets[refRunLength + 2] = referenceOffsets[refRunLength + 3] = width + 1;

    try
    {
    // "goto decodeLoop" re-evaluates the loop condition, i.e. it acts as "continue" for this loop
    // from inside the switch and the nested loops.
    decodeLoop: while (currentLineBitPosition < width)
        {
            // Get the mode code
            code = runData.UncompressGetCode(ModeTable);

            if (code == null)
            {
                // No table entry for these bits: skip one bit and finish the line (EOL is returned below).
                runData.Offset++;
                goto endDecodeLoop;
            }

            // Add the code length to the bit offset
            runData.Offset += code.BitLength;

            switch (code.RunLength)
            {
                // Vertical modes: position is the reference offset plus/minus 0..3.
                case MMRConstants.CODE_V0:
                    currentLineBitPosition = referenceOffsets[referenceBufferOffset];
                    break;

                case MMRConstants.CODE_VR1:
                    currentLineBitPosition = referenceOffsets[referenceBufferOffset] + 1;
                    break;

                case MMRConstants.CODE_VL1:
                    currentLineBitPosition = referenceOffsets[referenceBufferOffset] - 1;
                    break;

                // Horizontal mode: two consecutive runs are read, the first in the current
                // colour's table and the second in the other colour's table.
                case MMRConstants.CODE_H:
                    // First run. Codes with RunLength >= 64 only accumulate; a code below 64 ends the run.
                    for (int ever = 1; ever > 0;)
                    {
                        code = runData.UncompressGetCode(whiteRun == true ? WhiteTable : BlackTable);

                        if (code == null)
                        {
                            goto endDecodeLoop;
                        }

                        runData.Offset += code.BitLength;
                        if (code.RunLength < 64)
                        {
                            if (code.RunLength < 0)
                            {
                                // Negative run length: record the position reached so far and
                                // clear the code so that EOL is returned.
                                runOffsets[currentBufferOffset++] = currentLineBitPosition;
                                code = null;
                                goto endDecodeLoop;

                            }
                            currentLineBitPosition += code.RunLength;
                            runOffsets[currentBufferOffset++] = currentLineBitPosition;
                            break;
                        }
                        currentLineBitPosition += code.RunLength;
                    }

                    // Second run, decoded with the opposite colour's table.
                    int firstHalfBitPos = currentLineBitPosition;
                    for (int ever1 = 1; ever1 > 0;)
                    {
                        code = runData.UncompressGetCode(whiteRun != true ? WhiteTable : BlackTable);
                        if (code == null)
                        {
                            goto endDecodeLoop;
                        }

                        runData.Offset += code.BitLength;
                        if (code.RunLength < 64)
                        {
                            if (code.RunLength < 0)
                            {
                                // Unlike the first run, the code is kept here, so the number of
                                // offsets written so far is returned rather than EOL.
                                runOffsets[currentBufferOffset++] = currentLineBitPosition;
                                goto endDecodeLoop;
                            }
                            currentLineBitPosition += code.RunLength;
                            // don't generate 0-length run at EOL for cases where the line ends in an H-run.
                            if (currentLineBitPosition < width
                                || currentLineBitPosition != firstHalfBitPos)
                            {
                                runOffsets[currentBufferOffset++] = currentLineBitPosition;
                            }

                            break;
                        }
                        currentLineBitPosition += code.RunLength;
                    }

                    // Advance in the reference line, two entries at a time, past the current position.
                    while (currentLineBitPosition < width
                        && referenceOffsets[referenceBufferOffset] <= currentLineBitPosition)
                    {
                        referenceBufferOffset += 2;
                    }

                    goto decodeLoop;

                // Pass mode: jump to the reference entry after the next one; nothing is written to runOffsets.
                case MMRConstants.CODE_P:
                    referenceBufferOffset++;
                    currentLineBitPosition = referenceOffsets[referenceBufferOffset++];
                    goto decodeLoop;

                case MMRConstants.CODE_VR2:
                    currentLineBitPosition = referenceOffsets[referenceBufferOffset] + 2;
                    break;

                case MMRConstants.CODE_VL2:
                    currentLineBitPosition = referenceOffsets[referenceBufferOffset] - 2;
                    break;

                case MMRConstants.CODE_VR3:
                    currentLineBitPosition = referenceOffsets[referenceBufferOffset] + 3;
                    break;

                case MMRConstants.CODE_VL3:
                    currentLineBitPosition = referenceOffsets[referenceBufferOffset] - 3;
                    break;

                case MMRConstants.EOL:
                default:
                    // Possibly MMR-decoded
                    // An EOL code read while the offset is exactly 12 restarts at offset 0 and decodes
                    // three lines with Uncompress1D, returning the result of the third call.
                    // NOTE(review): presumably this handles data that begins with an EOL marker and is
                    // 1D-coded; confirm against the upstream decoder.
                    if (runData.Offset == 12 && code.RunLength == MMRConstants.EOL)
                    {
                        runData.Offset = 0;
                        Uncompress1D(runData, referenceOffsets, width);
                        runData.Offset++;
                        Uncompress1D(runData, runOffsets, width);
                        int retCode = Uncompress1D(runData, referenceOffsets, width);
                        runData.Offset++;
                        return retCode;
                    }
                    // Any other code ends the line: the loop condition fails on the next check.
                    currentLineBitPosition = width;
                    goto decodeLoop;
            }

            // Only vertical modes get this far
            if (currentLineBitPosition <= width)
            {
                whiteRun = !whiteRun;

                runOffsets[currentBufferOffset++] = currentLineBitPosition;

                // Step back one reference entry (or forward when already at the start), then
                // advance in steps of two past the current position.
                if (referenceBufferOffset > 0)
                {
                    referenceBufferOffset--;
                }
                else
                {
                    referenceBufferOffset++;
                }

                while (currentLineBitPosition < width
                    && referenceOffsets[referenceBufferOffset] <= currentLineBitPosition)
                {
                    referenceBufferOffset += 2;
                }
            }
        }
    }
    catch (Exception)
    {
        // Any failure inside the loop (including index errors and null codes) is reported as EOF.
        // NOTE(review): the diagnostic text assembled below is never logged, returned or thrown;
        // it is effectively dead code unless a logger is wired in.
        var strBuf = new StringBuilder();
        strBuf.Append("whiteRun = ");
        strBuf.Append(whiteRun);
        strBuf.Append("\n");
        strBuf.Append("code = ");
        strBuf.Append(code);
        strBuf.Append("\n");
        strBuf.Append("refOffset = ");
        strBuf.Append(referenceBufferOffset);
        strBuf.Append("\n");
        strBuf.Append("curOffset = ");
        strBuf.Append(currentBufferOffset);
        strBuf.Append("\n");
        strBuf.Append("bitPos = ");
        strBuf.Append(currentLineBitPosition);
        strBuf.Append("\n");
        strBuf.Append("runData.offset = ");
        strBuf.Append(runData.Offset);
        strBuf.Append(" ( byte:");
        strBuf.Append(runData.Offset / 8);
        strBuf.Append(", bit:");
        strBuf.Append(runData.Offset & 0x07);
        strBuf.Append(" )");

        return MMRConstants.EOF;
    }

endDecodeLoop:

    // Make sure the entry after the last written offset is the line width.
    if (runOffsets[currentBufferOffset] != width)
    {
        runOffsets[currentBufferOffset] = width;
    }

    // A null code means decoding stopped early (see the goto sites above).
    if (code == null)
    {
        return MMRConstants.EOL;
    }
    return currentBufferOffset;
}
/// <summary>
/// Decodes up to <c>height</c> lines into a new bitmap of <c>width</c> x <c>height</c>.
/// Decoding stops early when a line reports <c>MMRConstants.EOF</c>. Afterwards, trailing EOL
/// codes are skipped and the data offset is aligned to the next byte boundary.
/// </summary>
/// <returns>The bitmap holding the lines decoded so far.</returns>
public Jbig2Bitmap Uncompress()
{
    var bitmap = new Jbig2Bitmap(width, height);

    // Both buffers leave room for the sentinel entries Uncompress2D appends.
    var current = new int[width + 5];
    var reference = new int[width + 5];

    // The initial reference line consists of a single run ending at the line width.
    reference[0] = width;
    int refRunLength = 1;

    for (int line = 0; line < height; line++)
    {
        int count = Uncompress2D(data, reference, refRunLength, current, width);

        if (count == MMRConstants.EOF)
        {
            break;
        }

        if (count > 0)
        {
            FillBitmap(bitmap, line, current, count);
        }

        // The line just decoded becomes the reference for the next one.
        // NOTE(review): a non-EOF marker value (e.g. MMRConstants.EOL, value not visible here)
        // is carried over as the next refRunLength as well - confirm that is intended.
        (reference, current) = (current, reference);
        refRunLength = count;
    }

    DetectAndSkipEOL();
    data.Align();

    return bitmap;
}

/// <summary>
/// Consumes consecutive EOL mode codes at the current data offset, stopping at the first
/// lookup that is empty or not an EOL.
/// </summary>
private void DetectAndSkipEOL()
{
    Code code = data.UncompressGetCode(ModeTable);

    while (code != null && code.RunLength == MMRConstants.EOL)
    {
        data.Offset += code.BitLength;
        code = data.UncompressGetCode(ModeTable);
    }
}

/// <summary>
/// Writes one line into the bitmap from a list of run end offsets. Runs at even indices
/// produce 0 bits, runs at odd indices produce 1 bits; bits are packed most significant first.
/// </summary>
/// <param name="result">Target bitmap.</param>
/// <param name="line">Row to fill.</param>
/// <param name="currentOffsets">End position of each run.</param>
/// <param name="count">Number of entries of <paramref name="currentOffsets"/> to use.</param>
private void FillBitmap(Jbig2Bitmap result, int line, int[] currentOffsets, int count)
{
    int column = 0;
    int byteIndex = result.GetByteIndex(0, line);
    byte packed = 0;

    for (int run = 0; run < count; run++)
    {
        int runEnd = currentOffsets[run];
        byte bit = (byte)(run & 1);

        while (column < runEnd)
        {
            packed = (byte)((packed << 1) | bit);
            column++;

            if ((column & 7) != 0)
            {
                continue;
            }

            // A full byte has been collected.
            result.SetByte(byteIndex++, packed);
            packed = 0;
        }
    }

    // Flush a partially filled last byte, left-aligned.
    int remainder = column & 7;
    if (remainder != 0)
    {
        packed = (byte)(packed << (8 - remainder));
        result.SetByte(byteIndex, packed);
    }
}
/// <summary>
/// Builds the two-level lookup table for a list of code definitions.
/// Codes of at most FIRST_LEVEL_TABLE_SIZE bits are stored directly in the first level, once for
/// every combination of the unused low ("variant") bits. Longer codes are stored in a second-level
/// table that hangs off a placeholder entry at the index formed by their leading bits.
/// </summary>
/// <param name="codes">Code definitions, each an array of bit length, code word and run length.</param>
/// <returns>The first-level table.</returns>
/// <exception cref="ArgumentException">A code is longer than both table levels together.</exception>
private static Code[] CreateLittleEndianTable(int[][] codes)
{
    // Stores the entry at every index that starts with baseWord and ends in any variant bits.
    void FillVariants(Code[] target, int baseWord, int variantLength, Code entry)
    {
        int variants = 1 << variantLength;
        for (int variant = 0; variant < variants; variant++)
        {
            target[baseWord | variant] = entry;
        }
    }

    var table = new Code[FIRST_LEVEL_TABLE_MASK + 1];

    foreach (int[] codeData in codes)
    {
        var entry = new Code(codeData);
        int bitLength = entry.BitLength;

        if (bitLength <= FIRST_LEVEL_TABLE_SIZE)
        {
            int spareBits = FIRST_LEVEL_TABLE_SIZE - bitLength;
            FillVariants(table, entry.CodeWord << spareBits, spareBits, entry);
            continue;
        }

        // The leading FIRST_LEVEL_TABLE_SIZE bits select the first-level slot.
        int firstLevelIndex = (int)((uint)entry.CodeWord >> (bitLength - FIRST_LEVEL_TABLE_SIZE));

        if (table[firstLevelIndex] == null)
        {
            // Placeholder entry that only carries the second-level table.
            table[firstLevelIndex] = new Code(new int[3])
            {
                SubTable = new Code[SECOND_LEVEL_TABLE_MASK + 1]
            };
        }

        if (bitLength > FIRST_LEVEL_TABLE_SIZE + SECOND_LEVEL_TABLE_SIZE)
        {
            throw new ArgumentException("Code table overflow in MMRDecompressor");
        }

        int secondLevelSpareBits = FIRST_LEVEL_TABLE_SIZE + SECOND_LEVEL_TABLE_SIZE - bitLength;
        FillVariants(
            table[firstLevelIndex].SubTable,
            (entry.CodeWord << secondLevelSpareBits) & SECOND_LEVEL_TABLE_MASK,
            secondLevelSpareBits,
            entry);
    }

    return table;
}
/// <summary>
/// This class represents the segment type "Page information", 7.4.8 (page 73).
/// All values are read from the segment's input stream when <see cref="Init"/> is called.
/// </summary>
internal sealed class PageInformation : ISegmentData
{
    private SubInputStream subInputStream;

    /// <summary>Page bitmap width, four bytes, 7.4.8.1</summary>
    public int BitmapWidth { get; private set; }

    /// <summary>Page bitmap height, four bytes, 7.4.8.2</summary>
    public int BitmapHeight { get; private set; }

    /// <summary>Page X resolution, four bytes, 7.4.8.3</summary>
    public int ResolutionX { get; private set; }

    /// <summary>Page Y resolution, four bytes, 7.4.8.4</summary>
    public int ResolutionY { get; private set; }

    // Page segment flags, one byte, 7.4.8.5

    /// <summary>Flag bit 6.</summary>
    public bool IsCombinationOperatorOverrideAllowed { get; private set; }

    /// <summary>Flag bits 3-4, translated to the enum.</summary>
    public CombinationOperator CombinationOperator { get; private set; }

    /// <summary>Flag bit 5.</summary>
    public bool RequiresAuxiliaryBuffer { get; private set; }

    /// <summary>Flag bit 2.</summary>
    public short DefaultPixelValue { get; private set; }

    /// <summary>Flag bit 1.</summary>
    public bool MightContainRefinements { get; private set; }

    /// <summary>Flag bit 0.</summary>
    public bool IsLossless { get; private set; }

    // Page striping information, two bytes, 7.4.8.6

    /// <summary>Striping bit 15.</summary>
    public bool IsStriped { get; private set; }

    /// <summary>Striping bits 0-14.</summary>
    public short MaxStripeSize { get; private set; }

    /// <summary>
    /// Stores the stream and parses the page information fields from it.
    /// </summary>
    public void Init(SegmentHeader header, SubInputStream sis)
    {
        subInputStream = sis;
        ParseHeader();
    }

    /// <summary>
    /// Reads every field in stream order; the order of the reads below must not change.
    /// </summary>
    private void ParseHeader()
    {
        // Width and height
        BitmapWidth = (int)subInputStream.ReadBits(32);
        BitmapHeight = (int)subInputStream.ReadBits(32);

        // Resolution
        ResolutionX = (int)(subInputStream.ReadBits(32) & 0xffffffff);
        ResolutionY = (int)(subInputStream.ReadBits(32) & 0xffffffff);

        // Flags byte, most significant bit first.
        subInputStream.ReadBit(); // Bit 7, dirty read
        IsCombinationOperatorOverrideAllowed |= ReadFlag(); // Bit 6
        RequiresAuxiliaryBuffer |= ReadFlag(); // Bit 5
        CombinationOperator = CombinationOperators
            .TranslateOperatorCodeToEnum((short)(subInputStream.ReadBits(2) & 0xf)); // Bit 3-4
        DefaultPixelValue = (short)subInputStream.ReadBit(); // Bit 2
        MightContainRefinements |= ReadFlag(); // Bit 1
        IsLossless |= ReadFlag(); // Bit 0

        // Striping information
        IsStriped |= ReadFlag(); // Bit 15
        MaxStripeSize = (short)(subInputStream.ReadBits(15) & 0xffff); // Bit 0-14
    }

    /// <summary>Reads one bit and reports whether it is set.</summary>
    private bool ReadFlag() => subInputStream.ReadBit() == 1;
}
/// <summary>
/// This class represents the segment type "Pattern dictionary", 7.4.4.
/// The header is parsed in <see cref="Init"/>; the patterns themselves are decoded lazily by
/// <see cref="GetDictionary"/> and cached.
/// </summary>
internal sealed class PatternDictionary : IJbigDictionary
{
    private SubInputStream subInputStream;

    // Segment data structure (only necessary if MMR is used)
    private long dataHeaderOffset = 0;
    private long dataHeaderLength;
    private long dataOffset;
    private long dataLength;

    private short[] gbAtX = null;
    private short[] gbAtY = null;

    // Decoded bitmaps, stored to be used by segments, that refer to it
    private List<Jbig2Bitmap> patterns;

    // Pattern dictionary flags, 7.4.4.1.1
    public bool IsMMREncoded { get; private set; }
    public byte HdTemplate { get; private set; }

    // Width of the patterns in the pattern dictionary, 7.4.4.1.2
    public short HdpWidth { get; private set; }

    // Height of the patterns in the pattern dictionary, 7.4.4.1.3
    public short HdpHeight { get; private set; }

    // Largest gray-scale value, 7.4.4.1.4
    // Value: one less than the number of patterns defined in this pattern dictionary
    public int GrayMax { get; private set; }

    /// <summary>
    /// Reads the flags byte, pattern size and gray maximum, records where the segment data
    /// starts, and validates the pattern size.
    /// </summary>
    private void ParseHeader()
    {
        // Bit 3-7
        subInputStream.ReadBits(5); // Dirty read ...

        // Bit 1-2
        ReadTemplate();

        // Bit 0
        ReadIsMMREncoded();

        ReadPatternWidthAndHeight();

        ReadGrayMax();

        ComputeSegmentDataStructure();

        CheckInput();
    }

    private void ReadTemplate()
    {
        // Bit 1-2
        HdTemplate = (byte)subInputStream.ReadBits(2);
    }

    private void ReadIsMMREncoded()
    {
        // Bit 0
        if (subInputStream.ReadBit() == 1)
        {
            IsMMREncoded = true;
        }
    }

    /// <summary>
    /// Reads the one-byte pattern width and height as unsigned values.
    /// </summary>
    private void ReadPatternWidthAndHeight()
    {
        // The fields are single unsigned bytes. The previous (sbyte) cast turned 128..255 into
        // negative numbers, which CheckInput then rejected as invalid.
        // NOTE(review): assumes ReadByte signals end of stream by throwing rather than by
        // returning -1 - confirm against SubInputStream.
        HdpWidth = (short)(subInputStream.ReadByte() & 0xff);
        HdpHeight = (short)(subInputStream.ReadByte() & 0xff);
    }

    private void ReadGrayMax()
    {
        GrayMax = (int)(subInputStream.ReadBits(32) & 0xffffffff);
    }

    /// <summary>
    /// Derives the data offset and length from the stream position reached after the header.
    /// </summary>
    private void ComputeSegmentDataStructure()
    {
        dataOffset = subInputStream.Position;
        dataHeaderLength = dataOffset - dataHeaderOffset;
        dataLength = subInputStream.Length - dataHeaderLength;
    }

    private void CheckInput()
    {
        if (HdpHeight < 1 || HdpWidth < 1)
        {
            throw new InvalidHeaderValueException("Width/Heigth must be greater than zero.");
        }
    }

    /// <summary>
    /// Decodes the pattern dictionary segment on first use and returns the patterns.
    /// The procedure is described in 6.7.5 (page 43).
    /// </summary>
    /// <returns>A list with one bitmap per pattern (GrayMax + 1 entries); the same list on later calls.</returns>
    public List<Jbig2Bitmap> GetDictionary()
    {
        if (null == patterns)
        {
            if (!IsMMREncoded)
            {
                SetGbAtPixels();
            }

            // 2) Decode all patterns as one bitmap, side by side.
            GenericRegion genericRegion = new GenericRegion(subInputStream);
            genericRegion.SetParameters(IsMMREncoded, dataOffset, dataLength, HdpHeight,
                (GrayMax + 1) * HdpWidth, HdTemplate, false, false, gbAtX, gbAtY);

            Jbig2Bitmap collectiveBitmap = genericRegion.GetRegionBitmap();

            // 4)
            ExtractPatterns(collectiveBitmap);
        }

        return patterns;
    }

    /// <summary>
    /// Cuts the collective bitmap into GrayMax + 1 patterns of HdpWidth x HdpHeight pixels.
    /// </summary>
    private void ExtractPatterns(Jbig2Bitmap collectiveBitmap)
    {
        // 3)
        int gray = 0;
        patterns = new List<Jbig2Bitmap>(GrayMax + 1);

        // 4)
        while (gray <= GrayMax)
        {
            // 4) a) Retrieve a pattern bitmap by extracting it out of the collective bitmap
            Jbig2Rectangle roi = new Jbig2Rectangle(HdpWidth * gray, 0, HdpWidth, HdpHeight);
            Jbig2Bitmap patternBitmap = Jbig2Bitmaps.Extract(roi, collectiveBitmap);
            patterns.Add(patternBitmap);

            // 4) b)
            gray++;
        }
    }

    /// <summary>
    /// Sets the adaptive template pixel positions passed to the generic region decoder:
    /// four pairs for template 0, one pair otherwise. The first X is always -HdpWidth.
    /// </summary>
    private void SetGbAtPixels()
    {
        if (HdTemplate == 0)
        {
            gbAtX = new short[4];
            gbAtY = new short[4];
            gbAtX[0] = (short)-HdpWidth;
            gbAtY[0] = 0;
            gbAtX[1] = -3;
            gbAtY[1] = -1;
            gbAtX[2] = 2;
            gbAtY[2] = -2;
            gbAtX[3] = -2;
            gbAtY[3] = -2;

        }
        else
        {
            gbAtX = new short[1];
            gbAtY = new short[1];
            gbAtX[0] = (short)-HdpWidth;
            gbAtY[0] = 0;
        }
    }

    /// <summary>
    /// Stores the stream and parses the segment header from it.
    /// </summary>
    public void Init(SegmentHeader header, SubInputStream sis)
    {
        subInputStream = sis;
        ParseHeader();
    }
}
/// <summary>
/// This class represents the "Region segment information" field, 7.4.1 (page 50).
/// Every region segment data starts with this part.
/// </summary>
internal sealed class RegionSegmentInformation : ISegmentData
{
    private readonly SubInputStream subInputStream;

    /// <summary>
    /// Creates an instance whose values are read from the stream by <see cref="ParseHeader"/>.
    /// </summary>
    public RegionSegmentInformation(SubInputStream subInputStream)
    {
        this.subInputStream = subInputStream;
    }

    /// <summary>
    /// Creates an instance with a preset size and no input stream.
    /// </summary>
    public RegionSegmentInformation(int bitmapWidth, int bitmapHeight)
    {
        BitmapWidth = bitmapWidth;
        BitmapHeight = bitmapHeight;
    }

    /// <summary>
    /// Region segment bitmap width, 7.4.1.1
    /// </summary>
    public int BitmapWidth { get; set; }

    /// <summary>
    /// Region segment bitmap height, 7.4.1.2
    /// </summary>
    public int BitmapHeight { get; set; }

    /// <summary>
    /// Region segment bitmap X location, 7.4.1.3
    /// </summary>
    public int X { get; private set; }

    /// <summary>
    /// Region segment bitmap Y location, 7.4.1.4
    /// </summary>
    public int Y { get; private set; }

    /// <summary>
    /// Region segment flags, 7.4.1.5
    /// </summary>
    public CombinationOperator CombinationOperator { get; private set; }

    /// <summary>
    /// Reads width, height, X and Y (32 bits each), skips the five reserved flag bits and
    /// translates the remaining three bits into the combination operator.
    /// </summary>
    public void ParseHeader()
    {
        BitmapWidth = ReadInt32();
        BitmapHeight = ReadInt32();
        X = ReadInt32();
        Y = ReadInt32();

        // Bit 3-7
        subInputStream.ReadBits(5); // Dirty read... reserved bits are 0

        // Bit 0-2
        short operatorCode = (short)(subInputStream.ReadBits(3) & 0xf);
        CombinationOperator = CombinationOperators.TranslateOperatorCodeToEnum(operatorCode);
    }

    /// <summary>
    /// Does nothing: the stream is supplied through the constructor and parsing is started
    /// explicitly with <see cref="ParseHeader"/>.
    /// </summary>
    public void Init(SegmentHeader header, SubInputStream sis)
    {
    }

    /// <summary>Reads the next 32 bits from the stream as an int.</summary>
    private int ReadInt32() => (int)(subInputStream.ReadBits(32) & 0xffffffff);
}
set; } + + public SegmentHeader(Jbig2Document document, SubInputStream sis, long offset, int organisationType) + { + subInputStream = sis; + Parse(document, sis, offset, organisationType); + } + + private void Parse(Jbig2Document document, IImageInputStream subInputStream, long offset, int organisationType) + { + subInputStream.Seek(offset); + + // 7.2.2 Segment number + ReadSegmentNumber(subInputStream); + + // 7.2.3 Segment header flags + ReadSegmentHeaderFlag(subInputStream); + + // 7.2.4 Amount of referred-to segments + int countOfReferredToSegments = ReadAmountOfReferredToSegments(subInputStream); + + // 7.2.5 Referred-to segments numbers + int[] referredToSegmentNumbers = ReadReferredToSegmentsNumbers(subInputStream, countOfReferredToSegments); + + // 7.2.6 Segment page association (Checks how big the page association field is.) + ReadSegmentPageAssociation(document, subInputStream, countOfReferredToSegments, referredToSegmentNumbers); + + // 7.2.7 Segment data length (Contains the length of the data part (in bytes).) + ReadSegmentDataLength(subInputStream); + + ReadDataStartOffset(subInputStream, organisationType); + ReadSegmentHeaderLength(subInputStream, offset); + } + + /// + /// 7.2.2 Segment number + /// + private void ReadSegmentNumber(IImageInputStream subInputStream) + { + SegmentNumber = (int)(subInputStream.ReadBits(32) & 0xffffffff); + } + + /// + /// 7.2.3 Segment header flags + /// + /// + private void ReadSegmentHeaderFlag(IImageInputStream subInputStream) + { + // Bit 7: Retain Flag, if 1, this segment is flagged as retained; + IsRetained = subInputStream.ReadBit() == 1; + + // Bit 6: Size of the page association field. 
One byte if 0, four bytes if 1; + pageAssociationFieldSize = (byte)subInputStream.ReadBit(); + + // Bit 5-0: Contains the values (between 0 and 62 with gaps) for segment types, specified in 7.3 + SegmentType = (int)(subInputStream.ReadBits(6) & 0xff); + } + + /// + /// 7.2.4 Amount of referred-to segments + /// + private int ReadAmountOfReferredToSegments(IImageInputStream subInputStream) + { + int countOfRTS = (int)(subInputStream.ReadBits(3) & 0xf); + + byte[] retainBit; + + if (countOfRTS <= 4) + { + // Short format + retainBit = new byte[5]; + for (int i = 0; i <= 4; i++) + { + retainBit[i] = (byte)subInputStream.ReadBit(); + } + } + else + { + // Long format + countOfRTS = (int)(subInputStream.ReadBits(29) & 0xffffffff); + + int arrayLength = (countOfRTS + 8) >> 3; + retainBit = new byte[arrayLength <<= 3]; + + for (int i = 0; i < arrayLength; i++) + { + retainBit[i] = (byte)subInputStream.ReadBit(); + } + } + return countOfRTS; + } + + /// + /// 7.2.5 Referred-to segments numbers + /// Gathers all segment numbers of referred-to segments.The segments itself are stored in the + /// array. + /// + /// Wrapped source data input stream. + /// The number of referred - to segments. + /// An array with the segment number of all referred - to segments. 
+        private int[] ReadReferredToSegmentsNumbers(IImageInputStream subInputStream, int countOfReferredToSegments)
+        {
+            int[] segmentNumbers = new int[countOfReferredToSegments];
+
+            if (countOfReferredToSegments <= 0)
+            {
+                return segmentNumbers;
+            }
+
+            // The width of every referred-to number depends on this segment's own number:
+            // one byte up to 256, two bytes up to 65536, four bytes above.
+            int bitsPerNumber;
+            if (SegmentNumber > 65536)
+            {
+                bitsPerNumber = 32;
+            }
+            else if (SegmentNumber > 256)
+            {
+                bitsPerNumber = 16;
+            }
+            else
+            {
+                bitsPerNumber = 8;
+            }
+
+            // Allocated here, filled later by ReadSegmentPageAssociation.
+            referredToSegments = new SegmentHeader[countOfReferredToSegments];
+
+            for (int index = 0; index < countOfReferredToSegments; index++)
+            {
+                segmentNumbers[index] = (int)(subInputStream.ReadBits(bitsPerNumber) & 0xffffffff);
+            }
+
+            return segmentNumbers;
+        }
+
+        /// <summary>
+        /// 7.2.6 Segment page association. Reads the page number (8 or 32 bits, depending on the
+        /// page association field size flag) and resolves the referred-to segments, taking them from
+        /// the associated page when the document has one and from the global segments otherwise.
+        /// </summary>
+        private void ReadSegmentPageAssociation(Jbig2Document document, IImageInputStream subInputStream,
+            int countOfReferredToSegments, int[] referredToSegmentNumbers)
+        {
+            // Short format is a single byte, long format is four bytes.
+            PageAssociation = pageAssociationFieldSize == 0
+                ? (short)(subInputStream.ReadBits(8) & 0xff)
+                : (int)(subInputStream.ReadBits(32) & 0xffffffff);
+
+            if (countOfReferredToSegments <= 0)
+            {
+                return;
+            }
+
+            Jbig2Page page = document.GetPage(PageAssociation);
+            for (int index = 0; index < countOfReferredToSegments; index++)
+            {
+                int number = referredToSegmentNumbers[index];
+                if (page != null)
+                {
+                    referredToSegments[index] = page.GetSegment(number);
+                }
+                else
+                {
+                    referredToSegments[index] = document.GetGlobalSegment(number);
+                }
+            }
+        }
+
+        /// <summary>
+        /// 7.2.7 Segment data length. Reads the 32-bit length of the data part in bytes.
+        /// </summary>
+        private void ReadSegmentDataLength(IImageInputStream subInputStream)
+            => SegmentDataLength = subInputStream.ReadBits(32) & 0xffffffff;
+
+        ///
+        /// Sets the offset only if organization type is SEQUENTIAL. If random, data starts after segment headers and can be
+        /// determined when all segment headers are parsed and allocated.
+        ///
+        private void ReadDataStartOffset(IImageInputStream subInputStream, int organisationType)
+        {
+            if (organisationType == Jbig2Document.SEQUENTIAL)
+            {
+                SegmentDataStartOffset = subInputStream.Position;
+            }
+        }
+
+        // The header length is the distance the stream advanced since the header's start offset.
+        private void ReadSegmentHeaderLength(IImageInputStream subInputStream, long offset)
+        {
+            SegmentHeaderLength = subInputStream.Position - offset;
+        }
+
+        /// <summary>
+        /// Returns the referred-to segments, or null when this segment refers to none.
+        /// </summary>
+        public SegmentHeader[] GetRtSegments()
+        {
+            return referredToSegments;
+        }
+
+        /// <summary>
+        /// Creates and returns a new <see cref="SubInputStream"/> that provides the data part of this segment.
+        /// It is a clipped view of the source input stream.
+        /// </summary>
+        /// <returns>The <see cref="SubInputStream"/> that represents the data part of the segment.</returns>
+        public SubInputStream GetDataInputStream()
+        {
+            return new SubInputStream(subInputStream, SegmentDataStartOffset, SegmentDataLength);
+        }
+
+        /// <summary>
+        /// Retrieves the segments' data part. The parsed instance is cached through a weak reference
+        /// and re-created when it is no longer available.
+        /// </summary>
+        /// <exception cref="InvalidOperationException">
+        /// The segment type has no registered class, or creating/initializing the class failed.
+        /// </exception>
+        public ISegmentData GetSegmentData()
+        {
+            ISegmentData segmentDataPart = null;
+
+            if (null != segmentData)
+            {
+                segmentData.TryGetTarget(out segmentDataPart);
+            }
+
+            if (null != segmentDataPart)
+            {
+                return segmentDataPart;
+            }
+
+            // Looked up outside the try block so that an unknown type is reported with its own
+            // message instead of being re-wrapped as an instantiation failure.
+            if (!SEGMENT_TYPE_MAP.TryGetValue(SegmentType, out var segmentClassType))
+            {
+                throw new InvalidOperationException("No segment class for type " + SegmentType);
+            }
+
+            try
+            {
+                segmentDataPart = (ISegmentData)Activator.CreateInstance(segmentClassType);
+                segmentDataPart.Init(this, GetDataInputStream());
+            }
+            catch (Exception e)
+            {
+                throw new InvalidOperationException("Can't instantiate segment class", e);
+            }
+
+            segmentData = new WeakReference<ISegmentData>(segmentDataPart);
+
+            return segmentDataPart;
+        }
+
+        /// <summary>
+        /// Drops the cached data part so that the next <see cref="GetSegmentData"/> call re-creates it.
+        /// </summary>
+        public void CleanSegmentData()
+        {
+            segmentData = null;
+        }
+
+        public override string ToString()
+        {
+            StringBuilder stringBuilder = new StringBuilder();
+
+            if (referredToSegments != null)
+            {
+                foreach (SegmentHeader s in referredToSegments)
+                {
+                    stringBuilder.Append(s.SegmentNumber + " ");
+                }
+            }
+            else
+            {
+                stringBuilder.Append("none");
+            }
+
+            return "\n#SegmentNr: " + SegmentNumber
+                    + "\n SegmentType: " + SegmentType
+                    + "\n PageAssociation: " + PageAssociation
+                    + "\n Referred-to segments: " + stringBuilder.ToString()
+                    + "\n";
+        }
+    }
+}
diff --git a/src/UglyToad.PdfPig/Filters/Jbig2/StandardTables.cs b/src/UglyToad.PdfPig/Filters/Jbig2/StandardTables.cs
new file mode 100644
index 000000000..1f724b336
--- /dev/null
+++ b/src/UglyToad.PdfPig/Filters/Jbig2/StandardTables.cs
@@ -0,0 +1,262 @@
+namespace UglyToad.PdfPig.Filters.Jbig2
+{
+    using System.Collections.Generic;
+
+    /// <summary>
+    /// Provides the built-in Huffman tables B1 to B15, built lazily from the rows in TABLES.
+    /// </summary>
+    internal static class StandardTables
+    {
+        class StandardTable : HuffmanTable
+        {
+            /// <summary>
+            /// Builds the code tree from rows of { prefix length, range length, range low [, marker] }.
+            /// A row with a fourth value is treated as the lower-range line.
+            /// </summary>
+            public StandardTable(int[][] table)
+            {
+                var codeTable = new List<Code>(table.Length);
+
+                foreach (int[] row in table)
+                {
+                    int prefixLength = row[0];
+                    int rangeLength = row[1];
+                    int rangeLow = row[2];
+                    bool isLowerRange = row.Length > 3;
+
+                    codeTable.Add(new Code(prefixLength, rangeLength, rangeLow, isLowerRange));
+                }
+
+                InitTree(codeTable);
+            }
+        }
+
+        // Fourth Value (999) is used for the LowerRange-line
+        private static readonly int[][][] TABLES = new[] {
+            // B1
+            new []{ new []{ 1, 4, 0 },
+                new []{ 2, 8, 16 },
+                new []{ 3, 16, 272 },
+                new []{ 3, 32, 65808 } /* high */
+            },
+            // B2
+            new []{ new []{ 1, 0, 0 },
+                new []{ 2, 0, 1 },
+                new []{ 3, 0, 2 },
+                new []{ 4, 3, 3 },
+                new []{ 5, 6, 11 },
+                new []{ 6, 32, 75 }, /* high */
+                new []{ 6, -1, 0 } /* OOB */
+            },
+            // B3
+            new []{ new []{ 8, 8, -256 },
+                new []{ 1, 0, 0 },
+                new []{ 2, 0, 1 },
+                new []{ 3, 0, 2 },
+                new []{ 4, 3, 3 },
+                new []{ 5, 6, 11 },
+                new []{ 8, 32, -257, 999 }, /* low */
+                new []{ 7, 32, 75 }, /* high */
+                new []{ 6, -1, 0 } /* OOB */
+            },
+            // B4
+            new []{ new []{ 1, 0, 1 },
+                new []{ 2, 0, 2 },
+                new []{ 3, 0, 3 },
+                new []{ 4, 3, 4 },
+                new []{ 5, 6, 12 },
+                new []{ 5, 32, 76 } /* high */
+            },
+            // B5
+            new []{ new []{ 7, 8, -255 },
+                new []{ 1, 0, 1 },
+                new []{ 2, 0, 2 },
+                new []{ 3, 0, 3 },
+                new []{ 4, 3, 4 },
+                new []{ 5, 6, 12 },
+                new []{ 7, 32, -256, 999 }, /* low */
+                new []{ 6, 32, 76 } /* high */
+            },
+            // B6
+            new []{ new []{ 5, 10, -2048 },
+                new []{ 4, 9, -1024 },
+                new []{ 4, 8, -512 },
+                new []{ 4, 7, -256 },
+                new []{ 5, 6, -128 },
+                new []{ 5, 5, -64 },
+                new []{ 4, 5, -32 },
+                new []{ 2, 7, 0 },
+                new []{ 3, 7, 128 },
+                new []{ 3, 8, 256 },
+                new []{ 4, 9, 512 },
+                new []{ 4, 10, 1024 },
+                new []{ 6, 32, -2049, 999 }, /* low */
+                new []{ 6, 32, 2048 } /* high */
+            },
+            // B7
+            new []{ new []{ 4, 9, -1024 },
+                new []{ 3, 8, -512 },
+                new []{ 4, 7, -256 },
+                new []{ 5, 6, -128 },
+                new []{ 5, 5, -64 },
+                new []{ 4, 5, -32 },
+                new []{ 4, 5, 0 },
+                new []{ 5, 5, 32 },
+                new []{ 5, 6, 64 },
+                new []{ 4, 7, 128 },
+                new []{ 3, 8, 256 },
+                new []{ 3, 9, 512 },
+                new []{ 3, 10, 1024 },
+                new []{ 5, 32, -1025, 999 }, /* low */
+                new []{ 5, 32, 2048 } /* high */
+            },
+            // B8
+            new []{ new []{ 8, 3, -15 },
+                new []{ 9, 1, -7 },
+                new []{ 8, 1, -5 },
+                new []{ 9, 0, -3 },
+                new []{ 7, 0, -2 },
+                new []{ 4, 0, -1 },
+                new []{ 2, 1, 0 },
+                new []{ 5, 0, 2 },
+                new []{ 6, 0, 3 },
+                new []{ 3, 4, 4 },
+                new []{ 6, 1, 20 },
+                new []{ 4, 4, 22 },
+                new []{ 4, 5, 38 },
+                new []{ 5, 6, 70 },
+                new []{ 5, 7, 134 },
+                new []{ 6, 7, 262 },
+                new []{ 7, 8, 390 },
+                new []{ 6, 10, 646 },
+                new []{ 9, 32, -16, 999 }, /* low */
+                new []{ 9, 32, 1670 }, /* high */
+                new []{ 2, -1, 0 } /* OOB */
+            },
+            // B9
+            new []{ new []{ 8, 4, -31 },
+                new []{ 9, 2, -15 },
+                new []{ 8, 2, -11 },
+                new []{ 9, 1, -7 },
+                new []{ 7, 1, -5 },
+                new []{ 4, 1, -3 },
+                new []{ 3, 1, -1 },
+                new []{ 3, 1, 1 },
+                new []{ 5, 1, 3 },
+                new []{ 6, 1, 5 },
+                new []{ 3, 5, 7 },
+                new []{ 6, 2, 39 },
+                new []{ 4, 5, 43 },
+                new []{ 4, 6, 75 },
+                new []{ 5, 7, 139 },
+                new []{ 5, 8, 267 },
+                new []{ 6, 8, 523 },
+                new []{ 7, 9, 779 },
+                new []{ 6, 11, 1291 },
+                new []{ 9, 32, -32, 999 }, /* low */
+                new []{ 9, 32, 3339 }, /* high */
+                new []{ 2, -1, 0 } /* OOB */
+            },
+            // B10
+            new []{ new []{ 7, 4, -21 },
+                new []{ 8, 0, -5 },
+                new []{ 7, 0, -4 },
+                new []{ 5, 0, -3 },
+                new []{ 2, 2, -2 },
+                new []{ 5, 0, 2 },
+                new []{ 6, 0, 3 },
+                new []{ 7, 0, 4 },
+                new []{ 8, 0, 5 },
+                new []{ 2, 6, 6 },
+                new []{ 5, 5, 70 },
+                new []{ 6, 5, 102 },
+                new []{ 6, 6, 134 },
+                new []{ 6, 7, 198 },
+                new []{ 6, 8, 326 },
+                new []{ 6, 9, 582 },
+                new []{ 6, 10, 1094 },
+                new []{ 7, 11, 2118 },
+                new []{ 8, 32, -22, 999 }, /* low */
+                new []{ 8, 32, 4166 }, /* high */
+                new []{ 2, -1, 0 } /* OOB */
+            },
+            // B11
+            new []{ new []{ 1, 0, 1 },
+                new []{ 2, 1, 2 },
+                new []{ 4, 0, 4 },
+                new []{ 4, 1, 5 },
+                new []{ 5, 1, 7 },
+                new []{ 5, 2, 9 },
+                new []{ 6, 2, 13 },
+                new []{ 7, 2, 17 },
+                new []{ 7, 3, 21 },
+                new []{ 7, 4, 29 },
+                new []{ 7, 5, 45 },
+                new []{ 7, 6, 77 },
+                new []{ 7, 32, 141 } /* high */
+            },
+            // B12
+            new []{ new []{ 1, 0, 1 },
+                new []{ 2, 0, 2 },
+                new []{ 3, 1, 3 },
+                new []{ 5, 0, 5 },
+                new []{ 5, 1, 6 },
+                new []{ 6, 1, 8 },
+                new []{ 7, 0, 10 },
+                new []{ 7, 1, 11 },
+                new []{ 7, 2, 13 },
+                new []{ 7, 3, 17 },
+                new []{ 7, 4, 25 },
+                new []{ 8, 5, 41 },
+                new []{ 8, 32, 73 }
+            },
+            // B13
+            new []{ new []{ 1, 0, 1 },
+                new []{ 3, 0, 2 },
+                new []{ 4, 0, 3 },
+                new []{ 5, 0, 4 },
+                new []{ 4, 1, 5 },
+                new []{ 3, 3, 7 },
+                new []{ 6, 1, 15 },
+                new []{ 6, 2, 17 },
+                new []{ 6, 3, 21 },
+                new []{ 6, 4, 29 },
+                new []{ 6, 5, 45 },
+                new []{ 7, 6, 77 },
+                new []{ 7, 32, 141 } /* high */
+            },
+            // B14
+            new []{ new []{ 3, 0, -2 },
+                new []{ 3, 0, -1 },
+                new []{ 1, 0, 0 },
+                new []{ 3, 0, 1 },
+                new []{ 3, 0, 2 }
+            },
+            // B15
+            new []{ new []{ 7, 4, -24 },
+                new []{ 6, 2, -8 },
+                new []{ 5, 1, -4 },
+                new []{ 4, 0, -2 },
+                new []{ 3, 0, -1 },
+                new []{ 1, 0, 0 },
+                new []{ 3, 0, 1 },
+                new []{ 4, 0, 2 },
+                new []{ 5, 1, 3 },
+                new []{ 6, 2, 5 },
+                new []{ 7, 4, 9 },
+                new []{ 7, 32, -25, 999 }, /* low */
+                new []{ 7, 32, 25 } /* high */
+            } };
+
+        // Cache of already built tables; slot (number - 1) stays null until first requested.
+        private static readonly HuffmanTable[] STANDARD_TABLES = new HuffmanTable[TABLES.Length];
+
+        /// <summary>
+        /// Returns the standard table with the given 1-based number (1 for B1 up to 15 for B15),
+        /// building and caching it on first use.
+        /// </summary>
+        public static HuffmanTable getTable(int number)
+        {
+            HuffmanTable table = STANDARD_TABLES[number - 1];
+            if (table == null)
+            {
+                table = new StandardTable(TABLES[number - 1]);
+                STANDARD_TABLES[number - 1] = table;
+            }
+
+            return table;
+        }
+    }
+}
diff --git a/src/UglyToad.PdfPig/Filters/Jbig2/SubInputStream.cs b/src/UglyToad.PdfPig/Filters/Jbig2/SubInputStream.cs
new file mode 100644
index 000000000..6769eea3b
--- /dev/null
+++ b/src/UglyToad.PdfPig/Filters/Jbig2/SubInputStream.cs
@@ -0,0 +1,132 @@
+namespace UglyToad.PdfPig.Filters.Jbig2
+{
+    using System;
+
+    /// <summary>
+    /// A wrapper for an <see cref="IImageInputStream"/> which is able to provide a view of a specific part of the wrapped stream.
+    /// Read accesses to the wrapped stream are synchronized, so that users of this stream need to deal with synchronization
+    /// against other users of the same instance, but not against other users of the wrapped stream.
+    /// </summary>
+    internal sealed class SubInputStream : AbstractImageInputStream
+    {
+        private readonly IImageInputStream wrappedStream;
+
+        // The position in the wrapped stream at which the window starts. Offset is an absolute value.
+        private readonly long offset;
+
+        // The length of the window. Length is a relative value.
+        private readonly long length;
+
+        // A buffer which is used to improve read performance.
+        private readonly byte[] buffer = new byte[4096];
+
+        // Location of the first byte in the buffer with respect to the start of the stream.
+        private long bufferBase;
+
+        // Location of the last byte in the buffer with respect to the start of the stream.
+        private long bufferTop;
+
+        // Current read position, relative to the start of the window.
+        private long streamPosition;
+
+        /// <inheritdoc />
+        public sealed override long Length => length;
+
+        /// <inheritdoc />
+        public sealed override long Position => streamPosition;
+
+        /// <summary>
+        /// Constructs a new SubInputStream which provides a view of the wrapped stream.
+        /// </summary>
+        /// <param name="iis">The stream to be wrapped.</param>
+        /// <param name="offset">The absolute position in the wrapped stream at which the sub-stream starts.</param>
+        /// <param name="length">The length of the sub-stream.</param>
+        public SubInputStream(IImageInputStream iis, long offset, long length)
+        {
+            this.wrappedStream = iis;
+            this.offset = offset;
+            this.length = length;
+        }
+
+        /// <inheritdoc />
+        /// <remarks>Returns -1 once the end of the window is reached or the buffer cannot be refilled.</remarks>
+        public sealed override int Read()
+        {
+            if (streamPosition >= length)
+            {
+                return -1;
+            }
+
+            // Refill when the position has left the buffered range in either direction.
+            if (streamPosition >= bufferTop || streamPosition < bufferBase)
+            {
+                if (!FillBuffer())
+                {
+                    return -1;
+                }
+            }
+
+            int read = 0xff & buffer[(int)(streamPosition - bufferBase)];
+
+            streamPosition++;
+
+            return read;
+        }
+
+        /// <inheritdoc />
+        /// <remarks>
+        /// Reads directly from the wrapped stream, bypassing the internal buffer, and never reads
+        /// beyond the end of the window. Returns -1 when the window is already exhausted.
+        /// </remarks>
+        public sealed override int Read(byte[] b, int off, int len)
+        {
+            if (streamPosition >= length)
+            {
+                return -1;
+            }
+
+            lock (wrappedStream)
+            {
+                var desiredPosition = streamPosition + offset;
+                if (wrappedStream.Position != desiredPosition)
+                {
+                    wrappedStream.Seek(desiredPosition);
+                }
+
+                int toRead = (int)Math.Min(len, length - Position);
+                int read = wrappedStream.Read(b, off, toRead);
+
+                // Only advance on an actual read: a non-positive result (end of the wrapped stream)
+                // must not move the position backwards.
+                if (read > 0)
+                {
+                    streamPosition += read;
+                }
+
+                return read;
+            }
+        }
+
+        /// <inheritdoc />
+        /// <remarks>Only records the new position; the wrapped stream is repositioned on the next read.</remarks>
+        public sealed override void Seek(long pos)
+        {
+            streamPosition = pos;
+        }
+
+        /// <inheritdoc />
+        /// <remarks>Disposes the wrapped stream.</remarks>
+        public sealed override void Dispose()
+        {
+            wrappedStream.Dispose();
+        }
+
+        ///
+        /// Fill the buffer at the current stream position.
+        ///
+        /// <returns>true if successful, false otherwise</returns>
+        private bool FillBuffer()
+        {
+            lock (wrappedStream)
+            {
+                // Translate the window-relative position into a position of the wrapped stream.
+                long absolutePosition = streamPosition + offset;
+                if (absolutePosition != wrappedStream.Position)
+                {
+                    wrappedStream.Seek(absolutePosition);
+                }
+
+                // The buffer now starts at the current position and never extends past the window.
+                bufferBase = streamPosition;
+                long remaining = length - streamPosition;
+                int chunkSize = (int)Math.Min(buffer.Length, remaining);
+
+                int bytesRead = wrappedStream.Read(buffer, 0, chunkSize);
+                bufferTop = bufferBase + bytesRead;
+
+                return bytesRead > 0;
+            }
+        }
+    }
+}
diff --git a/src/UglyToad.PdfPig/Filters/Jbig2/SymbolDictionary.cs b/src/UglyToad.PdfPig/Filters/Jbig2/SymbolDictionary.cs
new file mode 100644
index 000000000..841fa18bd
--- /dev/null
+++ b/src/UglyToad.PdfPig/Filters/Jbig2/SymbolDictionary.cs
@@ -0,0 +1,935 @@
+namespace UglyToad.PdfPig.Filters.Jbig2
+{
+    using System;
+    using System.Collections.Generic;
+
+    ///
+    /// This class represents the data of segment type "Symbol dictionary". Parsing is described in
+    /// 7.4.2.1.1 - 7.4.1.1.5 and decoding procedure is described in 6.5.
+ /// + internal sealed class SymbolDictionary : IJbigDictionary + { + private SubInputStream subInputStream; + + // Symbol dictionary flags, 7.4.2.1.1 + private short sdrTemplate; + private byte sdTemplate; + private bool isCodingContextRetained; + private bool isCodingContextUsed; + private short sdHuffAggInstanceSelection; + private short sdHuffBMSizeSelection; + private short sdHuffDecodeWidthSelection; + private short sdHuffDecodeHeightSelection; + private bool useRefinementAggregation; + private bool isHuffmanEncoded; + + // Symbol dictionary AT flags, 7.4.2.1.2 + private short[] sdATX; + private short[] sdATY; + + // Symbol dictionary refinement AT flags, 7.4.2.1.3 + private short[] sdrATX; + private short[] sdrATY; + + // Number of exported symbols, 7.4.2.1.4 + private int amountOfExportSymbolss; + + // Number of new symbols, 7.4.2.1.5 + private int amountOfNewSymbols; + + // Further parameters + private SegmentHeader segmentHeader; + private int amountOfImportedSymbolss; + private List importSymbols; + private int amountOfDecodedSymbols; + private Jbig2Bitmap[] newSymbols; + + // User-supplied tables + private HuffmanTable dhTable; + private HuffmanTable dwTable; + private HuffmanTable bmSizeTable; + private HuffmanTable aggInstTable; + + // Return value of that segment + private List exportSymbols; + private List sbSymbols; + + private ArithmeticDecoder arithmeticDecoder; + private ArithmeticIntegerDecoder iDecoder; + + private TextRegion textRegion; + private GenericRegion genericRegion; + private GenericRefinementRegion genericRefinementRegion; + private CX cx; + + private CX cxIADH; + private CX cxIADW; + private CX cxIAAI; + private CX cxIAEX; + private CX cxIARDX; + private CX cxIARDY; + private CX cxIADT; + + internal CX cxIAID; + private int sbSymCodeLen; + + public SymbolDictionary() + { + } + + public SymbolDictionary(SubInputStream subInputStream, SegmentHeader segmentHeader) + { + this.subInputStream = subInputStream; + this.segmentHeader = 
segmentHeader; + } + + private void ParseHeader() + { + ReadRegionFlags(); + SetAtPixels(); + SetRefinementAtPixels(); + ReadAmountOfExportedSymbols(); + ReadAmountOfNewSymbols(); + SetInSyms(); + + if (isCodingContextUsed) + { + SegmentHeader[] rtSegments = segmentHeader.GetRtSegments(); + + for (int i = rtSegments.Length - 1; i >= 0; i--) + { + if (rtSegments[i].SegmentType == 0) + { + SymbolDictionary symbolDictionary = + (SymbolDictionary)rtSegments[i].GetSegmentData(); + + if (symbolDictionary.isCodingContextRetained) + { + // 7.4.2.2 3) + SetRetainedCodingContexts(symbolDictionary); + } + break; + } + } + } + + CheckInput(); + } + + private void ReadRegionFlags() + { + // Bit 13-15 + subInputStream.ReadBits(3); // Dirty read... reserved bits must be 0 + + // Bit 12 + sdrTemplate = (short)subInputStream.ReadBit(); + + // Bit 10-11 + sdTemplate = (byte)(subInputStream.ReadBits(2) & 0xf); + + // Bit 9 + if (subInputStream.ReadBit() == 1) + { + isCodingContextRetained = true; + } + + // Bit 8 + if (subInputStream.ReadBit() == 1) + { + isCodingContextUsed = true; + } + + // Bit 7 + sdHuffAggInstanceSelection = (short)subInputStream.ReadBit(); + + // Bit 6 + sdHuffBMSizeSelection = (short)subInputStream.ReadBit(); + + // Bit 4-5 + sdHuffDecodeWidthSelection = (short)(subInputStream.ReadBits(2) & 0xf); + + // Bit 2-3 + sdHuffDecodeHeightSelection = (short)(subInputStream.ReadBits(2) & 0xf); + + // Bit 1 + if (subInputStream.ReadBit() == 1) + { + useRefinementAggregation = true; + } + + // Bit 0 + if (subInputStream.ReadBit() == 1) + { + isHuffmanEncoded = true; + } + } + + private void SetAtPixels() + { + if (!isHuffmanEncoded) + { + if (sdTemplate == 0) + { + ReadAtPixels(4); + } + else + { + ReadAtPixels(1); + } + } + } + + private void SetRefinementAtPixels() + { + if (useRefinementAggregation && sdrTemplate == 0) + { + ReadRefinementAtPixels(2); + } + } + + private void ReadAtPixels(int amountOfPixels) + { + sdATX = new short[amountOfPixels]; + sdATY = new 
short[amountOfPixels]; + + for (int i = 0; i < amountOfPixels; i++) + { + sdATX[i] = (sbyte)subInputStream.ReadByte(); + sdATY[i] = (sbyte)subInputStream.ReadByte(); + } + } + + private void ReadRefinementAtPixels(int amountOfAtPixels) + { + sdrATX = new short[amountOfAtPixels]; + sdrATY = new short[amountOfAtPixels]; + + for (int i = 0; i < amountOfAtPixels; i++) + { + sdrATX[i] = (sbyte)subInputStream.ReadByte(); + sdrATY[i] = (sbyte)subInputStream.ReadByte(); + } + } + + private void ReadAmountOfExportedSymbols() + { + amountOfExportSymbolss = (int)subInputStream.ReadBits(32); + } + + private void ReadAmountOfNewSymbols() + { + amountOfNewSymbols = (int)subInputStream.ReadBits(32); + } + + private void SetInSyms() + { + if (segmentHeader.GetRtSegments() != null) + { + RetrieveImportSymbols(); + } + else + { + importSymbols = new List(); + } + } + + private void SetRetainedCodingContexts(SymbolDictionary sd) + { + arithmeticDecoder = sd.arithmeticDecoder; + isHuffmanEncoded = sd.isHuffmanEncoded; + useRefinementAggregation = sd.useRefinementAggregation; + sdTemplate = sd.sdTemplate; + sdrTemplate = sd.sdrTemplate; + sdATX = sd.sdATX; + sdATY = sd.sdATY; + sdrATX = sd.sdrATX; + sdrATY = sd.sdrATY; + cx = sd.cx; + } + + private void CheckInput() + { + if (isHuffmanEncoded) + { + if (sdTemplate != 0) + { + sdTemplate = 0; + } + + if (!useRefinementAggregation) + { + if (isCodingContextRetained) + { + isCodingContextRetained = false; + } + + if (isCodingContextUsed) + { + isCodingContextUsed = false; + } + } + } + else + { + if (sdHuffBMSizeSelection != 0) + { + sdHuffBMSizeSelection = 0; + } + + if (sdHuffDecodeWidthSelection != 0) + { + sdHuffDecodeWidthSelection = 0; + } + + if (sdHuffDecodeHeightSelection != 0) + { + sdHuffDecodeHeightSelection = 0; + } + } + + if (!useRefinementAggregation) + { + if (sdrTemplate != 0) + { + sdrTemplate = 0; + } + } + + if (!isHuffmanEncoded || !useRefinementAggregation) + { + if (sdHuffAggInstanceSelection != 0) + { + 
sdHuffAggInstanceSelection = 0; + } + } + } + + /// + /// 6.5.5 Decoding the symbol dictionary. + /// + /// List of decoded symbol bitmaps. + public List GetDictionary() + { + if (null == exportSymbols) + { + if (useRefinementAggregation) + { + sbSymCodeLen = GetSbSymCodeLen(); + } + + if (!isHuffmanEncoded) + { + SetCodingStatistics(); + } + + // 6.5.5 1) + newSymbols = new Jbig2Bitmap[amountOfNewSymbols]; + + // 6.5.5 2) + int[] newSymbolsWidths = null; + if (isHuffmanEncoded && !useRefinementAggregation) + { + newSymbolsWidths = new int[amountOfNewSymbols]; + } + + SetSymbolsArray(); + + // 6.5.5 3) + int heightClassHeight = 0; + amountOfDecodedSymbols = 0; + + // 6.5.5 4 a) + while (amountOfDecodedSymbols < amountOfNewSymbols) + { + // 6.5.5 4 b) + heightClassHeight += (int)DecodeHeightClassDeltaHeight(); + int symbolWidth = 0; + int totalWidth = 0; + int heightClassFirstSymbolIndex = amountOfDecodedSymbols; + + // 6.5.5 4 c) + + // Repeat until OOB - OOB sends a break; + while (true) + { + // 4 c) i) + long differenceWidth = DecodeDifferenceWidth(); + + // If result is OOB (out-of-band), then all the symbols in this height + // class has been decoded; proceed to step 4 d). Also exit, if the + // expected number of symbols have been decoded. + // The latter exit condition guards against pathological cases where + // a symbol's DW never contains OOB and thus never terminates. 
+ if (differenceWidth == long.MaxValue + || amountOfDecodedSymbols >= amountOfNewSymbols) + { + break; + } + + symbolWidth += (int)differenceWidth; + totalWidth += symbolWidth; + + // 4 c) ii) + if (!isHuffmanEncoded || useRefinementAggregation) + { + if (!useRefinementAggregation) + { + // 6.5.8.1 - Direct coded + DecodeDirectlyThroughGenericRegion(symbolWidth, heightClassHeight); + } + else + { + // 6.5.8.2 - Refinement/Aggregate-coded + DecodeAggregate(symbolWidth, heightClassHeight); + } + } + else if (isHuffmanEncoded && !useRefinementAggregation) + { + // 4 c) iii) + newSymbolsWidths[amountOfDecodedSymbols] = symbolWidth; + } + amountOfDecodedSymbols++; + } + + // 6.5.5 4 d) + if (isHuffmanEncoded && !useRefinementAggregation) + { + // 6.5.9 + long bmSize; + if (sdHuffBMSizeSelection == 0) + { + bmSize = StandardTables.getTable(1).Decode(subInputStream); + } + else + { + bmSize = HuffDecodeBmSize(); + } + + subInputStream.SkipBits(); + + Jbig2Bitmap heightClassCollectiveBitmap = + DecodeHeightClassCollectiveBitmap(bmSize, heightClassHeight, totalWidth); + + subInputStream.SkipBits(); + DecodeHeightClassBitmap(heightClassCollectiveBitmap, + heightClassFirstSymbolIndex, heightClassHeight, newSymbolsWidths); + } + } + + // 5) + // 6.5.10 1) - 5) + + int[] exFlags = GetToExportFlags(); + + // 6.5.10 6) - 8) + SetExportedSymbols(exFlags); + } + + return exportSymbols; + } + + private void SetCodingStatistics() + { + if (cxIADT == null) + { + cxIADT = new CX(512, 1); + } + + if (cxIADH == null) + { + cxIADH = new CX(512, 1); + } + + if (cxIADW == null) + { + cxIADW = new CX(512, 1); + } + + if (cxIAAI == null) + { + cxIAAI = new CX(512, 1); + } + + if (cxIAEX == null) + { + cxIAEX = new CX(512, 1); + } + + if (useRefinementAggregation && cxIAID == null) + { + cxIAID = new CX(1 << sbSymCodeLen, 1); + cxIARDX = new CX(512, 1); + cxIARDY = new CX(512, 1); + } + + if (cx == null) + { + cx = new CX(65536, 1); + } + + if (arithmeticDecoder == null) + { + 
arithmeticDecoder = new ArithmeticDecoder(subInputStream); + } + + if (iDecoder == null) + { + iDecoder = new ArithmeticIntegerDecoder(arithmeticDecoder); + } + } + + private void DecodeHeightClassBitmap(Jbig2Bitmap heightClassCollectiveBitmap, + int heightClassFirstSymbol, int heightClassHeight, + int[] newSymbolsWidths) + { + for (int i = heightClassFirstSymbol; i < amountOfDecodedSymbols; i++) + { + int startColumn = 0; + + for (int j = heightClassFirstSymbol; j <= i - 1; j++) + { + startColumn += newSymbolsWidths[j]; + } + + var roi = new Jbig2Rectangle(startColumn, 0, newSymbolsWidths[i], heightClassHeight); + var symbolBitmap = Jbig2Bitmaps.Extract(roi, heightClassCollectiveBitmap); + newSymbols[i] = symbolBitmap; + sbSymbols.Add(symbolBitmap); + } + } + + private void DecodeAggregate(int symbolWidth, int heightClassHeight) + { + // 6.5.8.2 1) + // 6.5.8.2.1 - Number of symbol instances in aggregation + long amountOfRefinementAggregationInstances; + if (isHuffmanEncoded) + { + amountOfRefinementAggregationInstances = HuffDecodeRefAggNInst(); + } + else + { + amountOfRefinementAggregationInstances = iDecoder.Decode(cxIAAI); + } + + if (amountOfRefinementAggregationInstances > 1) + { + // 6.5.8.2 2) + DecodeThroughTextRegion(symbolWidth, heightClassHeight, + amountOfRefinementAggregationInstances); + } + else if (amountOfRefinementAggregationInstances == 1) + { + // 6.5.8.2 3) refers to 6.5.8.2.2 + DecodeRefinedSymbol(symbolWidth, heightClassHeight); + } + } + + private long HuffDecodeRefAggNInst() + { + if (sdHuffAggInstanceSelection == 0) + { + return StandardTables.getTable(1).Decode(subInputStream); + } + else if (sdHuffAggInstanceSelection == 1) + { + if (aggInstTable == null) + { + int aggregationInstanceNumber = 0; + + if (sdHuffDecodeHeightSelection == 3) + { + aggregationInstanceNumber++; + } + if (sdHuffDecodeWidthSelection == 3) + { + aggregationInstanceNumber++; + } + if (sdHuffBMSizeSelection == 3) + { + aggregationInstanceNumber++; + } + + 
aggInstTable = GetUserTable(aggregationInstanceNumber); + } + return aggInstTable.Decode(subInputStream); + } + return 0; + } + + private void DecodeThroughTextRegion(int symbolWidth, int heightClassHeight, + long amountOfRefinementAggregationInstances) + { + if (textRegion == null) + { + textRegion = new TextRegion(subInputStream, null); + + textRegion.SetContexts(cx, // default context + new CX(512, 1), // IADT + new CX(512, 1), // IAFS + new CX(512, 1), // IADS + new CX(512, 1), // IAIT + cxIAID, // IAID + new CX(512, 1), // IARDW + new CX(512, 1), // IARDH + new CX(512, 1), // IARDX + new CX(512, 1) // IARDY + ); + } + + // 6.5.8.2.4 Concatenating the array used as parameter later. + SetSymbolsArray(); + + // 6.5.8.2 2) Parameters set according to Table 17, page 36 + textRegion.SetParameters(arithmeticDecoder, iDecoder, isHuffmanEncoded, true, symbolWidth, + heightClassHeight, amountOfRefinementAggregationInstances, 1, + (amountOfImportedSymbolss + amountOfDecodedSymbols), 0, 0, 0, 1, 0, 0, 0, + 0, 0, 0, 0, 0, 0, sdrTemplate, sdrATX, sdrATY, sbSymbols, sbSymCodeLen); + + AddSymbol(textRegion); + } + + private void DecodeRefinedSymbol(int symbolWidth, int heightClassHeight) + { + int id; + int rdx; + int rdy; + if (isHuffmanEncoded) + { + // 2) - 4) + id = (int)subInputStream.ReadBits(sbSymCodeLen); + rdx = (int)StandardTables.getTable(15).Decode(subInputStream); + rdy = (int)StandardTables.getTable(15).Decode(subInputStream); + + // 5) a) + /* symInRefSize = */ + StandardTables.getTable(1).Decode(subInputStream); + + // 5) b) - Skip over remaining bits + subInputStream.SkipBits(); + } + else + { + // 2) - 4) + id = iDecoder.DecodeIAID(cxIAID, sbSymCodeLen); + rdx = (int)iDecoder.Decode(cxIARDX); + rdy = (int)iDecoder.Decode(cxIARDY); + } + + // 6) + SetSymbolsArray(); + Jbig2Bitmap ibo = sbSymbols[id]; + DecodeNewSymbols(symbolWidth, heightClassHeight, ibo, rdx, rdy); + + // 7) + if (isHuffmanEncoded) + { + subInputStream.SkipBits(); + // Make sure that the 
processed bytes are equal to the value read in step 5 a) + } + } + + private void DecodeNewSymbols(int symWidth, int hcHeight, Jbig2Bitmap ibo, int rdx, int rdy) + { + if (genericRefinementRegion == null) + { + genericRefinementRegion = new GenericRefinementRegion(subInputStream); + + if (arithmeticDecoder == null) + { + arithmeticDecoder = new ArithmeticDecoder(subInputStream); + } + + if (cx == null) + { + cx = new CX(65536, 1); + } + } + + // Parameters as shown in Table 18, page 36 + genericRefinementRegion.SetParameters(cx, arithmeticDecoder, sdrTemplate, symWidth, + hcHeight, ibo, rdx, rdy, false, sdrATX, sdrATY); + + AddSymbol(genericRefinementRegion); + } + + private void DecodeDirectlyThroughGenericRegion(int symWidth, int hcHeight) + { + if (genericRegion == null) + { + genericRegion = new GenericRegion(subInputStream); + } + + // Parameters set according to Table 16, page 35 + genericRegion.SetParameters(false, sdTemplate, false, false, sdATX, sdATY, symWidth, + hcHeight, cx, arithmeticDecoder); + + AddSymbol(genericRegion); + } + + private void AddSymbol(IRegion region) + { + Jbig2Bitmap symbol = region.GetRegionBitmap(); + newSymbols[amountOfDecodedSymbols] = symbol; + sbSymbols.Add(symbol); + } + + private long DecodeDifferenceWidth() + { + if (isHuffmanEncoded) + { + switch (sdHuffDecodeWidthSelection) + { + case 0: + return StandardTables.getTable(2).Decode(subInputStream); + + case 1: + return StandardTables.getTable(3).Decode(subInputStream); + + case 3: + if (dwTable == null) + { + int dwNr = 0; + + if (sdHuffDecodeHeightSelection == 3) + { + dwNr++; + } + dwTable = GetUserTable(dwNr); + } + + return dwTable.Decode(subInputStream); + } + } + else + { + return iDecoder.Decode(cxIADW); + } + return 0; + } + + private long DecodeHeightClassDeltaHeight() + { + if (isHuffmanEncoded) + { + return DecodeHeightClassDeltaHeightWithHuffman(); + } + else + { + return iDecoder.Decode(cxIADH); + } + } + + + /// + /// 6.5.6 if isHuffmanEncoded, + /// + /// 
Result of decoding HCDH + private long DecodeHeightClassDeltaHeightWithHuffman() + { + switch (sdHuffDecodeHeightSelection) + { + case 0: + return StandardTables.getTable(4).Decode(subInputStream); + + case 1: + return StandardTables.getTable(5).Decode(subInputStream); + + case 3: + if (dhTable == null) + { + dhTable = GetUserTable(0); + } + return dhTable.Decode(subInputStream); + } + + return 0; + } + + private Jbig2Bitmap DecodeHeightClassCollectiveBitmap(long bmSize, + int heightClassHeight, int totalWidth) + { + if (bmSize == 0) + { + Jbig2Bitmap heightClassCollectiveBitmap = new Jbig2Bitmap(totalWidth, heightClassHeight); + + for (int i = 0; i < heightClassCollectiveBitmap.GetByteArray().Length; i++) + { + heightClassCollectiveBitmap.SetByte(i, subInputStream.ReadByte()); + } + + return heightClassCollectiveBitmap; + } + else + { + if (genericRegion == null) + { + genericRegion = new GenericRegion(subInputStream); + } + + genericRegion.SetParameters(true, subInputStream.Position, bmSize, + heightClassHeight, totalWidth); + + return genericRegion.GetRegionBitmap(); + } + } + + private void SetExportedSymbols(int[] toExportFlags) + { + exportSymbols = new List(amountOfExportSymbolss); + + for (int i = 0; i < amountOfImportedSymbolss + amountOfNewSymbols; i++) + { + if (toExportFlags[i] == 1) + { + if (i < amountOfImportedSymbolss) + { + exportSymbols.Add(importSymbols[i]); + } + else + { + exportSymbols.Add(newSymbols[i - amountOfImportedSymbolss]); + } + } + } + } + + private int[] GetToExportFlags() + { + int currentExportFlag = 0; + int[] exportFlags = new int[amountOfImportedSymbolss + amountOfNewSymbols]; + + long exRunLength; + for (int exportIndex = 0; exportIndex < amountOfImportedSymbolss + + amountOfNewSymbols; exportIndex += (int)exRunLength) + { + if (isHuffmanEncoded) + { + exRunLength = StandardTables.getTable(1).Decode(subInputStream); + } + else + { + exRunLength = iDecoder.Decode(cxIAEX); + } + + if (exRunLength != 0) + { + for (int index = 
exportIndex; index < exportIndex + exRunLength; index++) + { + exportFlags[index] = currentExportFlag; + } + } + + currentExportFlag = (currentExportFlag == 0) ? 1 : 0; + } + + return exportFlags; + } + + private long HuffDecodeBmSize() + { + if (bmSizeTable == null) + { + int bmNr = 0; + + if (sdHuffDecodeHeightSelection == 3) + { + bmNr++; + } + + if (sdHuffDecodeWidthSelection == 3) + { + bmNr++; + } + + bmSizeTable = GetUserTable(bmNr); + } + return bmSizeTable.Decode(subInputStream); + } + + /// + /// 6.5.8.2.3 - Setting SBSYMCODES and SBSYMCODELEN + /// + /// Result of computing SBSYMCODELEN + private int GetSbSymCodeLen() + { + if (isHuffmanEncoded) + { + return Math.Max( + (int)(Math.Ceiling( + Math.Log(amountOfImportedSymbolss + amountOfNewSymbols) / Math.Log(2))), + 1); + } + else + { + return (int)(Math + .Ceiling(Math.Log(amountOfImportedSymbolss + amountOfNewSymbols) / Math.Log(2))); + } + } + + + /// + /// 6.5.8.2.4 - Setting SBSYMS + /// + private void SetSymbolsArray() + { + if (importSymbols == null) + { + RetrieveImportSymbols(); + } + + if (sbSymbols == null) + { + sbSymbols = new List(); + sbSymbols.AddRange(importSymbols); + } + } + + /// + /// Concatenates symbols from all referred-to segments. 
+ /// + private void RetrieveImportSymbols() + { + importSymbols = new List(); + foreach (SegmentHeader referredToSegmentHeader in segmentHeader.GetRtSegments()) + { + if (referredToSegmentHeader.SegmentType == 0) + { + SymbolDictionary sd = (SymbolDictionary)referredToSegmentHeader + .GetSegmentData(); + importSymbols.AddRange(sd.GetDictionary()); + amountOfImportedSymbolss += sd.amountOfExportSymbolss; + } + } + } + + private HuffmanTable GetUserTable(int tablePosition) + { + int tableCounter = 0; + + foreach (SegmentHeader referredToSegmentHeader in segmentHeader.GetRtSegments()) + { + if (referredToSegmentHeader.SegmentType == 53) + { + if (tableCounter == tablePosition) + { + Table t = (Table)referredToSegmentHeader.GetSegmentData(); + return new EncodedTable(t); + } + else + { + tableCounter++; + } + } + } + return null; + } + + public void Init(SegmentHeader header, SubInputStream sis) + { + subInputStream = sis; + segmentHeader = header; + ParseHeader(); + } + } +} diff --git a/src/UglyToad.PdfPig/Filters/Jbig2/Table.cs b/src/UglyToad.PdfPig/Filters/Jbig2/Table.cs new file mode 100644 index 000000000..c4afe1a2e --- /dev/null +++ b/src/UglyToad.PdfPig/Filters/Jbig2/Table.cs @@ -0,0 +1,56 @@ +namespace UglyToad.PdfPig.Filters.Jbig2 +{ + /// + /// This class represents a "Table" segment. It handles custom tables, see Annex B. 
+ /// + internal sealed class Table : ISegmentData + { + private SubInputStream subInputStream; + + // Code table flags, B.2.1, page 87 + public int HtOutOfBand { get; private set; } + public int HtPS { get; private set; } + public int HtRS { get; private set; } + + // Code table lowest value, B.2.2, page 87 + public int HtLow { get; private set; } + + // Code table highest value, B.2.3, page 87 + public int HtHigh { get; private set; } + + private void ParseHeader() + { + int bit; + + // Bit 7 + if ((bit = subInputStream.ReadBit()) == 1) + { + throw new InvalidHeaderValueException( + "B.2.1 Code table flags: Bit 7 must be zero, but was " + bit); + } + + // Bit 4-6 + HtRS = (int)((subInputStream.ReadBits(3) + 1) & 0xf); + + // Bit 1-3 + HtPS = (int)((subInputStream.ReadBits(3) + 1) & 0xf); + + // Bit 0 + HtOutOfBand = subInputStream.ReadBit(); + + HtLow = (int)subInputStream.ReadBits(32); + HtHigh = (int)subInputStream.ReadBits(32); + } + + public void Init(SegmentHeader header, SubInputStream sis) + { + subInputStream = sis; + ParseHeader(); + } + + public SubInputStream getSubInputStream() + { + return subInputStream; + } + } +} diff --git a/src/UglyToad.PdfPig/Filters/Jbig2/TextRegion.cs b/src/UglyToad.PdfPig/Filters/Jbig2/TextRegion.cs new file mode 100644 index 000000000..e8ba6b8db --- /dev/null +++ b/src/UglyToad.PdfPig/Filters/Jbig2/TextRegion.cs @@ -0,0 +1,1149 @@ +namespace UglyToad.PdfPig.Filters.Jbig2 +{ + using System; + using System.Collections.Generic; + using UglyToad.PdfPig.Util; + using static HuffmanTable; + + /// + /// This class represents the segment type "Text region", 7.4.3, page 56. 
+ /// + internal sealed class TextRegion : IRegion + { + private SubInputStream subInputStream; + + // Text region segment flags, 7.4.3.1.1 + private short sbrTemplate; + private short sbdsOffset; // 6.4.8 + private short defaultPixel; + private CombinationOperator combinationOperator; + private short isTransposed; + private short referenceCorner; + private short logSBStrips; + private bool useRefinement; + private bool isHuffmanEncoded; + + // Text region segment Huffman flags, 7.4.3.1.2 + private short sbHuffRSize; + private short sbHuffRDY; + private short sbHuffRDX; + private short sbHuffRDHeight; + private short sbHuffRDWidth; + private short sbHuffDT; + private short sbHuffDS; + private short sbHuffFS; + + // Text region refinement AT flags, 7.4.3.1.3 + private short[] sbrATX; + private short[] sbrATY; + + // Number of symbol instances, 7.4.3.1.4 + private long amountOfSymbolInstances; + + // Further parameters + private long currentS; + private int sbStrips; + private int amountOfSymbols; + + private Jbig2Bitmap regionBitmap; + private List symbols = new List(); + + private ArithmeticDecoder arithmeticDecoder; + private ArithmeticIntegerDecoder integerDecoder; + private GenericRefinementRegion genericRefinementRegion; + + private CX cxIADT; + private CX cxIAFS; + private CX cxIADS; + private CX cxIAIT; + private CX cxIARI; + private CX cxIARDW; + private CX cxIARDH; + private CX cxIAID; + private CX cxIARDX; + private CX cxIARDY; + private CX cx; + + // codeTable including a code to each symbol used in that region + private int symbolCodeLength; + private FixedSizeTable symbolCodeTable; + private SegmentHeader segmentHeader; + + // User-supplied tables + private HuffmanTable fsTable; + private HuffmanTable dsTable; + private HuffmanTable table; + private HuffmanTable rdwTable; + private HuffmanTable rdhTable; + private HuffmanTable rdxTable; + private HuffmanTable rdyTable; + private HuffmanTable rSizeTable; + + // Region segment information field, 7.4.1 + 
public RegionSegmentInformation RegionInfo { get; private set; } + + public TextRegion() + { + } + + public TextRegion(SubInputStream subInputStream, SegmentHeader segmentHeader) + { + this.subInputStream = subInputStream; + this.RegionInfo = new RegionSegmentInformation(subInputStream); + this.segmentHeader = segmentHeader; + } + + private void ParseHeader() + { + RegionInfo.ParseHeader(); + + ReadRegionFlags(); + + if (isHuffmanEncoded) + { + ReadHuffmanFlags(); + } + + ReadUseRefinement(); + + ReadAmountOfSymbolInstances(); + + // 7.4.3.1.7 + GetSymbols(); + + ComputeSymbolCodeLength(); + + CheckInput(); + } + + private void ReadRegionFlags() + { + // Bit 15 + sbrTemplate = (short)subInputStream.ReadBit(); + + // Bit 10-14 + sbdsOffset = (short)(subInputStream.ReadBits(5)); + if (sbdsOffset > 0x0f) + { + sbdsOffset -= 0x20; + } + + // Bit 9 + defaultPixel = (short)subInputStream.ReadBit(); + + // Bit 7-8 + combinationOperator = CombinationOperators + .TranslateOperatorCodeToEnum((short)(subInputStream.ReadBits(2) & 0x3)); + + // Bit 6 + isTransposed = (short)subInputStream.ReadBit(); + + // Bit 4-5 + referenceCorner = (short)(subInputStream.ReadBits(2) & 0x3); + + // Bit 2-3 + logSBStrips = (short)(subInputStream.ReadBits(2) & 0x3); + sbStrips = (1 << logSBStrips); + + // Bit 1 + if (subInputStream.ReadBit() == 1) + { + useRefinement = true; + } + + // Bit 0 + if (subInputStream.ReadBit() == 1) + { + isHuffmanEncoded = true; + } + } + + private void ReadHuffmanFlags() + { + // Bit 15 + subInputStream.ReadBit(); // Value is discarded; the bit is read only to advance past it 
+ + // Bit 14 + sbHuffRSize = (short)subInputStream.ReadBit(); + + // Bit 12-13 + sbHuffRDY = (short)(subInputStream.ReadBits(2) & 0xf); + + // Bit 10-11 + sbHuffRDX = (short)(subInputStream.ReadBits(2) & 0xf); + + // Bit 8-9 + sbHuffRDHeight = (short)(subInputStream.ReadBits(2) & 0xf); + + // Bit 6-7 + sbHuffRDWidth = (short)(subInputStream.ReadBits(2) & 0xf); + + // Bit 4-5 + sbHuffDT = (short)(subInputStream.ReadBits(2) & 0xf); + + // Bit 2-3 + sbHuffDS = (short)(subInputStream.ReadBits(2) & 0xf); + + // Bit 0-1 + sbHuffFS = (short)(subInputStream.ReadBits(2) & 0xf); + } + + private void ReadUseRefinement() + { + if (useRefinement && sbrTemplate == 0) + { + sbrATX = new short[2]; + sbrATY = new short[2]; + + // Byte 0 + sbrATX[0] = (sbyte)subInputStream.ReadByte(); + + // Byte 1 + sbrATY[0] = (sbyte)subInputStream.ReadByte(); + + // Byte 2 + sbrATX[1] = (sbyte)subInputStream.ReadByte(); + + // Byte 3 + sbrATY[1] = (sbyte)subInputStream.ReadByte(); + } + } + + private void ReadAmountOfSymbolInstances() + { + amountOfSymbolInstances = subInputStream.ReadBits(32) & 0xffffffff; + + // sanity check: don't decode more than one symbol per pixel + long pixels = (long)RegionInfo.BitmapWidth * (long)RegionInfo.BitmapHeight; + if (pixels < amountOfSymbolInstances) + { + amountOfSymbolInstances = pixels; + } + } + + private void GetSymbols() + { + if (segmentHeader.GetRtSegments() != null) + { + InitSymbols(); + } + } + + private void ComputeSymbolCodeLength() + { + if (isHuffmanEncoded) + { + SymbolIDCodeLengths(); + } + else + { + symbolCodeLength = (int)Math.Ceiling((Math.Log(amountOfSymbols) / Math.Log(2))); + } + } + + private void CheckInput() + { + if (!useRefinement) + { + if (sbrTemplate != 0) + { + sbrTemplate = 0; + } + } + + if (sbHuffFS == 2 || sbHuffRDWidth == 2 || sbHuffRDHeight == 2 || sbHuffRDX == 2 + || sbHuffRDY == 2) + { + throw new InvalidHeaderValueException( + "Huffman flag value of text region segment is not permitted"); + } + + if (!useRefinement) + 
{ + if (sbHuffRSize != 0) + { + sbHuffRSize = 0; + } + if (sbHuffRDY != 0) + { + sbHuffRDY = 0; + } + if (sbHuffRDX != 0) + { + sbHuffRDX = 0; + } + if (sbHuffRDWidth != 0) + { + sbHuffRDWidth = 0; + } + if (sbHuffRDHeight != 0) + { + sbHuffRDHeight = 0; + } + } + } + + public Jbig2Bitmap GetRegionBitmap() + { + if (!isHuffmanEncoded) + { + SetCodingStatistics(); + } + + CreateRegionBitmap(); + DecodeSymbolInstances(); + + // 4) + return regionBitmap; + } + + private void SetCodingStatistics() + { + if (cxIADT == null) + { + cxIADT = new CX(512, 1); + } + + if (cxIAFS == null) + { + cxIAFS = new CX(512, 1); + } + + if (cxIADS == null) + { + cxIADS = new CX(512, 1); + } + + if (cxIAIT == null) + { + cxIAIT = new CX(512, 1); + } + + if (cxIARI == null) + { + cxIARI = new CX(512, 1); + } + + if (cxIARDW == null) + { + cxIARDW = new CX(512, 1); + } + + if (cxIARDH == null) + { + cxIARDH = new CX(512, 1); + } + + if (cxIAID == null) + { + cxIAID = new CX(1 << symbolCodeLength, 1); + } + + if (cxIARDX == null) + { + cxIARDX = new CX(512, 1); + } + + if (cxIARDY == null) + { + cxIARDY = new CX(512, 1); + } + + if (arithmeticDecoder == null) + { + arithmeticDecoder = new ArithmeticDecoder(subInputStream); + } + + if (integerDecoder == null) + { + integerDecoder = new ArithmeticIntegerDecoder(arithmeticDecoder); + } + } + + private void CreateRegionBitmap() + { + // 6.4.5 + regionBitmap = new Jbig2Bitmap(RegionInfo.BitmapWidth, RegionInfo.BitmapHeight); + + // 1) + if (defaultPixel != 0) + { + ArrayHelper.Fill(regionBitmap.GetByteArray(), (byte)0xff); + } + } + + private long DecodeStripT() + { + long stripT; + // 2) + if (isHuffmanEncoded) + { + // 6.4.6 + if (sbHuffDT == 3) + { + if (table == null) + { + int dtNr = 0; + + if (sbHuffFS == 3) + { + dtNr++; + } + + if (sbHuffDS == 3) + { + dtNr++; + } + + table = getUserTable(dtNr); + } + stripT = table.Decode(subInputStream); + } + else + { + stripT = StandardTables.getTable(11 + sbHuffDT).Decode(subInputStream); + } + } + 
else + { + stripT = integerDecoder.Decode(cxIADT); + } + + return stripT * -(sbStrips); + } + + private void DecodeSymbolInstances() + { + long stripT = DecodeStripT(); + + // Last two sentences of 6.4.5 2) + long firstS = 0; + long instanceCounter = 0; + + // 6.4.5 3 a) + while (instanceCounter < amountOfSymbolInstances) + { + long dT = DecodeDT(); + stripT += dT; + long dfS; + + // 3 c) symbol instances in the strip + bool first = true; + currentS = 0; + + // do until OOB + for (; ; ) + { + // 3 c) i) - first symbol instance in the strip + if (first) + { + // 6.4.7 + dfS = DecodeDfS(); + firstS += dfS; + currentS = firstS; + first = false; + // 3 c) ii) - the remaining symbol instances in the strip + } + else + { + // 6.4.8 + long idS = DecodeIdS(); + + // If result is OOB, then all the symbol instances in this strip have been decoded; proceed to step + // 3 d) respectively 3 b). Also exit, if the expected number of instances have been decoded. + // The latter exit condition guards against pathological cases where a strip's S never contains OOB + // and thus never terminates as illustrated in + // https://bugs.chromium.org/p/chromium/issues/detail?id=450971 case pdfium-loop2.pdf. 
+ if (idS == long.MaxValue || instanceCounter >= amountOfSymbolInstances) + { + break; + } + + currentS += (idS + sbdsOffset); + } + + // 3 c) iii) + long currentT = DecodeCurrentT(); + long t = stripT + currentT; + + // 3 c) iv) + long id = DecodeID(); + + // 3 c) v) + long r = DecodeRI(); + // 6.4.11 + Jbig2Bitmap ib = DecodeIb(r, id); + + // vi) + Blit(ib, t); + + instanceCounter++; + } + } + } + + private long DecodeDT() + { + // 3) b) + // 6.4.6 + long dT; + if (isHuffmanEncoded) + { + if (sbHuffDT == 3) + { + dT = table.Decode(subInputStream); + } + else + { + dT = StandardTables.getTable(11 + sbHuffDT).Decode(subInputStream); + } + } + else + { + dT = integerDecoder.Decode(cxIADT); + } + + return (dT * sbStrips); + } + + private long DecodeDfS() + { + if (isHuffmanEncoded) + { + if (sbHuffFS == 3) + { + if (fsTable == null) + { + fsTable = getUserTable(0); + } + return fsTable.Decode(subInputStream); + } + else + { + return StandardTables.getTable(6 + sbHuffFS).Decode(subInputStream); + } + } + else + { + return integerDecoder.Decode(cxIAFS); + } + } + + private long DecodeIdS() + { + if (isHuffmanEncoded) + { + if (sbHuffDS == 3) + { + if (dsTable == null) + { + int dsNr = 0; + if (sbHuffFS == 3) + { + dsNr++; + } + + dsTable = getUserTable(dsNr); + } + return dsTable.Decode(subInputStream); + + } + else + { + return StandardTables.getTable(8 + sbHuffDS).Decode(subInputStream); + } + } + else + { + return integerDecoder.Decode(cxIADS); + } + } + + private long DecodeCurrentT() + { + if (sbStrips != 1) + { + if (isHuffmanEncoded) + { + return subInputStream.ReadBits(logSBStrips); + } + else + { + return integerDecoder.Decode(cxIAIT); + } + } + + return 0; + } + + private long DecodeID() + { + if (isHuffmanEncoded) + { + if (symbolCodeTable == null) + { + return subInputStream.ReadBits(symbolCodeLength); + } + + return symbolCodeTable.Decode(subInputStream); + } + else + { + return integerDecoder.DecodeIAID(cxIAID, symbolCodeLength); + } + } + + private long 
DecodeRI() + { + if (useRefinement) + { + if (isHuffmanEncoded) + { + return subInputStream.ReadBit(); + } + else + { + return integerDecoder.Decode(cxIARI); + } + } + return 0; + } + + private Jbig2Bitmap DecodeIb(long r, long id) + { + Jbig2Bitmap ib; + + if (r == 0) + { + ib = symbols[(int)id]; + } + else + { + // 1) - 4) + long rdw = DecodeRdw(); + long rdh = DecodeRdh(); + long rdx = DecodeRdx(); + long rdy = DecodeRdy(); + + // 5) + /* long symInRefSize = 0; */ + if (isHuffmanEncoded) + { + /* symInRefSize = */ + DecodeSymInRefSize(); + subInputStream.SkipBits(); + } + + // 6) + Jbig2Bitmap ibo = symbols[(int)id]; + int wo = ibo.Width; + int ho = ibo.Height; + + int genericRegionReferenceDX = (int)((rdw >> 1) + rdx); + int genericRegionReferenceDY = (int)((rdh >> 1) + rdy); + + if (genericRefinementRegion == null) + { + genericRefinementRegion = new GenericRefinementRegion(subInputStream); + } + + genericRefinementRegion.SetParameters(cx, arithmeticDecoder, sbrTemplate, + (int)(wo + rdw), (int)(ho + rdh), ibo, genericRegionReferenceDX, + genericRegionReferenceDY, false, sbrATX, sbrATY); + + ib = genericRefinementRegion.GetRegionBitmap(); + + // 7 + if (isHuffmanEncoded) + { + subInputStream.SkipBits(); + } + } + return ib; + } + + private long DecodeRdw() + { + if (isHuffmanEncoded) + { + if (sbHuffRDWidth == 3) + { + if (rdwTable == null) + { + int rdwNr = 0; + if (sbHuffFS == 3) + { + rdwNr++; + } + + if (sbHuffDS == 3) + { + rdwNr++; + } + + if (sbHuffDT == 3) + { + rdwNr++; + } + + rdwTable = getUserTable(rdwNr); + } + return rdwTable.Decode(subInputStream); + + } + else + { + return StandardTables.getTable(14 + sbHuffRDWidth).Decode(subInputStream); + } + } + else + { + return integerDecoder.Decode(cxIARDW); + } + } + + private long DecodeRdh() + { + if (isHuffmanEncoded) + { + if (sbHuffRDHeight == 3) + { + if (rdhTable == null) + { + int rdhNr = 0; + + if (sbHuffFS == 3) + { + rdhNr++; + } + + if (sbHuffDS == 3) + { + rdhNr++; + } + + if (sbHuffDT == 
3) + { + rdhNr++; + } + + if (sbHuffRDWidth == 3) + { + rdhNr++; + } + + rdhTable = getUserTable(rdhNr); + } + return rdhTable.Decode(subInputStream); + } + else + { + return StandardTables.getTable(14 + sbHuffRDHeight).Decode(subInputStream); + } + } + else + { + return integerDecoder.Decode(cxIARDH); + } + } + + private long DecodeRdx() + { + if (isHuffmanEncoded) + { + if (sbHuffRDX == 3) + { + if (rdxTable == null) + { + int rdxNr = 0; + if (sbHuffFS == 3) + { + rdxNr++; + } + + if (sbHuffDS == 3) + { + rdxNr++; + } + + if (sbHuffDT == 3) + { + rdxNr++; + } + + if (sbHuffRDWidth == 3) + { + rdxNr++; + } + + if (sbHuffRDHeight == 3) + { + rdxNr++; + } + + rdxTable = getUserTable(rdxNr); + } + return rdxTable.Decode(subInputStream); + } + else + { + return StandardTables.getTable(14 + sbHuffRDX).Decode(subInputStream); + } + } + else + { + return integerDecoder.Decode(cxIARDX); + } + } + + private long DecodeRdy() + { + if (isHuffmanEncoded) + { + if (sbHuffRDY == 3) + { + if (rdyTable == null) + { + int rdyNr = 0; + if (sbHuffFS == 3) + { + rdyNr++; + } + + if (sbHuffDS == 3) + { + rdyNr++; + } + + if (sbHuffDT == 3) + { + rdyNr++; + } + + if (sbHuffRDWidth == 3) + { + rdyNr++; + } + + if (sbHuffRDHeight == 3) + { + rdyNr++; + } + + if (sbHuffRDX == 3) + { + rdyNr++; + } + + rdyTable = getUserTable(rdyNr); + } + return rdyTable.Decode(subInputStream); + } + else + { + return StandardTables.getTable(14 + sbHuffRDY).Decode(subInputStream); + } + } + else + { + return integerDecoder.Decode(cxIARDY); + } + } + + private long DecodeSymInRefSize() + { + if (sbHuffRSize == 0) + { + return StandardTables.getTable(1).Decode(subInputStream); + } + else + { + if (rSizeTable == null) + { + int rSizeNr = 0; + + if (sbHuffFS == 3) + { + rSizeNr++; + } + + if (sbHuffDS == 3) + { + rSizeNr++; + } + + if (sbHuffDT == 3) + { + rSizeNr++; + } + + if (sbHuffRDWidth == 3) + { + rSizeNr++; + } + + if (sbHuffRDHeight == 3) + { + rSizeNr++; + } + + if (sbHuffRDX == 3) + { + rSizeNr++; 
+ } + + if (sbHuffRDY == 3) + { + rSizeNr++; + } + + rSizeTable = getUserTable(rSizeNr); + } + return rSizeTable.Decode(subInputStream); + } + + } + + private void Blit(Jbig2Bitmap ib, long t) + { + if (isTransposed == 0 && (referenceCorner == 2 || referenceCorner == 3)) + { + currentS += ib.Width - 1; + } + else if (isTransposed == 1 && (referenceCorner == 0 || referenceCorner == 2)) + { + currentS += ib.Height - 1; + } + + // vii) + long s = currentS; + + // viii) + if (isTransposed == 1) + { + long swap = t; + t = s; + s = swap; + } + + if (referenceCorner != 1) + { + if (referenceCorner == 0) + { + // BL + t -= ib.Height - 1; + } + else if (referenceCorner == 2) + { + // BR + t -= ib.Height - 1; + s -= ib.Width - 1; + } + else if (referenceCorner == 3) + { + // TR + s -= ib.Width - 1; + } + } + + Jbig2Bitmaps.Blit(ib, regionBitmap, (int)s, (int)t, combinationOperator); + + // x) + if (isTransposed == 0 && (referenceCorner == 0 || referenceCorner == 1)) + { + currentS += ib.Width - 1; + } + + if (isTransposed == 1 && (referenceCorner == 1 || referenceCorner == 3)) + { + currentS += ib.Height - 1; + } + + } + + private void InitSymbols() + { + foreach (SegmentHeader segment in segmentHeader.GetRtSegments()) + { + if (segment.SegmentType == 0) + { + SymbolDictionary sd = (SymbolDictionary)segment.GetSegmentData(); + + sd.cxIAID = cxIAID; + symbols.AddRange(sd.GetDictionary()); + } + } + amountOfSymbols = symbols.Count; + } + + private HuffmanTable getUserTable(int tablePosition) + { + int tableCounter = 0; + + foreach (SegmentHeader referredToSegmentHeader in segmentHeader.GetRtSegments()) + { + if (referredToSegmentHeader.SegmentType == 53) + { + if (tableCounter == tablePosition) + { + Table t = (Table)referredToSegmentHeader.GetSegmentData(); + return new EncodedTable(t); + } + else + { + tableCounter++; + } + } + } + return null; + } + + private void SymbolIDCodeLengths() + { + // 1) - 2) + List runCodeTable = new List(); + + for (int i = 0; i < 35; i++) + { + 
int prefLen = (int)(subInputStream.ReadBits(4) & 0xf); + if (prefLen > 0) + { + runCodeTable.Add(new Code(prefLen, 0, i, false)); + } + } + + HuffmanTable ht = new FixedSizeTable(runCodeTable); + + // 3) - 5) + long previousCodeLength = 0; + + int counter = 0; + List sbSymCodes = new List(); + while (counter < amountOfSymbols) + { + long code = ht.Decode(subInputStream); + if (code < 32) + { + if (code > 0) + { + sbSymCodes.Add(new Code((int)code, 0, counter, false)); + } + + previousCodeLength = code; + counter++; + } + else + { + + long runLength = 0; + long currCodeLength = 0; + if (code == 32) + { + runLength = 3 + subInputStream.ReadBits(2); + if (counter > 0) + { + currCodeLength = previousCodeLength; + } + } + else if (code == 33) + { + runLength = 3 + subInputStream.ReadBits(3); + } + else if (code == 34) + { + runLength = 11 + subInputStream.ReadBits(7); + } + + for (int j = 0; j < runLength; j++) + { + if (currCodeLength > 0) + { + sbSymCodes.Add(new Code((int)currCodeLength, 0, counter, false)); + } + counter++; + } + } + } + + // 6) - Skip over remaining bits in the last Byte read + subInputStream.SkipBits(); + + // 7) + symbolCodeTable = new FixedSizeTable(sbSymCodes); + + } + + public void Init(SegmentHeader header, SubInputStream sis) + { + segmentHeader = header; + subInputStream = sis; + RegionInfo = new RegionSegmentInformation(subInputStream); + ParseHeader(); + } + + internal void SetContexts(CX cx, CX cxIADT, CX cxIAFS, CX cxIADS, CX cxIAIT, CX cxIAID, + CX cxIARDW, CX cxIARDH, CX cxIARDX, CX cxIARDY) + { + this.cx = cx; + + this.cxIADT = cxIADT; + this.cxIAFS = cxIAFS; + this.cxIADS = cxIADS; + this.cxIAIT = cxIAIT; + + this.cxIAID = cxIAID; + + this.cxIARDW = cxIARDW; + this.cxIARDH = cxIARDH; + this.cxIARDX = cxIARDX; + this.cxIARDY = cxIARDY; + } + + internal void SetParameters(ArithmeticDecoder arithmeticDecoder, + ArithmeticIntegerDecoder iDecoder, bool isHuffmanEncoded, bool sbRefine, int sbw, + int sbh, long sbNumInstances, int 
sbStrips, int sbNumSyms, short sbDefaultPixel, + short sbCombinationOperator, short transposed, short refCorner, short sbdsOffset, + short sbHuffFS, short sbHuffDS, short sbHuffDT, short sbHuffRDWidth, + short sbHuffRDHeight, short sbHuffRDX, short sbHuffRDY, short sbHuffRSize, + short sbrTemplate, short[] sbrATX, short[] sbrATY, List sbSyms, + int sbSymCodeLen) + { + + this.arithmeticDecoder = arithmeticDecoder; + + this.integerDecoder = iDecoder; + + this.isHuffmanEncoded = isHuffmanEncoded; + this.useRefinement = sbRefine; + + this.RegionInfo.BitmapWidth = sbw; + this.RegionInfo.BitmapHeight = sbh; + + this.amountOfSymbolInstances = sbNumInstances; + this.sbStrips = sbStrips; + this.amountOfSymbols = sbNumSyms; + this.defaultPixel = sbDefaultPixel; + this.combinationOperator = CombinationOperators + .TranslateOperatorCodeToEnum(sbCombinationOperator); + this.isTransposed = transposed; + this.referenceCorner = refCorner; + this.sbdsOffset = sbdsOffset; + + this.sbHuffFS = sbHuffFS; + this.sbHuffDS = sbHuffDS; + this.sbHuffDT = sbHuffDT; + this.sbHuffRDWidth = sbHuffRDWidth; + this.sbHuffRDHeight = sbHuffRDHeight; + this.sbHuffRDX = sbHuffRDX; + this.sbHuffRDY = sbHuffRDY; + this.sbHuffRSize = sbHuffRSize; + + this.sbrTemplate = sbrTemplate; + this.sbrATX = sbrATX; + this.sbrATY = sbrATY; + + this.symbols = sbSyms; + this.symbolCodeLength = sbSymCodeLen; + } + } +} diff --git a/src/UglyToad.PdfPig/Filters/Jbig2/Utils.cs b/src/UglyToad.PdfPig/Filters/Jbig2/Utils.cs new file mode 100644 index 000000000..82d44d2c4 --- /dev/null +++ b/src/UglyToad.PdfPig/Filters/Jbig2/Utils.cs @@ -0,0 +1,32 @@ +namespace UglyToad.PdfPig.Filters.Jbig2 +{ + using System; + + internal static class Utils + { + public static int HighestOneBit(this int number) + { + return (int)Math.Pow(2, Convert.ToString(number, 2).Length - 1); + } + + public static int GetMinY(this Jbig2Rectangle r) + { + return r.Y; + } + + public static int GetMaxY(this Jbig2Rectangle r) + { + return r.Y + r.Height; + 
} + + public static int GetMaxX(this Jbig2Rectangle r) + { + return r.X + r.Width; + } + + public static int GetMinX(this Jbig2Rectangle r) + { + return r.X; + } + } +} diff --git a/src/UglyToad.PdfPig/Filters/Jbig2/ValueNode.cs b/src/UglyToad.PdfPig/Filters/Jbig2/ValueNode.cs new file mode 100644 index 000000000..b9e0dff80 --- /dev/null +++ b/src/UglyToad.PdfPig/Filters/Jbig2/ValueNode.cs @@ -0,0 +1,46 @@ +namespace UglyToad.PdfPig.Filters.Jbig2 +{ + using static HuffmanTable; + + /// + /// Represents a value node in a Huffman tree. It is a leaf of a tree. + /// + internal sealed class ValueNode : Node + { + private readonly int rangeLength; + private readonly int rangeLow; + private readonly bool isLowerRange; + + public ValueNode(Code c) + { + rangeLength = c.RangeLength; + rangeLow = c.RangeLow; + isLowerRange = c.IsLowerRange; + } + + public override sealed long Decode(IImageInputStream iis) + { + if (isLowerRange) + { + // B.4 4) + return (rangeLow - iis.ReadBits(rangeLength)); + } + else + { + // B.4 5) + return rangeLow + iis.ReadBits(rangeLength); + } + } + + internal static string BitPattern(int v, int len) + { + var result = new char[len]; + for (int i = 1; i <= len; i++) + { + result[i - 1] = (v >> (len - i) & 1) != 0 ? 
'1' : '0'; + } + + return new string(result); + } + } +} diff --git a/src/UglyToad.PdfPig/Filters/Jbig2DecodeFilter.cs b/src/UglyToad.PdfPig/Filters/Jbig2DecodeFilter.cs index ee4c04418..3beed295f 100644 --- a/src/UglyToad.PdfPig/Filters/Jbig2DecodeFilter.cs +++ b/src/UglyToad.PdfPig/Filters/Jbig2DecodeFilter.cs @@ -1,19 +1,47 @@ namespace UglyToad.PdfPig.Filters { - using System; - using System.Collections.Generic; - using Tokens; - + using System.Collections.Generic; + using System.Linq; + using Tokens; + using UglyToad.PdfPig.Filters.Jbig2; + internal class Jbig2DecodeFilter : IFilter { /// - public bool IsSupported { get; } = false; + public bool IsSupported { get; } = true; /// public byte[] Decode(IReadOnlyList input, DictionaryToken streamDictionary, int filterIndex) - { - throw new NotSupportedException("The JBIG2 Filter for monochrome image data is not currently supported. " + - "Try accessing the raw compressed data directly."); + { + var decodeParms = DecodeParameterResolver.GetFilterParameters(streamDictionary, filterIndex); + Jbig2Document globalDocument = null; + if (decodeParms.TryGet(NameToken.Jbig2Globals, out StreamToken tok)) + { + globalDocument = new Jbig2Document(new ImageInputStream(tok.Data.ToArray())); + } + + using (var jbig2 = new Jbig2Document(new ImageInputStream(input.ToArray()), + globalDocument != null ? globalDocument.GetGlobalSegments() : null)) + { + var page = jbig2.GetPage(1); + var bitmap = page.GetBitmap(); + + var pageInfo = + (PageInformation)page.GetPageInformationSegment().GetSegmentData(); + + if (globalDocument != null) + { + globalDocument.Dispose(); + } + + var isImageMask = streamDictionary.ContainsKey(NameToken.ImageMask) || + streamDictionary.ContainsKey(NameToken.Im); + + // Invert bits if the default pixel value is black or the stream dictionary has an image mask entry + return (pageInfo.DefaultPixelValue != 0 || isImageMask) ? 
+ bitmap.GetByteArray().Select(x => (byte)~x).ToArray() : + bitmap.GetByteArray(); + } } } } \ No newline at end of file diff --git a/src/UglyToad.PdfPig/Images/ColorSpaceDetailsByteConverter.cs b/src/UglyToad.PdfPig/Images/ColorSpaceDetailsByteConverter.cs index 163149a0a..aa2de6560 100644 --- a/src/UglyToad.PdfPig/Images/ColorSpaceDetailsByteConverter.cs +++ b/src/UglyToad.PdfPig/Images/ColorSpaceDetailsByteConverter.cs @@ -42,8 +42,8 @@ public static byte[] Convert(ColorSpaceDetails details, IReadOnlyList deco if (strideWidth != imageWidth) { decoded = RemoveStridePadding(decoded.ToArray(), strideWidth, imageWidth, imageHeight, bytesPerPixel); - } - + } + decoded = details.Transform(decoded); return decoded.ToArray(); diff --git a/src/UglyToad.PdfPig/Parser/FileStructure/XrefOffsetValidator.cs b/src/UglyToad.PdfPig/Parser/FileStructure/XrefOffsetValidator.cs index 55cd733b8..591af8e09 100644 --- a/src/UglyToad.PdfPig/Parser/FileStructure/XrefOffsetValidator.cs +++ b/src/UglyToad.PdfPig/Parser/FileStructure/XrefOffsetValidator.cs @@ -80,8 +80,6 @@ private long CalculateXRefFixedOffset(long objectOffset, ISeekableTokenScanner s return 0; } - - private long BruteForceSearchForXref(long xrefOffset, ISeekableTokenScanner scanner, IInputBytes reader) { long newOffset = -1; diff --git a/src/UglyToad.PdfPig/PdfExtensions.cs b/src/UglyToad.PdfPig/PdfExtensions.cs index 9d4154553..c52daa7ab 100644 --- a/src/UglyToad.PdfPig/PdfExtensions.cs +++ b/src/UglyToad.PdfPig/PdfExtensions.cs @@ -32,6 +32,17 @@ internal static bool TryGet(this DictionaryToken dictionary, NameToken name, return true; } + internal static T Get(this DictionaryToken dictionary, NameToken name) where T : class, IToken + { + if (!dictionary.TryGet(name, out var token) || !(token is T typedToken)) + { + throw new PdfDocumentFormatException($"Dictionary does not contain token with name {name} of type {typeof(T).Name}."); + } + + return typedToken; + } + + internal static T Get(this DictionaryToken 
dictionary, NameToken name, IPdfTokenScanner scanner) where T : class, IToken { if (!dictionary.TryGet(name, out var token) || !(token is T typedToken)) @@ -47,6 +58,49 @@ internal static T Get(this DictionaryToken dictionary, NameToken name, IPdfTo return typedToken; } + /// + /// Returns an equivalent token where any indirect references of child objects are + /// recursively traversed and resolved. + /// + internal static T Resolve(this T token, IPdfTokenScanner scanner) where T : IToken + { + return (T) ResolveInternal(token, scanner); + } + + private static IToken ResolveInternal(this IToken token, IPdfTokenScanner scanner) + { + if (token is StreamToken stream) + { + return new StreamToken(Resolve(stream.StreamDictionary, scanner), stream.Data); + } + + if (token is DictionaryToken dict) + { + var resolvedItems = new Dictionary(); + foreach (var kvp in dict.Data) + { + var value = kvp.Value is IndirectReferenceToken reference ? scanner.Get(reference.Data).Data : kvp.Value; + resolvedItems[NameToken.Create(kvp.Key)] = ResolveInternal(value, scanner); + } + + return new DictionaryToken(resolvedItems); + } + + if (token is ArrayToken arr) + { + var resolvedItems = new List(); + for (int i = 0; i < arr.Length; i++) + { + var value = arr.Data[i] is IndirectReferenceToken reference ? scanner.Get(reference.Data).Data : arr.Data[i]; + resolvedItems.Add(ResolveInternal(value, scanner)); + } + return new ArrayToken(resolvedItems); + } + + var val = token is IndirectReferenceToken tokenReference ? scanner.Get(tokenReference.Data).Data : token; + return val; + } + /// /// Get the decoded data from this stream. 
/// diff --git a/src/UglyToad.PdfPig/Util/ArrayHelper.cs b/src/UglyToad.PdfPig/Util/ArrayHelper.cs index fc72157ab..704a2d863 100644 --- a/src/UglyToad.PdfPig/Util/ArrayHelper.cs +++ b/src/UglyToad.PdfPig/Util/ArrayHelper.cs @@ -4,6 +4,11 @@ internal static class ArrayHelper { + public static void Fill(T[] array, T value) + { + Fill(array, 0, array.Length - 1, value); + } + public static void Fill(T[] array, int start, int end, T value) { if (array == null) diff --git a/src/UglyToad.PdfPig/XObjects/XObjectFactory.cs b/src/UglyToad.PdfPig/XObjects/XObjectFactory.cs index 0dc9ba995..e3b350d86 100644 --- a/src/UglyToad.PdfPig/XObjects/XObjectFactory.cs +++ b/src/UglyToad.PdfPig/XObjects/XObjectFactory.cs @@ -29,15 +29,15 @@ public static XObjectImage ReadImage(XObjectContentRecord xObject, IPdfTokenScan throw new InvalidOperationException($"Cannot create an image from an XObject with type: {xObject.Type}."); } - var dictionary = xObject.Stream.StreamDictionary; + var dictionary = xObject.Stream.StreamDictionary.Resolve(pdfScanner); var bounds = xObject.AppliedTransformation.Transform(new PdfRectangle(new PdfPoint(0, 0), new PdfPoint(1, 1))); - var width = dictionary.Get(NameToken.Width, pdfScanner).Int; - var height = dictionary.Get(NameToken.Height, pdfScanner).Int; + var width = dictionary.Get(NameToken.Width, pdfScanner).Int; + var height = dictionary.Get(NameToken.Height, pdfScanner).Int; - var isImageMask = dictionary.TryGet(NameToken.ImageMask, pdfScanner, out BooleanToken isMaskToken) - && isMaskToken.Data; + var isImageMask = dictionary.TryGet(NameToken.ImageMask, out BooleanToken isMaskToken) + && isMaskToken.Data; var isJpxDecode = dictionary.TryGet(NameToken.Filter, out var token) && token is NameToken filterName @@ -46,7 +46,7 @@ public static XObjectImage ReadImage(XObjectContentRecord xObject, IPdfTokenScan int bitsPerComponent = 0; if (!isImageMask && !isJpxDecode) { - if (!dictionary.TryGet(NameToken.BitsPerComponent, pdfScanner, out NumericToken 
bitsPerComponentToken)) + if (!dictionary.TryGet(NameToken.BitsPerComponent, out NumericToken bitsPerComponentToken)) { throw new PdfDocumentFormatException($"No bits per component defined for image: {dictionary}."); } @@ -64,45 +64,20 @@ public static XObjectImage ReadImage(XObjectContentRecord xObject, IPdfTokenScan intent = renderingIntentToken.Data.ToRenderingIntent(); } - var interpolate = dictionary.TryGet(NameToken.Interpolate, pdfScanner, out BooleanToken interpolateToken) - && interpolateToken.Data; - - DictionaryToken filterDictionary = xObject.Stream.StreamDictionary; - if (xObject.Stream.StreamDictionary.TryGet(NameToken.Filter, out var filterToken) - && filterToken is IndirectReferenceToken) - { - if (filterDictionary.TryGet(NameToken.Filter, pdfScanner, out ArrayToken filterArray)) - { - filterDictionary = filterDictionary.With(NameToken.Filter, filterArray); - } - else if (filterDictionary.TryGet(NameToken.Filter, pdfScanner, out NameToken filterNameToken)) - { - filterDictionary = filterDictionary.With(NameToken.Filter, filterNameToken); - } - else - { - filterDictionary = null; - } - } - - var supportsFilters = filterDictionary != null; - if (filterDictionary != null) - { - var filters = filterProvider.GetFilters(filterDictionary, pdfScanner); - foreach (var filter in filters) - { - if (!filter.IsSupported) - { - supportsFilters = false; - break; - } - } - } - - var decodeParams = dictionary.GetObjectOrDefault(NameToken.DecodeParms, NameToken.Dp); - if (decodeParams is IndirectReferenceToken refToken) - { - dictionary = dictionary.With(NameToken.DecodeParms, pdfScanner.Get(refToken.Data).Data); + var interpolate = dictionary.TryGet(NameToken.Interpolate, out BooleanToken interpolateToken) + && interpolateToken.Data; + + + var supportsFilters = true; + + var filters = filterProvider.GetFilters(dictionary, pdfScanner); + foreach (var filter in filters) + { + if (!filter.IsSupported) + { + supportsFilters = false; + break; + } } var streamToken = new 
StreamToken(dictionary, xObject.Stream.Data); @@ -118,17 +93,17 @@ public static XObjectImage ReadImage(XObjectContentRecord xObject, IPdfTokenScan .Select(x => x.Data) .ToArray(); } - + ColorSpaceDetails details = null; if (!isImageMask) { if (dictionary.TryGet(NameToken.ColorSpace, pdfScanner, out NameToken colorSpaceNameToken)) - { + { details = resourceStore.GetColorSpaceDetails(colorSpaceNameToken, dictionary); } else if (dictionary.TryGet(NameToken.ColorSpace, pdfScanner, out ArrayToken colorSpaceArrayToken) && colorSpaceArrayToken.Length > 0 && colorSpaceArrayToken.Data[0] is NameToken firstColorSpaceName) - { + { details = resourceStore.GetColorSpaceDetails(firstColorSpaceName, dictionary); } else if (!isJpxDecode)