diff -r 5b37998e522e -r 162c1de6545a web/lib/Zend/Pdf/Cmap/SegmentToDelta.php --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/web/lib/Zend/Pdf/Cmap/SegmentToDelta.php Fri Mar 11 15:05:35 2011 +0100 @@ -0,0 +1,407 @@ + $characterCode) { + + /* These tables only cover the 16-bit character range. + */ + if ($characterCode > 0xffff) { + $glyphNumbers[$key] = Zend_Pdf_Cmap::MISSING_CHARACTER_GLYPH; + continue; + } + + /* Determine where to start the binary search. The segments are + * ordered from lowest-to-highest. We are looking for the first + * segment whose end code is greater than or equal to our character + * code. + * + * If the end code at the top of the search range is larger, then + * our target is probably below it. + * + * If it is smaller, our target is probably above it, so move the + * search range to the end of the segment list. + */ + if ($this->_searchRangeEndCode >= $characterCode) { + $searchIndex = $this->_searchRange; + } else { + $searchIndex = $this->_segmentCount; + } + + /* Now do a binary search to find the first segment whose end code + * is greater or equal to our character code. No matter the number + * of segments (there may be hundreds in a large font), we will only + * need to perform $this->_searchIterations. + */ + for ($i = 1; $i <= $this->_searchIterations; $i++) { + if ($this->_segmentTableEndCodes[$searchIndex] >= $characterCode) { + $subtableIndex = $searchIndex; + $searchIndex -= $this->_searchRange >> $i; + } else { + $searchIndex += $this->_searchRange >> $i; + } + } + + /* If the segment's start code is greater than our character code, + * that character is not represented in this font. Move on. + */ + if ($this->_segmentTableStartCodes[$subtableIndex] > $characterCode) { + $glyphNumbers[$key] = Zend_Pdf_Cmap::MISSING_CHARACTER_GLYPH; + continue; + } + + if ($this->_segmentTableIdRangeOffsets[$subtableIndex] == 0) { + /* This segment uses a simple mapping from character code to + * glyph number. 
*/
                /* The deltas are parsed as signed values and PHP's % operator
                 * keeps the sign of the dividend, so a negative sum would yield
                 * a negative glyph number. Masking with 0xFFFF performs the
                 * modulo-65536 wrap correctly for both signs.
                 */
                $glyphNumbers[$key] = ($characterCode + $this->_segmentTableIdDeltas[$subtableIndex]) & 0xFFFF;

            } else {
                /* This segment relies on the glyph index array to determine the
                 * glyph number. The calculation below determines the correct
                 * index into that array. It's a little odd because the range
                 * offset in the font file is designed to quickly provide an
                 * address of the index in the raw binary data instead of the
                 * index itself. Since we've parsed the data into arrays, we
                 * must process it a bit differently.
                 */
                $glyphIndex = ($characterCode - $this->_segmentTableStartCodes[$subtableIndex] +
                               $this->_segmentTableIdRangeOffsets[$subtableIndex] - $this->_segmentCount +
                               $subtableIndex - 1);
                $glyphNumbers[$key] = $this->_glyphNumberFromIndexArray($glyphIndex, $subtableIndex);

            }

        }
        return $glyphNumbers;
    }

    /**
     * Returns the glyph number corresponding to the Unicode character.
     *
     * If a particular character doesn't exist in this font, the special 'missing
     * character glyph' will be substituted.
     *
     * See also {@link glyphNumbersForCharacters()} which is optimized for bulk
     * operations.
     *
     * @param integer $characterCode Unicode character code (code point).
     * @return integer Glyph number.
     */
    public function glyphNumberForCharacter($characterCode)
    {
        /* These tables only cover the 16-bit character range.
         */
        if ($characterCode > 0xffff) {
            return Zend_Pdf_Cmap::MISSING_CHARACTER_GLYPH;
        }

        /* Pick the starting point of the binary search: the top of the
         * power-of-two search range if its end code already covers the
         * character, otherwise the end of the segment list.
         */
        if ($this->_searchRangeEndCode >= $characterCode) {
            $searchIndex = $this->_searchRange;
        } else {
            $searchIndex = $this->_segmentCount;
        }

        /* Binary search for the first segment whose end code is greater than
         * or equal to the character code. $subtableIndex stays null if no
         * segment qualifies, which can only happen with a malformed table.
         */
        $subtableIndex = null;
        for ($i = 1; $i <= $this->_searchIterations; $i++) {
            $step = $this->_searchRange >> $i;
            if ($this->_segmentTableEndCodes[$searchIndex] >= $characterCode) {
                $subtableIndex = $searchIndex;
                $searchIndex -= $step;
            } else {
                $searchIndex += $step;
            }
        }

        if ($subtableIndex === null) {
            return Zend_Pdf_Cmap::MISSING_CHARACTER_GLYPH;
        }

        /* If the segment's start code is greater than the character code,
         * the character is not represented in this font.
         */
        if ($this->_segmentTableStartCodes[$subtableIndex] > $characterCode) {
            return Zend_Pdf_Cmap::MISSING_CHARACTER_GLYPH;
        }

        if ($this->_segmentTableIdRangeOffsets[$subtableIndex] == 0) {
            /* Simple delta mapping. The delta is signed, so mask instead of
             * using % to get a correct modulo-65536 result for negative sums.
             */
            return ($characterCode + $this->_segmentTableIdDeltas[$subtableIndex]) & 0xFFFF;
        }

        /* Range-offset mapping: translate the byte-oriented range offset into
         * an index into the parsed glyph index array.
         */
        $glyphIndex = ($characterCode - $this->_segmentTableStartCodes[$subtableIndex] +
                       $this->_segmentTableIdRangeOffsets[$subtableIndex] - $this->_segmentCount +
                       $subtableIndex - 1);
        return $this->_glyphNumberFromIndexArray($glyphIndex, $subtableIndex);
    }

    /**
     * Returns an array containing the Unicode characters that have entries in
     * this character map.
     *
     * Every code from each segment's start code through its end code
     * (inclusive) is listed, in segment order.
     *
     * @return array Unicode character codes.
     */
    public function getCoveredCharacters()
    {
        $characterCodes = array();
        for ($segmentNum = 1; $segmentNum <= $this->_segmentCount; $segmentNum++) {
            $endCode = $this->_segmentTableEndCodes[$segmentNum];
            for ($code = $this->_segmentTableStartCodes[$segmentNum]; $code <= $endCode; $code++) {
                $characterCodes[] = $code;
            }
        }
        return $characterCodes;
    }


    /**
     * Returns an array containing the glyph numbers that have entries in this
     * character map. Keys are Unicode character codes (integers).
     *
     * This functionality is partially covered by a
     * glyphNumbersForCharacters(getCoveredCharacters()) call, but this method
     * does it more efficiently: it walks each segment once and builds the
     * complete list instead of searching for the glyph of each character code.
     *
     * @internal
     * @return array Array of character code => glyph number pairs.
     */
    public function getCoveredCharactersGlyphs()
    {
        $glyphNumbers = array();

        for ($segmentNum = 1; $segmentNum <= $this->_segmentCount; $segmentNum++) {
            $startCode = $this->_segmentTableStartCodes[$segmentNum];
            $endCode   = $this->_segmentTableEndCodes[$segmentNum];

            if ($this->_segmentTableIdRangeOffsets[$segmentNum] == 0) {
                /* Simple delta mapping. The delta is signed, so mask instead
                 * of using % to wrap negative sums into the 16-bit range.
                 */
                $delta = $this->_segmentTableIdDeltas[$segmentNum];

                for ($code = $startCode; $code <= $endCode; $code++) {
                    $glyphNumbers[$code] = ($code + $delta) & 0xFFFF;
                }
            } else {
                /* Range-offset mapping: consecutive character codes map to
                 * consecutive entries of the glyph index array, starting at
                 * the entry that belongs to the segment's start code.
                 */
                $glyphIndex = $this->_segmentTableIdRangeOffsets[$segmentNum]
                            - ($this->_segmentCount - $segmentNum) - 1;

                for ($code = $startCode; $code <= $endCode; $code++, $glyphIndex++) {
                    $glyphNumbers[$code] = $this->_glyphNumberFromIndexArray($glyphIndex, $segmentNum);
                }
            }
        }

        return $glyphNumbers;
    }



  /* Object Lifecycle */

    /**
     * Object constructor
     *
     * Parses the raw binary table data. Throws an exception if the table is
     * malformed.
     *
     * @param string $cmapData Raw binary cmap table data.
     * @throws Zend_Pdf_Exception
     */
    public function __construct($cmapData)
    {
        /* Sanity check: The table should be at least 23 bytes in size.
         */
        $actualLength = strlen($cmapData);
        if ($actualLength < 23) {
            require_once 'Zend/Pdf/Exception.php';
            throw new Zend_Pdf_Exception('Insufficient table data',
                                         Zend_Pdf_Exception::CMAP_TABLE_DATA_TOO_SMALL);
        }

        /* Sanity check: Make sure this is right data for this table type.
         */
        $type = $this->_extractUInt2($cmapData, 0);
        if ($type != Zend_Pdf_Cmap::TYPE_SEGMENT_TO_DELTA) {
            require_once 'Zend/Pdf/Exception.php';
            throw new Zend_Pdf_Exception('Wrong cmap table type',
                                         Zend_Pdf_Exception::CMAP_WRONG_TABLE_TYPE);
        }

        $length = $this->_extractUInt2($cmapData, 2);
        if ($length != $actualLength) {
            require_once 'Zend/Pdf/Exception.php';
            throw new Zend_Pdf_Exception("Table length ($length) does not match actual length ($actualLength)",
                                         Zend_Pdf_Exception::CMAP_WRONG_TABLE_LENGTH);
        }

        /* Mapping tables should be language-independent. The font may not work
         * as expected if they are not. Unfortunately, many font files in the
         * wild incorrectly record a language ID in this field, so we can't
         * call this a failure.
         */
        $language = $this->_extractUInt2($cmapData, 4);
        if ($language != 0) {
            // Record a warning here somehow?
        }

        /* These two values are stored premultiplied by two which is convenient
         * when using the binary data directly, but we're parsing it out to
         * native PHP data types, so divide by two.
         */
        $this->_segmentCount = $this->_extractUInt2($cmapData, 6) >> 1;
        $this->_searchRange  = $this->_extractUInt2($cmapData, 8) >> 1;

        $this->_searchIterations = $this->_extractUInt2($cmapData, 10) + 1;

        /* The per-segment tables are stored one-based so that they line up
         * with the search range arithmetic used by the lookup methods.
         */
        $offset = 14;
        for ($i = 1; $i <= $this->_segmentCount; $i++, $offset += 2) {
            $this->_segmentTableEndCodes[$i] = $this->_extractUInt2($cmapData, $offset);
        }

        $this->_searchRangeEndCode = $this->_segmentTableEndCodes[$this->_searchRange];

        $offset += 2;    // reserved bytes

        for ($i = 1; $i <= $this->_segmentCount; $i++, $offset += 2) {
            $this->_segmentTableStartCodes[$i] = $this->_extractUInt2($cmapData, $offset);
        }

        for ($i = 1; $i <= $this->_segmentCount; $i++, $offset += 2) {
            $this->_segmentTableIdDeltas[$i] = $this->_extractInt2($cmapData, $offset);    // signed
        }

        /* The range offset helps determine the index into the glyph index array.
         * Like the segment count and search range above, it's stored as a byte
         * multiple in the font, so divide by two as we extract the values.
         */
        for ($i = 1; $i <= $this->_segmentCount; $i++, $offset += 2) {
            $this->_segmentTableIdRangeOffsets[$i] = $this->_extractUInt2($cmapData, $offset) >> 1;
        }

        /* The size of the glyph index array varies by font and depends on the
         * extent of the usage of range offsets versus deltas. Some fonts may
         * not have any entries in this array.
         */
        for (; $offset < $length; $offset += 2) {
            $this->_glyphIndexArray[] = $this->_extractUInt2($cmapData, $offset);
        }

        /* Sanity check: After reading all of the data, we should be at the end
         * of the table.
         */
        if ($offset != $length) {
            require_once 'Zend/Pdf/Exception.php';
            throw new Zend_Pdf_Exception("Ending offset ($offset) does not match length ($length)",
                                         Zend_Pdf_Exception::CMAP_FINAL_OFFSET_NOT_LENGTH);
        }
    }



  /* Internal Methods */

    /**
     * Resolves a glyph number through the glyph index array.
     *
     * Used for segments whose range offset is non-zero. A stored value of
     * zero is returned unchanged; any other value has the segment's delta
     * added to it, wrapped to 16 bits. An index that falls outside the parsed
     * array yields the missing character glyph instead of raising an
     * undefined-offset notice.
     *
     * @param integer $glyphIndex   Index into the parsed glyph index array.
     * @param integer $segmentIndex One-based index of the segment being resolved.
     * @return integer Glyph number.
     */
    private function _glyphNumberFromIndexArray($glyphIndex, $segmentIndex)
    {
        if (!isset($this->_glyphIndexArray[$glyphIndex])) {
            return Zend_Pdf_Cmap::MISSING_CHARACTER_GLYPH;
        }

        $glyphNumber = $this->_glyphIndexArray[$glyphIndex];
        if ($glyphNumber != 0) {
            // Mask rather than use %, because the delta is signed.
            $glyphNumber = ($glyphNumber + $this->_segmentTableIdDeltas[$segmentIndex]) & 0xFFFF;
        }
        return $glyphNumber;
    }

}