web/lib/Zend/Pdf/Cmap/SegmentToDelta.php
changeset 64 162c1de6545a
parent 19 1c2f13fd785c
child 68 ecaf28ffe26e
equal deleted inserted replaced
63:5b37998e522e 64:162c1de6545a
       
     1 <?php
       
     2 /**
       
     3  * Zend Framework
       
     4  *
       
     5  * LICENSE
       
     6  *
       
     7  * This source file is subject to the new BSD license that is bundled
       
     8  * with this package in the file LICENSE.txt.
       
     9  * It is also available through the world-wide-web at this URL:
       
    10  * http://framework.zend.com/license/new-bsd
       
    11  * If you did not receive a copy of the license and are unable to
       
    12  * obtain it through the world-wide-web, please send an email
       
    13  * to license@zend.com so we can send you a copy immediately.
       
    14  *
       
    15  * @category   Zend
       
    16  * @package    Zend_Pdf
       
    17  * @subpackage Fonts
       
    18  * @copyright  Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
       
    19  * @license    http://framework.zend.com/license/new-bsd     New BSD License
       
    20  * @version    $Id: SegmentToDelta.php 20096 2010-01-06 02:05:09Z bkarwin $
       
    21  */
       
    22 
       
    23 /** Zend_Pdf_Cmap */
       
    24 require_once 'Zend/Pdf/Cmap.php';
       
    25 
       
    26 
       
    27 /**
       
    28  * Implements the "segment mapping to delta values" character map (type 4).
       
    29  *
       
    30  * This is the Microsoft standard mapping table type for OpenType fonts. It
       
    31  * provides the ability to cover multiple contiguous ranges of the Unicode
       
    32  * character set, with the exception of Unicode Surrogates (U+D800 - U+DFFF).
       
    33  *
       
    34  * @package    Zend_Pdf
       
    35  * @subpackage Fonts
       
    36  * @copyright  Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
       
    37  * @license    http://framework.zend.com/license/new-bsd     New BSD License
       
    38  */
       
    class Zend_Pdf_Cmap_SegmentToDelta extends Zend_Pdf_Cmap
    {
      /**** Instance Variables ****/


        /**
         * The number of segments in the table.
         * @var integer
         */
        protected $_segmentCount = 0;

        /**
         * The size of the binary search range for segments: the largest power
         * of two that does not exceed the segment count (0 for an empty table).
         * Derived from the segment count by the constructor.
         * @var integer
         */
        protected $_searchRange = 0;

        /**
         * The number of binary search steps required to cover the entire search
         * range. Derived from the segment count by the constructor.
         * @var integer
         */
        protected $_searchIterations = 0;

        /**
         * Array of ending character codes for each segment. Keys are 1-based
         * segment numbers.
         * @var array
         */
        protected $_segmentTableEndCodes = array();

        /**
         * The ending character code for the segment at the end of the low search
         * range.
         * @var integer
         */
        protected $_searchRangeEndCode = 0;

        /**
         * Array of starting character codes for each segment. Keys are 1-based
         * segment numbers.
         * @var array
         */
        protected $_segmentTableStartCodes = array();

        /**
         * Array of character code to glyph delta values (signed) for each
         * segment. Keys are 1-based segment numbers.
         * @var array
         */
        protected $_segmentTableIdDeltas = array();

        /**
         * Array of offsets into the glyph index array for each segment, stored
         * in 16-bit units (the byte offset from the font divided by two). Keys
         * are 1-based segment numbers.
         * @var array
         */
        protected $_segmentTableIdRangeOffsets = array();

        /**
         * Glyph index array. Stores glyph numbers, used with range offset.
         * Keys are 0-based.
         * @var array
         */
        protected $_glyphIndexArray = array();



      /**** Public Interface ****/


      /* Concrete Class Implementation */

        /**
         * Returns an array of glyph numbers corresponding to the Unicode characters.
         *
         * If a particular character doesn't exist in this font, the special 'missing
         * character glyph' will be substituted.
         *
         * See also {@link glyphNumberForCharacter()}.
         *
         * @param array $characterCodes Array of Unicode character codes (code points).
         * @return array Array of glyph numbers, using the same keys as the input.
         */
        public function glyphNumbersForCharacters($characterCodes)
        {
            $glyphNumbers = array();
            foreach ($characterCodes as $key => $characterCode) {
                $glyphNumbers[$key] = $this->_lookupGlyphNumber($characterCode);
            }
            return $glyphNumbers;
        }

        /**
         * Returns the glyph number corresponding to the Unicode character.
         *
         * If a particular character doesn't exist in this font, the special 'missing
         * character glyph' will be substituted.
         *
         * See also {@link glyphNumbersForCharacters()} for bulk operations.
         *
         * @param integer $characterCode Unicode character code (code point).
         * @return integer Glyph number.
         */
        public function glyphNumberForCharacter($characterCode)
        {
            return $this->_lookupGlyphNumber($characterCode);
        }

        /**
         * Returns an array containing the Unicode characters that have entries in
         * this character map.
         *
         * Every code from each segment's start code through its end code
         * (inclusive) is listed, in segment order.
         *
         * @return array Unicode character codes.
         */
        public function getCoveredCharacters()
        {
            $characterCodes = array();
            for ($segmentNum = 1; $segmentNum <= $this->_segmentCount; $segmentNum++) {
                $lastCode = $this->_segmentTableEndCodes[$segmentNum];
                for ($code = $this->_segmentTableStartCodes[$segmentNum]; $code <= $lastCode; $code++) {
                    $characterCodes[] = $code;
                }
            }
            return $characterCodes;
        }


        /**
         * Returns an array containing the glyph numbers that have entries in this
         * character map. Keys are Unicode character codes (integers).
         *
         * This functionality is partially covered by a
         * glyphNumbersForCharacters(getCoveredCharacters()) call, but this method
         * does it more efficiently: it walks every segment once instead of
         * searching for the segment of each character code.
         *
         * @internal
         * @return array Array representing <Unicode character code> => <glyph number> pairs.
         */
        public function getCoveredCharactersGlyphs()
        {
            $glyphNumbers = array();

            for ($segmentNum = 1; $segmentNum <= $this->_segmentCount; $segmentNum++) {
                $lastCode = $this->_segmentTableEndCodes[$segmentNum];
                for ($code = $this->_segmentTableStartCodes[$segmentNum]; $code <= $lastCode; $code++) {
                    $glyphNumbers[$code] = $this->_glyphNumberInSegment($segmentNum, $code);
                }
            }

            return $glyphNumbers;
        }



      /* Object Lifecycle */

        /**
         * Object constructor
         *
         * Parses the raw binary table data. Throws an exception if the table is
         * malformed.
         *
         * @param string $cmapData Raw binary cmap table data.
         * @throws Zend_Pdf_Exception
         */
        public function __construct($cmapData)
        {
            /* Sanity check: The table should be at least 23 bytes in size.
             */
            $actualLength = strlen($cmapData);
            if ($actualLength < 23) {
                require_once 'Zend/Pdf/Exception.php';
                throw new Zend_Pdf_Exception('Insufficient table data',
                                             Zend_Pdf_Exception::CMAP_TABLE_DATA_TOO_SMALL);
            }

            /* Sanity check: Make sure this is right data for this table type.
             */
            $type = $this->_extractUInt2($cmapData, 0);
            if ($type != Zend_Pdf_Cmap::TYPE_SEGMENT_TO_DELTA) {
                require_once 'Zend/Pdf/Exception.php';
                throw new Zend_Pdf_Exception('Wrong cmap table type',
                                             Zend_Pdf_Exception::CMAP_WRONG_TABLE_TYPE);
            }

            $length = $this->_extractUInt2($cmapData, 2);
            if ($length != $actualLength) {
                require_once 'Zend/Pdf/Exception.php';
                throw new Zend_Pdf_Exception("Table length ($length) does not match actual length ($actualLength)",
                                             Zend_Pdf_Exception::CMAP_WRONG_TABLE_LENGTH);
            }

            /* The language field (offset 4) is intentionally not read. Mapping
             * tables should be language-independent, but many font files in the
             * wild incorrectly record a language ID there, so a non-zero value
             * cannot be treated as a failure.
             */

            /* The segment count is stored premultiplied by two, which is
             * convenient when using the binary data directly, but we're parsing
             * it out to native PHP data types, so divide by two.
             */
            $this->_segmentCount = $this->_extractUInt2($cmapData, 6) >> 1;

            /* The table also stores a search range (offset 8) and an entry
             * selector (offset 10). Both are fully determined by the segment
             * count, so derive them here instead of trusting the stored values:
             * an inconsistent search range would otherwise be used as an index
             * into the end code array. For a well-formed table the derived
             * values equal the stored ones.
             */
            $this->_searchRange      = 0;
            $this->_searchIterations = 0;
            if ($this->_segmentCount > 0) {
                $this->_searchRange      = 1;
                $this->_searchIterations = 1;
                while (($this->_searchRange << 1) <= $this->_segmentCount) {
                    $this->_searchRange <<= 1;
                    $this->_searchIterations++;
                }
            }

            /* The four header fields after the segment count occupy offsets
             * 8 through 13; the segment arrays begin at offset 14.
             */
            $offset = 14;
            for ($i = 1; $i <= $this->_segmentCount; $i++, $offset += 2) {
                $this->_segmentTableEndCodes[$i] = $this->_extractUInt2($cmapData, $offset);
            }

            if ($this->_searchRange > 0) {
                $this->_searchRangeEndCode = $this->_segmentTableEndCodes[$this->_searchRange];
            } else {
                $this->_searchRangeEndCode = 0;
            }

            $offset += 2;    // reserved bytes

            for ($i = 1; $i <= $this->_segmentCount; $i++, $offset += 2) {
                $this->_segmentTableStartCodes[$i] = $this->_extractUInt2($cmapData, $offset);
            }

            for ($i = 1; $i <= $this->_segmentCount; $i++, $offset += 2) {
                $this->_segmentTableIdDeltas[$i] = $this->_extractInt2($cmapData, $offset);    // signed
            }

            /* The range offset helps determine the index into the glyph index array.
             * Like the segment count above, it's stored as a byte multiple in the
             * font, so divide by two as we extract the values.
             */
            for ($i = 1; $i <= $this->_segmentCount; $i++, $offset += 2) {
                $this->_segmentTableIdRangeOffsets[$i] = $this->_extractUInt2($cmapData, $offset) >> 1;
            }

            /* The size of the glyph index array varies by font and depends on the
             * extent of the usage of range offsets versus deltas. Some fonts may
             * not have any entries in this array.
             */
            for (; $offset < $length; $offset += 2) {
                $this->_glyphIndexArray[] = $this->_extractUInt2($cmapData, $offset);
            }

            /* Sanity check: After reading all of the data, we should be at the end
             * of the table.
             */
            if ($offset != $length) {
                require_once 'Zend/Pdf/Exception.php';
                throw new Zend_Pdf_Exception("Ending offset ($offset) does not match length ($length)",
                                             Zend_Pdf_Exception::CMAP_FINAL_OFFSET_NOT_LENGTH);
            }
        }



      /**** Internal Methods ****/


        /**
         * Finds the segment covering a character code and returns its glyph number.
         *
         * Shared implementation behind {@link glyphNumberForCharacter()} and
         * {@link glyphNumbersForCharacters()}.
         *
         * @param integer $characterCode Unicode character code (code point).
         * @return integer Glyph number, or the missing character glyph when no
         *   segment covers the character code.
         */
        protected function _lookupGlyphNumber($characterCode)
        {
            /* These tables only cover the 16-bit character range.
             */
            if ($characterCode < 0 || $characterCode > 0xffff) {
                return Zend_Pdf_Cmap::MISSING_CHARACTER_GLYPH;
            }

            /* Determine where to start the binary search. The segments are
             * ordered from lowest-to-highest. We are looking for the first
             * segment whose end code is greater than or equal to our character
             * code.
             *
             * If the end code at the top of the search range is larger, then
             * our target is at or below it. Otherwise our target is above it,
             * so move the search range to the end of the segment list.
             */
            if ($this->_searchRangeEndCode >= $characterCode) {
                $searchIndex = $this->_searchRange;
            } else {
                $searchIndex = $this->_segmentCount;
            }

            /* Binary search with a halving step. No matter the number of
             * segments, only $this->_searchIterations probes are needed. The
             * step is zero on the last iteration, which only tests the final
             * position.
             */
            $subtableIndex = null;
            for ($i = 1; $i <= $this->_searchIterations; $i++) {
                /* A probe outside the table means no segment ends at or after
                 * this character code (e.g. the table lacks a final segment
                 * covering the top of the range), so stop searching.
                 */
                if (!isset($this->_segmentTableEndCodes[$searchIndex])) {
                    break;
                }

                $step = $this->_searchRange >> $i;
                if ($this->_segmentTableEndCodes[$searchIndex] >= $characterCode) {
                    $subtableIndex = $searchIndex;
                    $searchIndex  -= $step;
                } else {
                    $searchIndex  += $step;
                }
            }

            /* No candidate segment, or the candidate starts after our character
             * code: the character is not represented in this font.
             */
            if ($subtableIndex === null ||
                $this->_segmentTableStartCodes[$subtableIndex] > $characterCode) {
                return Zend_Pdf_Cmap::MISSING_CHARACTER_GLYPH;
            }

            return $this->_glyphNumberInSegment($subtableIndex, $characterCode);
        }

        /**
         * Maps a character code to a glyph number using one specific segment.
         *
         * The caller must make sure the character code lies within the segment's
         * start and end codes.
         *
         * @param integer $segmentNum    1-based segment number.
         * @param integer $characterCode Character code inside that segment.
         * @return integer Glyph number.
         */
        protected function _glyphNumberInSegment($segmentNum, $characterCode)
        {
            $delta       = $this->_segmentTableIdDeltas[$segmentNum];
            $rangeOffset = $this->_segmentTableIdRangeOffsets[$segmentNum];

            if ($rangeOffset == 0) {
                /* This segment uses a simple mapping from character code to
                 * glyph number. The delta is signed, so the sum may be negative;
                 * masking to 16 bits gives the modulo-65536 result, whereas
                 * PHP's % operator would keep the negative sign.
                 */
                return ($characterCode + $delta) & 0xffff;
            }

            /* This segment relies on the glyph index array to determine the
             * glyph number. The calculation below determines the correct
             * index into that array. It's a little odd because the range
             * offset in the font file is designed to quickly provide an
             * address of the index in the raw binary data instead of the
             * index itself. Since we've parsed the data into arrays, we
             * must process it a bit differently.
             */
            $glyphIndex = $characterCode - $this->_segmentTableStartCodes[$segmentNum]
                        + $rangeOffset - $this->_segmentCount
                        + $segmentNum - 1;

            /* A range offset pointing outside the glyph index array cannot be
             * resolved; treat the character as missing.
             */
            if (!isset($this->_glyphIndexArray[$glyphIndex])) {
                return Zend_Pdf_Cmap::MISSING_CHARACTER_GLYPH;
            }

            $glyphNumber = $this->_glyphIndexArray[$glyphIndex];

            /* Per the OpenType format 4 rules, a stored value of zero marks a
             * missing glyph and is returned as-is; any other value has the
             * segment's delta added to it, modulo 65536.
             */
            if ($glyphNumber != 0) {
                $glyphNumber = ($glyphNumber + $delta) & 0xffff;
            }
            return $glyphNumber;
        }

    }