<?php
/**
 * Zend Framework
 *
 * LICENSE
 *
 * This source file is subject to the new BSD license that is bundled
 * with this package in the file LICENSE.txt.
 * It is also available through the world-wide-web at this URL:
 * http://framework.zend.com/license/new-bsd
 * If you did not receive a copy of the license and are unable to
 * obtain it through the world-wide-web, please send an email
 * to license@zend.com so we can send you a copy immediately.
 *
 * @category   Zend
 * @package    Zend_Pdf
 * @subpackage Fonts
 * @copyright  Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 * @version    $Id: SegmentToDelta.php 20096 2010-01-06 02:05:09Z bkarwin $
 */

/** Zend_Pdf_Cmap */
require_once 'Zend/Pdf/Cmap.php';


/**
 * Implements the "segment mapping to delta values" character map (type 4).
 *
 * This is the Microsoft standard mapping table type for OpenType fonts. It
 * provides the ability to cover multiple contiguous ranges of the Unicode
 * character set, with the exception of Unicode Surrogates (U+D800 - U+DFFF).
 *
 * Segments are numbered starting at 1; every per-segment array in this class
 * uses that same 1-based index. The glyph index array is 0-based.
 *
 * @package    Zend_Pdf
 * @subpackage Fonts
 * @copyright  Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */
class Zend_Pdf_Cmap_SegmentToDelta extends Zend_Pdf_Cmap
{
  /**** Instance Variables ****/


    /**
     * The number of segments in the table.
     * @var integer
     */
    protected $_segmentCount = 0;

    /**
     * The size of the binary search range for segments, as recorded in the
     * table header (already divided by two).
     *
     * Kept for reference only: lookups search over the segment count
     * directly, because this header field cannot be trusted in damaged fonts.
     * @var integer
     */
    protected $_searchRange = 0;

    /**
     * The number of binary search steps required to cover the entire search
     * range (header "entry selector" plus one). Kept for reference only.
     * @var integer
     */
    protected $_searchIterations = 0;

    /**
     * Array of ending character codes for each segment.
     * @var array
     */
    protected $_segmentTableEndCodes = array();

    /**
     * The ending character code for the segment at the end of the low search
     * range. Stays 0 when the recorded search range does not name a segment.
     * @var integer
     */
    protected $_searchRangeEndCode = 0;

    /**
     * Array of starting character codes for each segment.
     * @var array
     */
    protected $_segmentTableStartCodes = array();

    /**
     * Array of character code to glyph delta values for each segment. The
     * values are read as signed 16-bit integers.
     * @var array
     */
    protected $_segmentTableIdDeltas = array();

    /**
     * Array of offsets into the glyph index array for each segment, expressed
     * in 16-bit words (the byte offset from the font divided by two).
     * @var array
     */
    protected $_segmentTableIdRangeOffsets = array();

    /**
     * Glyph index array. Stores glyph numbers, used with range offset.
     * @var array
     */
    protected $_glyphIndexArray = array();



  /**** Public Interface ****/


  /* Concrete Class Implementation */

    /**
     * Returns an array of glyph numbers corresponding to the Unicode characters.
     *
     * If a particular character doesn't exist in this font, the special 'missing
     * character glyph' will be substituted.
     *
     * See also {@link glyphNumberForCharacter()}.
     *
     * @param array $characterCodes Array of Unicode character codes (code points).
     * @return array Array of glyph numbers, using the same keys as the input.
     */
    public function glyphNumbersForCharacters($characterCodes)
    {
        $glyphNumbers = array();
        foreach ($characterCodes as $key => $characterCode) {
            /* Each character is resolved independently, so no state from a
             * previous lookup can leak into the next one.
             */
            $glyphNumbers[$key] = $this->_lookupGlyphNumber($characterCode);
        }
        return $glyphNumbers;
    }

    /**
     * Returns the glyph number corresponding to the Unicode character.
     *
     * If a particular character doesn't exist in this font, the special 'missing
     * character glyph' will be substituted.
     *
     * See also {@link glyphNumbersForCharacters()} for bulk operations.
     *
     * @param integer $characterCode Unicode character code (code point).
     * @return integer Glyph number.
     */
    public function glyphNumberForCharacter($characterCode)
    {
        return $this->_lookupGlyphNumber($characterCode);
    }

    /**
     * Returns an array containing the Unicode characters that have entries in
     * this character map.
     *
     * @return array Unicode character codes.
     */
    public function getCoveredCharacters()
    {
        $characterCodes = array();
        for ($segment = 1; $segment <= $this->_segmentCount; $segment++) {
            $lastCode = $this->_segmentTableEndCodes[$segment];
            for ($code = $this->_segmentTableStartCodes[$segment]; $code <= $lastCode; $code++) {
                $characterCodes[] = $code;
            }
        }
        return $characterCodes;
    }


    /**
     * Returns an array containing the glyph numbers that have entries in this
     * character map. Keys are Unicode character codes (integers).
     *
     * This functionality is partially covered by a
     * glyphNumbersForCharacters(getCoveredCharacters()) call, but this method
     * does it more efficiently: it walks the segments once instead of
     * searching for the segment of each character code.
     *
     * @internal
     * @return array Array representing <Unicode character code> => <glyph number> pairs.
     */
    public function getCoveredCharactersGlyphs()
    {
        $glyphNumbers = array();

        for ($segment = 1; $segment <= $this->_segmentCount; $segment++) {
            $lastCode = $this->_segmentTableEndCodes[$segment];
            for ($code = $this->_segmentTableStartCodes[$segment]; $code <= $lastCode; $code++) {
                $glyphNumbers[$code] = $this->_glyphNumberInSegment($code, $segment);
            }
        }

        return $glyphNumbers;
    }



  /* Object Lifecycle */

    /**
     * Object constructor
     *
     * Parses the raw binary table data. Throws an exception if the table is
     * malformed.
     *
     * @param string $cmapData Raw binary cmap table data.
     * @throws Zend_Pdf_Exception
     */
    public function __construct($cmapData)
    {
        /* Sanity check: The table should be at least 23 bytes in size.
         */
        $actualLength = strlen($cmapData);
        if ($actualLength < 23) {
            require_once 'Zend/Pdf/Exception.php';
            throw new Zend_Pdf_Exception('Insufficient table data',
                                         Zend_Pdf_Exception::CMAP_TABLE_DATA_TOO_SMALL);
        }

        /* Sanity check: Make sure this is right data for this table type.
         */
        $type = $this->_extractUInt2($cmapData, 0);
        if ($type != Zend_Pdf_Cmap::TYPE_SEGMENT_TO_DELTA) {
            require_once 'Zend/Pdf/Exception.php';
            throw new Zend_Pdf_Exception('Wrong cmap table type',
                                         Zend_Pdf_Exception::CMAP_WRONG_TABLE_TYPE);
        }

        $length = $this->_extractUInt2($cmapData, 2);
        if ($length != $actualLength) {
            require_once 'Zend/Pdf/Exception.php';
            throw new Zend_Pdf_Exception("Table length ($length) does not match actual length ($actualLength)",
                                         Zend_Pdf_Exception::CMAP_WRONG_TABLE_LENGTH);
        }

        /* Bytes 4-5 hold a language ID. Mapping tables should be
         * language-independent, but many font files in the wild incorrectly
         * record a language ID in this field, so the value is deliberately
         * ignored rather than treated as a failure.
         */

        /* These two values are stored premultiplied by two, which is convenient
         * when using the binary data directly, but we're parsing it out to
         * native PHP data types, so divide by two.
         */
        $this->_segmentCount = $this->_extractUInt2($cmapData, 6) >> 1;
        $this->_searchRange  = $this->_extractUInt2($cmapData, 8) >> 1;

        $this->_searchIterations = $this->_extractUInt2($cmapData, 10) + 1;

        $offset = 14;
        for ($i = 1; $i <= $this->_segmentCount; $i++, $offset += 2) {
            $this->_segmentTableEndCodes[$i] = $this->_extractUInt2($cmapData, $offset);
        }

        /* The recorded search range may be 0 or exceed the segment count in a
         * damaged font; only dereference it when it names a real segment.
         */
        if (isset($this->_segmentTableEndCodes[$this->_searchRange])) {
            $this->_searchRangeEndCode = $this->_segmentTableEndCodes[$this->_searchRange];
        }

        $offset += 2;    // reserved bytes

        for ($i = 1; $i <= $this->_segmentCount; $i++, $offset += 2) {
            $this->_segmentTableStartCodes[$i] = $this->_extractUInt2($cmapData, $offset);
        }

        for ($i = 1; $i <= $this->_segmentCount; $i++, $offset += 2) {
            $this->_segmentTableIdDeltas[$i] = $this->_extractInt2($cmapData, $offset);    // signed
        }

        /* The range offset helps determine the index into the glyph index array.
         * Like the segment count and search range above, it's stored as a byte
         * multiple in the font, so divide by two as we extract the values.
         */
        for ($i = 1; $i <= $this->_segmentCount; $i++, $offset += 2) {
            $this->_segmentTableIdRangeOffsets[$i] = $this->_extractUInt2($cmapData, $offset) >> 1;
        }

        /* The size of the glyph index array varies by font and depends on the
         * extent of the usage of range offsets versus deltas. Some fonts may
         * not have any entries in this array.
         */
        for (; $offset < $length; $offset += 2) {
            $this->_glyphIndexArray[] = $this->_extractUInt2($cmapData, $offset);
        }

        /* Sanity check: After reading all of the data, we should be at the end
         * of the table.
         */
        if ($offset != $length) {
            require_once 'Zend/Pdf/Exception.php';
            throw new Zend_Pdf_Exception("Ending offset ($offset) does not match length ($length)",
                                         Zend_Pdf_Exception::CMAP_FINAL_OFFSET_NOT_LENGTH);
        }
    }



  /**** Internal Methods ****/


    /**
     * Resolves a single character code to a glyph number.
     *
     * Finds the first segment whose end code is greater than or equal to the
     * character code, then maps the code through that segment. The search runs
     * over the actual segment count rather than the search range recorded in
     * the table header, so an inconsistent header cannot send it out of bounds.
     *
     * @param integer $characterCode Unicode character code (code point).
     * @return integer Glyph number, or the missing character glyph.
     */
    protected function _lookupGlyphNumber($characterCode)
    {
        /* These tables only cover the 16-bit character range.
         */
        if (($characterCode < 0) || ($characterCode > 0xffff) || ($this->_segmentCount < 1)) {
            return Zend_Pdf_Cmap::MISSING_CHARACTER_GLYPH;
        }

        /* Segments are ordered from lowest to highest end code, so a binary
         * search narrows [low, high] down to the first candidate segment.
         */
        $low  = 1;
        $high = $this->_segmentCount;
        while ($low < $high) {
            $middle = ($low + $high) >> 1;
            if ($this->_segmentTableEndCodes[$middle] >= $characterCode) {
                $high = $middle;
            } else {
                $low = $middle + 1;
            }
        }

        /* The character is not represented in this font if it lies beyond the
         * last segment or in the gap before the candidate segment starts.
         */
        if (($this->_segmentTableEndCodes[$low] < $characterCode) ||
            ($this->_segmentTableStartCodes[$low] > $characterCode)) {
            return Zend_Pdf_Cmap::MISSING_CHARACTER_GLYPH;
        }

        return $this->_glyphNumberInSegment($characterCode, $low);
    }

    /**
     * Maps a character code through one segment. The caller must ensure the
     * code lies within the segment's start and end codes.
     *
     * @param integer $characterCode Unicode character code (code point).
     * @param integer $segment       Segment number (1-based).
     * @return integer Glyph number, or the missing character glyph.
     */
    protected function _glyphNumberInSegment($characterCode, $segment)
    {
        $delta       = $this->_segmentTableIdDeltas[$segment];
        $rangeOffset = $this->_segmentTableIdRangeOffsets[$segment];

        if ($rangeOffset == 0) {
            /* This segment uses a simple mapping from character code to glyph
             * number, modulo 65536. The delta is signed and PHP's % operator
             * keeps the sign of the dividend, so mask instead to always get a
             * value in the range 0 - 65535.
             */
            return ($characterCode + $delta) & 0xffff;
        }

        /* This segment relies on the glyph index array to determine the glyph
         * number. The range offset in the font file is relative to the
         * position of the offset entry itself, so that it can address the raw
         * binary data directly. Since the data has been parsed into separate
         * arrays, subtract the number of range offset entries that sit between
         * this segment's entry and the start of the glyph index array.
         */
        $glyphIndex = $characterCode - $this->_segmentTableStartCodes[$segment]
                    + $rangeOffset - ($this->_segmentCount - $segment) - 1;

        /* A damaged font may point outside of the glyph index array.
         */
        if (!isset($this->_glyphIndexArray[$glyphIndex])) {
            return Zend_Pdf_Cmap::MISSING_CHARACTER_GLYPH;
        }

        /* A stored value of 0 means "missing glyph" and is returned as-is. Any
         * other value has the segment's delta added to it, modulo 65536.
         */
        $glyphNumber = $this->_glyphIndexArray[$glyphIndex];
        if ($glyphNumber != 0) {
            $glyphNumber = ($glyphNumber + $delta) & 0xffff;
        }
        return $glyphNumber;
    }

}