<?php
/**
 * Zend Framework
 *
 * LICENSE
 *
 * This source file is subject to the new BSD license that is bundled
 * with this package in the file LICENSE.txt.
 * It is also available through the world-wide-web at this URL:
 * http://framework.zend.com/license/new-bsd
 * If you did not receive a copy of the license and are unable to
 * obtain it through the world-wide-web, please send an email
 * to license@zend.com so we can send you a copy immediately.
 *
 * @category   Zend
 * @package    Zend_Pdf
 * @copyright  Copyright (c) 2005-2014 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 * @version    $Id$
 */

/** Internally used classes */
require_once 'Zend/Pdf/Element/Array.php';
require_once 'Zend/Pdf/Element/String/Binary.php';
require_once 'Zend/Pdf/Element/Boolean.php';
require_once 'Zend/Pdf/Element/Dictionary.php';
require_once 'Zend/Pdf/Element/Name.php';
require_once 'Zend/Pdf/Element/Null.php';
require_once 'Zend/Pdf/Element/Numeric.php';
require_once 'Zend/Pdf/Element/Object.php';
require_once 'Zend/Pdf/Element/Object/Stream.php';
require_once 'Zend/Pdf/Element/Reference.php';
require_once 'Zend/Pdf/Element/String.php';


/**
 * PDF string parser
 *
 * Tokenizes an in-memory PDF source string ({@link $data}) starting at the
 * current position ({@link $offset}) and builds Zend_Pdf_Element objects
 * from the lexemes it reads.
 *
 * @package    Zend_Pdf
 * @copyright  Copyright (c) 2005-2014 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */
class Zend_Pdf_StringParser
{
    /**
     * Source PDF
     *
     * @var string
     */
    public $data = '';

    /**
     * Current position in a data
     *
     * @var integer
     */
    public $offset = 0;

    /**
     * Current reference context
     *
     * @var Zend_Pdf_Element_Reference_Context
     */
    private $_context = null;

    /**
     * Array of elements of the currently parsed object/trailer
     *
     * @var array
     */
    private $_elements = array();

    /**
     * PDF objects factory.
     *
     * @var Zend_Pdf_ElementFactory_Interface
     */
    private $_objFactory = null;


    /**
     * Clean up resources.
     *
     * Clear current state to remove cyclic object references
     */
    public function cleanUp()
    {
        $this->_context    = null;
        $this->_elements   = array();
        $this->_objFactory = null;
    }

    /**
     * Character with code $chCode is white space
     *
     * @param integer $chCode
     * @return boolean
     */
    public static function isWhiteSpace($chCode)
    {
        return $chCode == 0x00    // null character
            || $chCode == 0x09    // Tab
            || $chCode == 0x0A    // Line feed
            || $chCode == 0x0C    // Form Feed
            || $chCode == 0x0D    // Carriage return
            || $chCode == 0x20;   // Space
    }


    /**
     * Character with code $chCode is a delimiter character
     *
     * @param integer $chCode
     * @return boolean
     */
    public static function isDelimiter($chCode)
    {
        return $chCode == 0x28    // '('
            || $chCode == 0x29    // ')'
            || $chCode == 0x3C    // '<'
            || $chCode == 0x3E    // '>'
            || $chCode == 0x5B    // '['
            || $chCode == 0x5D    // ']'
            || $chCode == 0x7B    // '{'
            || $chCode == 0x7D    // '}'
            || $chCode == 0x2F    // '/'
            || $chCode == 0x25;   // '%'
    }


    /**
     * Skip white space
     *
     * Advances {@link $offset} past any run of white space characters. When
     * $skipComment is true, '%' comments (up to, but not including, the next
     * CR or LF) are skipped as well.
     *
     * @param boolean $skipComment
     */
    public function skipWhiteSpace($skipComment = true)
    {
        if (!$skipComment) {
            $this->offset += strspn($this->data, "\x00\t\n\f\r ", $this->offset);
            return;
        }

        while (true) {
            $this->offset += strspn($this->data, "\x00\t\n\f\r ", $this->offset);

            if ($this->offset < strlen($this->data)  &&  $this->data[$this->offset] == '%') {
                // Skip comment
                $this->offset += strcspn($this->data, "\r\n", $this->offset);
            } else {
                // Non white space character not equal to '%' is found
                return;
            }
        }
    }


    /**
     * Skip comment
     *
     * Advances {@link $offset} to the next line feed or carriage return
     * (which is not consumed), or to the end of the data if there is none.
     */
    public function skipComment()
    {
        // The previous implementation tested "!= LF || != CR", which is always
        // true, so it ran to the end of the data instead of the end of the line.
        if ($this->offset < strlen($this->data)) {
            $this->offset += strcspn($this->data, "\r\n", $this->offset);
        }
    }


    /**
     * Read comment line
     *
     * Skips leading white space (but not comments). If the next character is
     * '%', consumes everything up to the next CR or LF (or end of data) and
     * returns it, including the leading '%'.
     *
     * @return string Comment text, or an empty string if there is no comment
     *                at the current position
     */
    public function readComment()
    {
        $this->skipWhiteSpace(false);

        /** Check if it's a comment line (and that we are not at the end of data) */
        if ($this->offset >= strlen($this->data)  ||  $this->data[$this->offset] != '%') {
            return '';
        }

        $start = $this->offset;
        $this->offset += strcspn($this->data, "\r\n", $start);

        return substr($this->data, $start, $this->offset - $start);
    }


    /**
     * Returns next lexeme from a pdf stream
     *
     * Leading white space and comments are skipped. A delimiter character is
     * returned as a one-character lexeme, except for '<<' and '>>' which are
     * returned as two-character lexemes. Anything else is returned up to the
     * next delimiter or white space character.
     *
     * @return string Lexeme, or an empty string at the end of data
     */
    public function readLexeme()
    {
        // Inlined equivalent of $this->skipWhiteSpace()
        while (true) {
            $this->offset += strspn($this->data, "\x00\t\n\f\r ", $this->offset);

            if ($this->offset < strlen($this->data)  &&  $this->data[$this->offset] == '%') {
                $this->offset += strcspn($this->data, "\r\n", $this->offset);
            } else {
                break;
            }
        }

        if ($this->offset >= strlen($this->data)) {
            return '';
        }

        if (strpos('()<>[]{}/%', $this->data[$this->offset]) !== false) {
            // Delimiter: check for the two-character dictionary brackets first
            $pair = substr($this->data, $this->offset, 2);
            if ($pair === '<<'  ||  $pair === '>>') {
                $this->offset += 2;
                return $pair;
            }

            return $this->data[$this->offset++];
        }

        $start = $this->offset;

        // "\f" is left out of the stop set on PHP versions older than 5.2.5
        if (version_compare(phpversion(), '5.2.5') >= 0) {
            $compare = "()<>[]{}/%\x00\t\n\f\r ";
        } else {
            $compare = "()<>[]{}/%\x00\t\n\r ";
        }

        $this->offset += strcspn($this->data, $compare, $this->offset);

        return substr($this->data, $start, $this->offset - $start);
    }


    /**
     * Read elemental object from a PDF stream
     *
     * @param string|null $nextLexeme Already read lexeme to start from; when
     *                                null, the next lexeme is read from the data
     * @return Zend_Pdf_Element
     * @throws Zend_Pdf_Exception
     */
    public function readElement($nextLexeme = null)
    {
        if ($nextLexeme === null) {
            $nextLexeme = $this->readLexeme();
        }

        /**
         * Note: readElement() method is a public method and could be invoked from other classes.
         * If readElement() is used not by Zend_Pdf_StringParser::getObject() method, then we should not care
         * about _elements member management.
         */
        switch ($nextLexeme) {
            case '(':
                return ($this->_elements[] = $this->_readString());

            case '<':
                return ($this->_elements[] = $this->_readBinaryString());

            case '/':
                return ($this->_elements[] = new Zend_Pdf_Element_Name(
                                                Zend_Pdf_Element_Name::unescape( $this->readLexeme() )
                                                                      ));

            case '[':
                return ($this->_elements[] = $this->_readArray());

            case '<<':
                return ($this->_elements[] = $this->_readDictionary());

            case ')':
                // fall through to next case
            case '>':
                // fall through to next case
            case ']':
                // fall through to next case
            case '>>':
                // fall through to next case
            case '{':
                // fall through to next case
            case '}':
                require_once 'Zend/Pdf/Exception.php';
                throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X.',
                                                $this->offset));

            default:
                if (strcasecmp($nextLexeme, 'true') == 0) {
                    return ($this->_elements[] = new Zend_Pdf_Element_Boolean(true));
                } else if (strcasecmp($nextLexeme, 'false') == 0) {
                    return ($this->_elements[] = new Zend_Pdf_Element_Boolean(false));
                } else if (strcasecmp($nextLexeme, 'null') == 0) {
                    return ($this->_elements[] = new Zend_Pdf_Element_Null());
                }

                // Try "<objNum> <genNum> R" first; fall back to a plain number
                $ref = $this->_readReference($nextLexeme);
                if ($ref !== null) {
                    return ($this->_elements[] = $ref);
                }

                return ($this->_elements[] = $this->_readNumeric($nextLexeme));
        }
    }


    /**
     * Read string PDF object
     * Also reads trailing ')' from a pdf stream
     *
     * @return Zend_Pdf_Element_String
     * @throws Zend_Pdf_Exception
     */
    private function _readString()
    {
        $start          = $this->offset;
        $length         = strlen($this->data);
        $openedBrackets = 1;

        $this->offset += strcspn($this->data, '()\\', $this->offset);

        while ($this->offset < $length) {
            switch (ord( $this->data[$this->offset] )) {
                case 0x28: // '(' - opened bracket in the string, needs balanced pair.
                    $this->offset++;
                    $openedBrackets++;
                    break;

                case 0x29: // ')' - pair to the opened bracket
                    $this->offset++;
                    $openedBrackets--;
                    break;

                case 0x5C: // '\\' - escape sequence, skip next char from a check
                    $this->offset += 2;
            }

            if ($openedBrackets == 0) {
                break; // end of string
            }

            if ($this->offset >= $length) {
                // A trailing backslash may have moved the offset past the end of
                // the data; don't scan from an out-of-range offset, just report
                // the unterminated string below.
                break;
            }

            $this->offset += strcspn($this->data, '()\\', $this->offset);
        }

        if ($openedBrackets != 0) {
            require_once 'Zend/Pdf/Exception.php';
            throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Unexpected end of file while string reading. Offset - 0x%X. \')\' expected.', $start));
        }

        return new Zend_Pdf_Element_String(Zend_Pdf_Element_String::unescape( substr($this->data,
                                                                                      $start,
                                                                                      $this->offset - $start - 1) ));
    }


    /**
     * Read binary string PDF object
     * Also reads trailing '>' from a pdf stream
     *
     * @return Zend_Pdf_Element_String_Binary
     * @throws Zend_Pdf_Exception
     */
    private function _readBinaryString()
    {
        $start = $this->offset;

        $this->offset += strspn($this->data, "\x00\t\n\f\r 0123456789abcdefABCDEF", $this->offset);

        // End of data is reached only when no character is left to examine. A
        // closing '>' that happens to be the very last byte is still valid.
        if ($this->offset >= strlen($this->data)) {
            require_once 'Zend/Pdf/Exception.php';
            throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Unexpected end of file while reading binary string. Offset - 0x%X. \'>\' expected.', $start));
        }

        if ($this->data[$this->offset++] != '>') {
            require_once 'Zend/Pdf/Exception.php';
            throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Unexpected character while binary string reading. Offset - 0x%X.', $this->offset));
        }

        return new Zend_Pdf_Element_String_Binary(
                       Zend_Pdf_Element_String_Binary::unescape( substr($this->data,
                                                                        $start,
                                                                        $this->offset - $start - 1) ));
    }


    /**
     * Read array PDF object
     * Also reads trailing ']' from a pdf stream
     *
     * @return Zend_Pdf_Element_Array
     * @throws Zend_Pdf_Exception
     */
    private function _readArray()
    {
        $elements = array();

        while ( strlen($nextLexeme = $this->readLexeme()) != 0 ) {
            if ($nextLexeme == ']') {
                return new Zend_Pdf_Element_Array($elements);
            }

            $elements[] = $this->readElement($nextLexeme);
        }

        require_once 'Zend/Pdf/Exception.php';
        throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Unexpected end of file while array reading. Offset - 0x%X. \']\' expected.', $this->offset));
    }


    /**
     * Read dictionary PDF object
     * Also reads trailing '>>' from a pdf stream
     *
     * @return Zend_Pdf_Element_Dictionary
     * @throws Zend_Pdf_Exception
     */
    private function _readDictionary()
    {
        $dictionary = new Zend_Pdf_Element_Dictionary();

        while ( strlen($nextLexeme = $this->readLexeme()) != 0 ) {
            if ($nextLexeme == '>>') {
                return $dictionary;
            }

            // Remember where the key starts for error reporting
            $nameStart = $this->offset - strlen($nextLexeme);

            $name  = $this->readElement($nextLexeme);
            $value = $this->readElement();

            if (!$name instanceof Zend_Pdf_Element_Name) {
                require_once 'Zend/Pdf/Exception.php';
                throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Name object expected while dictionary reading. Offset - 0x%X.', $nameStart));
            }

            $dictionary->add($name, $value);
        }

        require_once 'Zend/Pdf/Exception.php';
        throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Unexpected end of file while dictionary reading. Offset - 0x%X. \'>>\' expected.', $this->offset));
    }


    /**
     * Read reference PDF object
     *
     * Expects "<objNum> <genNum> R". If the lexemes do not match that shape,
     * the offset is rewound to where it was on entry and null is returned.
     *
     * @param string $nextLexeme
     * @return Zend_Pdf_Element_Reference|null
     */
    private function _readReference($nextLexeme = null)
    {
        $start = $this->offset;

        if ($nextLexeme === null) {
            $objNum = $this->readLexeme();
        } else {
            $objNum = $nextLexeme;
        }
        if (!ctype_digit($objNum)) { // it's not a reference
            $this->offset = $start;
            return null;
        }

        $genNum = $this->readLexeme();
        if (!ctype_digit($genNum)) { // it's not a reference
            $this->offset = $start;
            return null;
        }

        $rMark = $this->readLexeme();
        if ($rMark != 'R') { // it's not a reference
            $this->offset = $start;
            return null;
        }

        return new Zend_Pdf_Element_Reference((int)$objNum, (int)$genNum, $this->_context, $this->_objFactory->resolve());
    }


    /**
     * Read numeric PDF object
     *
     * @param string $nextLexeme
     * @return Zend_Pdf_Element_Numeric
     */
    private function _readNumeric($nextLexeme = null)
    {
        if ($nextLexeme === null) {
            $nextLexeme = $this->readLexeme();
        }

        return new Zend_Pdf_Element_Numeric($nextLexeme);
    }


    /**
     * Read indirect object from a PDF stream
     *
     * The current offset is saved on entry and restored before a successful
     * return. It is not restored when an exception is thrown.
     *
     * @param integer $offset
     * @param Zend_Pdf_Element_Reference_Context $context
     * @return Zend_Pdf_Element_Object
     * @throws Zend_Pdf_Exception
     */
    public function getObject($offset, Zend_Pdf_Element_Reference_Context $context)
    {
        if ($offset === null ) {
            return new Zend_Pdf_Element_Null();
        }

        // Save current offset to make getObject() reentrant
        $offsetSave = $this->offset;

        $this->offset    = $offset;
        $this->_context  = $context;
        $this->_elements = array();

        $objNum = $this->readLexeme();
        if (!ctype_digit($objNum)) {
            require_once 'Zend/Pdf/Exception.php';
            throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. Object number expected.', $this->offset - strlen($objNum)));
        }

        $genNum = $this->readLexeme();
        if (!ctype_digit($genNum)) {
            require_once 'Zend/Pdf/Exception.php';
            throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. Object generation number expected.', $this->offset - strlen($genNum)));
        }

        $objKeyword = $this->readLexeme();
        if ($objKeyword != 'obj') {
            require_once 'Zend/Pdf/Exception.php';
            throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. \'obj\' keyword expected.', $this->offset - strlen($objKeyword)));
        }

        $objValue = $this->readElement();

        // Keep this object's own elements in a local. Dereferencing an indirect
        // /Length further down may re-enter getObject() on this parser, and
        // every call resets $this->_elements.
        $elements = $this->_elements;

        $nextLexeme = $this->readLexeme();

        if( $nextLexeme == 'endobj' ) {
            /**
             * Object is not generated by factory (thus it's not marked as modified object).
             * But factory is assigned to the object.
             */
            $obj = new Zend_Pdf_Element_Object($objValue, (int)$objNum, (int)$genNum, $this->_objFactory->resolve());

            foreach ($elements as $element) {
                $element->setParentObject($obj);
            }

            // Restore offset value
            $this->offset = $offsetSave;

            return $obj;
        }

        /**
         * It's a stream object
         */
        if ($nextLexeme != 'stream') {
            require_once 'Zend/Pdf/Exception.php';
            throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. \'endobj\' or \'stream\' keywords expected.', $this->offset - strlen($nextLexeme)));
        }

        if (!$objValue instanceof Zend_Pdf_Element_Dictionary) {
            require_once 'Zend/Pdf/Exception.php';
            throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. Stream extent must be preceded by stream dictionary.', $this->offset - strlen($nextLexeme)));
        }

        /**
         * References are automatically dereferenced at this moment.
         */
        $streamLength = $objValue->Length->value;

        /**
         * 'stream' keyword must be followed by either cr-lf sequence or lf character only.
         * This restriction gives the possibility to recognize all cases exactly
         */
        // substr() is used instead of direct indexing so that a 'stream'
        // keyword at the very end of the data does not read past the string.
        $eol = (string) substr($this->data, $this->offset, 2);
        if ($eol === "\r\n") {
            $this->offset += 2;
        } else if ($eol !== ''  &&  $eol[0] === "\n") {
            $this->offset++;
        } else {
            require_once 'Zend/Pdf/Exception.php';
            throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. \'stream\' must be followed by either cr-lf sequence or lf character only.', $this->offset - strlen($nextLexeme)));
        }

        $dataOffset = $this->offset;

        $this->offset += $streamLength;

        $nextLexeme = $this->readLexeme();
        if ($nextLexeme != 'endstream') {
            require_once 'Zend/Pdf/Exception.php';
            throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. \'endstream\' keyword expected.', $this->offset - strlen($nextLexeme)));
        }

        $nextLexeme = $this->readLexeme();
        if ($nextLexeme != 'endobj') {
            require_once 'Zend/Pdf/Exception.php';
            throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. \'endobj\' keyword expected.', $this->offset - strlen($nextLexeme)));
        }

        $obj = new Zend_Pdf_Element_Object_Stream(substr($this->data,
                                                         $dataOffset,
                                                         $streamLength),
                                                  (int)$objNum,
                                                  (int)$genNum,
                                                  $this->_objFactory->resolve(),
                                                  $objValue);

        foreach ($elements as $element) {
            $element->setParentObject($obj);
        }

        // Restore offset value
        $this->offset = $offsetSave;

        return $obj;
    }


    /**
     * Get length of source string
     *
     * @return integer
     */
    public function getLength()
    {
        return strlen($this->data);
    }

    /**
     * Get source string
     *
     * @return string
     */
    public function getString()
    {
        return $this->data;
    }


    /**
     * Parse integer value from a binary stream
     *
     * Reads $size bytes starting at $offset and combines them most
     * significant byte first.
     *
     * @param string  $stream
     * @param integer $offset
     * @param integer $size
     * @return integer
     */
    public static function parseIntFromStream($stream, $offset, $size)
    {
        $value = 0;
        for ($count = 0; $count < $size; $count++) {
            $value *= 256;
            $value += ord($stream[$offset + $count]);
        }

        return $value;
    }


    /**
     * Set current context
     *
     * @param Zend_Pdf_Element_Reference_Context $context
     */
    public function setContext(Zend_Pdf_Element_Reference_Context $context)
    {
        $this->_context = $context;
    }

    /**
     * Object constructor
     *
     * Note: PHP duplicates a string, which is sent by value, only if it's updated.
     * Thus we don't need to care about overhead
     *
     * @param string $source
     * @param Zend_Pdf_ElementFactory_Interface $factory
     */
    public function __construct($source, Zend_Pdf_ElementFactory_Interface $factory)
    {
        $this->data         = $source;
        $this->_objFactory  = $factory;
    }
}