���ѧۧݧ�ӧ�� �ާ֧ߧ֧էا֧� - ���֧էѧܧ�ڧ��ӧѧ�� - /home3/cpr76684/public_html/Reader.tar
���ѧ٧ѧ�
Wrapper/XMLInternalErrorsHelper.php 0000644 00000004153 15152657741 0013407 0 ustar 00 <?php namespace Box\Spout\Reader\Wrapper; use Box\Spout\Reader\Exception\XMLProcessingException; /** * Trait XMLInternalErrorsHelper */ trait XMLInternalErrorsHelper { /** @var bool Stores whether XML errors were initially stored internally - used to reset */ protected $initialUseInternalErrorsValue; /** * To avoid displaying lots of warning/error messages on screen, * stores errors internally instead. * * @return void */ protected function useXMLInternalErrors() { \libxml_clear_errors(); $this->initialUseInternalErrorsValue = \libxml_use_internal_errors(true); } /** * Throws an XMLProcessingException if an error occured. * It also always resets the "libxml_use_internal_errors" setting back to its initial value. * * @throws \Box\Spout\Reader\Exception\XMLProcessingException * @return void */ protected function resetXMLInternalErrorsSettingAndThrowIfXMLErrorOccured() { if ($this->hasXMLErrorOccured()) { $this->resetXMLInternalErrorsSetting(); throw new XMLProcessingException($this->getLastXMLErrorMessage()); } $this->resetXMLInternalErrorsSetting(); } /** * Returns whether the a XML error has occured since the last time errors were cleared. * * @return bool TRUE if an error occured, FALSE otherwise */ private function hasXMLErrorOccured() { return (\libxml_get_last_error() !== false); } /** * Returns the error message for the last XML error that occured. 
* @see libxml_get_last_error * * @return string|null Last XML error message or null if no error */ private function getLastXMLErrorMessage() { $errorMessage = null; $error = \libxml_get_last_error(); if ($error !== false) { $errorMessage = \trim($error->message); } return $errorMessage; } /** * @return void */ protected function resetXMLInternalErrorsSetting() { \libxml_use_internal_errors($this->initialUseInternalErrorsValue); } } Wrapper/XMLReader.php 0000644 00000013510 15152657741 0010475 0 ustar 00 <?php namespace Box\Spout\Reader\Wrapper; /** * Class XMLReader * Wrapper around the built-in XMLReader * @see \XMLReader */ class XMLReader extends \XMLReader { use XMLInternalErrorsHelper; const ZIP_WRAPPER = 'zip://'; /** * Opens the XML Reader to read a file located inside a ZIP file. * * @param string $zipFilePath Path to the ZIP file * @param string $fileInsideZipPath Relative or absolute path of the file inside the zip * @return bool TRUE on success or FALSE on failure */ public function openFileInZip($zipFilePath, $fileInsideZipPath) { $wasOpenSuccessful = false; $realPathURI = $this->getRealPathURIForFileInZip($zipFilePath, $fileInsideZipPath); // We need to check first that the file we are trying to read really exist because: // - PHP emits a warning when trying to open a file that does not exist. // - HHVM does not check if file exists within zip file (@link https://github.com/facebook/hhvm/issues/5779) if ($this->fileExistsWithinZip($realPathURI)) { $wasOpenSuccessful = $this->open($realPathURI, null, LIBXML_NONET); } return $wasOpenSuccessful; } /** * Returns the real path for the given path components. * This is useful to avoid issues on some Windows setup. 
* * @param string $zipFilePath Path to the ZIP file * @param string $fileInsideZipPath Relative or absolute path of the file inside the zip * @return string The real path URI */ public function getRealPathURIForFileInZip($zipFilePath, $fileInsideZipPath) { // The file path should not start with a '/', otherwise it won't be found $fileInsideZipPathWithoutLeadingSlash = \ltrim($fileInsideZipPath, '/'); return (self::ZIP_WRAPPER . \realpath($zipFilePath) . '#' . $fileInsideZipPathWithoutLeadingSlash); } /** * Returns whether the file at the given location exists * * @param string $zipStreamURI URI of a zip stream, e.g. "zip://file.zip#path/inside.xml" * @return bool TRUE if the file exists, FALSE otherwise */ protected function fileExistsWithinZip($zipStreamURI) { $doesFileExists = false; $pattern = '/zip:\/\/([^#]+)#(.*)/'; if (\preg_match($pattern, $zipStreamURI, $matches)) { $zipFilePath = $matches[1]; $innerFilePath = $matches[2]; $zip = new \ZipArchive(); if ($zip->open($zipFilePath) === true) { $doesFileExists = ($zip->locateName($innerFilePath) !== false); $zip->close(); } } return $doesFileExists; } /** * Move to next node in document * @see \XMLReader::read * * @throws \Box\Spout\Reader\Exception\XMLProcessingException If an error/warning occurred * @return bool TRUE on success or FALSE on failure */ #[\ReturnTypeWillChange] public function read() { $this->useXMLInternalErrors(); $wasReadSuccessful = parent::read(); $this->resetXMLInternalErrorsSettingAndThrowIfXMLErrorOccured(); return $wasReadSuccessful; } /** * Read until the element with the given name is found, or the end of the file. 
* * @param string $nodeName Name of the node to find * @throws \Box\Spout\Reader\Exception\XMLProcessingException If an error/warning occurred * @return bool TRUE on success or FALSE on failure */ public function readUntilNodeFound($nodeName) { do { $wasReadSuccessful = $this->read(); $isNotPositionedOnStartingNode = !$this->isPositionedOnStartingNode($nodeName); } while ($wasReadSuccessful && $isNotPositionedOnStartingNode); return $wasReadSuccessful; } /** * Move cursor to next node skipping all subtrees * @see \XMLReader::next * * @param string|null $localName The name of the next node to move to * @throws \Box\Spout\Reader\Exception\XMLProcessingException If an error/warning occurred * @return bool TRUE on success or FALSE on failure */ #[\ReturnTypeWillChange] public function next($localName = null) { $this->useXMLInternalErrors(); $wasNextSuccessful = parent::next($localName); $this->resetXMLInternalErrorsSettingAndThrowIfXMLErrorOccured(); return $wasNextSuccessful; } /** * @param string $nodeName * @return bool Whether the XML Reader is currently positioned on the starting node with given name */ public function isPositionedOnStartingNode($nodeName) { return $this->isPositionedOnNode($nodeName, self::ELEMENT); } /** * @param string $nodeName * @return bool Whether the XML Reader is currently positioned on the ending node with given name */ public function isPositionedOnEndingNode($nodeName) { return $this->isPositionedOnNode($nodeName, self::END_ELEMENT); } /** * @param string $nodeName * @param int $nodeType * @return bool Whether the XML Reader is currently positioned on the node with given name and type */ private function isPositionedOnNode($nodeName, $nodeType) { // In some cases, the node has a prefix (for instance, "<sheet>" can also be "<x:sheet>"). // So if the given node name does not have a prefix, we need to look at the unprefixed name ("localName"). 
// @see https://github.com/box/spout/issues/233 $hasPrefix = (\strpos($nodeName, ':') !== false); $currentNodeName = ($hasPrefix) ? $this->name : $this->localName; return ($this->nodeType === $nodeType && $currentNodeName === $nodeName); } /** * @return string The name of the current node, un-prefixed */ public function getCurrentNodeName() { return $this->localName; } } CSV/Reader.php 0000644 00000011072 15152657741 0007130 0 ustar 00 <?php namespace Box\Spout\Reader\CSV; use Box\Spout\Common\Exception\IOException; use Box\Spout\Common\Helper\GlobalFunctionsHelper; use Box\Spout\Common\Manager\OptionsManagerInterface; use Box\Spout\Reader\Common\Creator\InternalEntityFactoryInterface; use Box\Spout\Reader\Common\Entity\Options; use Box\Spout\Reader\CSV\Creator\InternalEntityFactory; use Box\Spout\Reader\ReaderAbstract; /** * Class Reader * This class provides support to read data from a CSV file. */ class Reader extends ReaderAbstract { /** @var resource Pointer to the file to be written */ protected $filePointer; /** @var SheetIterator To iterator over the CSV unique "sheet" */ protected $sheetIterator; /** @var string Original value for the "auto_detect_line_endings" INI value */ protected $originalAutoDetectLineEndings; /** @var bool Whether the code is running with PHP >= 8.1 */ private $isRunningAtLeastPhp81; /** * @param OptionsManagerInterface $optionsManager * @param GlobalFunctionsHelper $globalFunctionsHelper * @param InternalEntityFactoryInterface $entityFactory */ public function __construct( OptionsManagerInterface $optionsManager, GlobalFunctionsHelper $globalFunctionsHelper, InternalEntityFactoryInterface $entityFactory ) { parent::__construct($optionsManager, $globalFunctionsHelper, $entityFactory); $this->isRunningAtLeastPhp81 = \version_compare(PHP_VERSION, '8.1.0') >= 0; } /** * Sets the field delimiter for the CSV. * Needs to be called before opening the reader. 
* * @param string $fieldDelimiter Character that delimits fields * @return Reader */ public function setFieldDelimiter($fieldDelimiter) { $this->optionsManager->setOption(Options::FIELD_DELIMITER, $fieldDelimiter); return $this; } /** * Sets the field enclosure for the CSV. * Needs to be called before opening the reader. * * @param string $fieldEnclosure Character that enclose fields * @return Reader */ public function setFieldEnclosure($fieldEnclosure) { $this->optionsManager->setOption(Options::FIELD_ENCLOSURE, $fieldEnclosure); return $this; } /** * Sets the encoding of the CSV file to be read. * Needs to be called before opening the reader. * * @param string $encoding Encoding of the CSV file to be read * @return Reader */ public function setEncoding($encoding) { $this->optionsManager->setOption(Options::ENCODING, $encoding); return $this; } /** * Returns whether stream wrappers are supported * * @return bool */ protected function doesSupportStreamWrapper() { return true; } /** * Opens the file at the given path to make it ready to be read. * If setEncoding() was not called, it assumes that the file is encoded in UTF-8. * * @param string $filePath Path of the CSV file to be read * @throws \Box\Spout\Common\Exception\IOException * @return void */ protected function openReader($filePath) { // "auto_detect_line_endings" is deprecated in PHP 8.1 if (!$this->isRunningAtLeastPhp81) { $this->originalAutoDetectLineEndings = \ini_get('auto_detect_line_endings'); \ini_set('auto_detect_line_endings', '1'); } $this->filePointer = $this->globalFunctionsHelper->fopen($filePath, 'r'); if (!$this->filePointer) { throw new IOException("Could not open file $filePath for reading."); } /** @var InternalEntityFactory $entityFactory */ $entityFactory = $this->entityFactory; $this->sheetIterator = $entityFactory->createSheetIterator( $this->filePointer, $this->optionsManager, $this->globalFunctionsHelper ); } /** * Returns an iterator to iterate over sheets. 
* * @return SheetIterator To iterate over sheets */ protected function getConcreteSheetIterator() { return $this->sheetIterator; } /** * Closes the reader. To be used after reading the file. * * @return void */ protected function closeReader() { if ($this->filePointer) { $this->globalFunctionsHelper->fclose($this->filePointer); } // "auto_detect_line_endings" is deprecated in PHP 8.1 if (!$this->isRunningAtLeastPhp81) { \ini_set('auto_detect_line_endings', $this->originalAutoDetectLineEndings); } } } CSV/Creator/InternalEntityFactory.php 0000644 00000005211 15152657741 0013624 0 ustar 00 <?php namespace Box\Spout\Reader\CSV\Creator; use Box\Spout\Common\Creator\HelperFactory; use Box\Spout\Common\Entity\Cell; use Box\Spout\Common\Entity\Row; use Box\Spout\Common\Helper\GlobalFunctionsHelper; use Box\Spout\Common\Manager\OptionsManagerInterface; use Box\Spout\Reader\Common\Creator\InternalEntityFactoryInterface; use Box\Spout\Reader\CSV\RowIterator; use Box\Spout\Reader\CSV\Sheet; use Box\Spout\Reader\CSV\SheetIterator; /** * Class EntityFactory * Factory to create entities */ class InternalEntityFactory implements InternalEntityFactoryInterface { /** @var HelperFactory */ private $helperFactory; /** * @param HelperFactory $helperFactory */ public function __construct(HelperFactory $helperFactory) { $this->helperFactory = $helperFactory; } /** * @param resource $filePointer Pointer to the CSV file to read * @param OptionsManagerInterface $optionsManager * @param GlobalFunctionsHelper $globalFunctionsHelper * @return SheetIterator */ public function createSheetIterator($filePointer, $optionsManager, $globalFunctionsHelper) { $rowIterator = $this->createRowIterator($filePointer, $optionsManager, $globalFunctionsHelper); $sheet = $this->createSheet($rowIterator); return new SheetIterator($sheet); } /** * @param RowIterator $rowIterator * @return Sheet */ private function createSheet($rowIterator) { return new Sheet($rowIterator); } /** * @param resource $filePointer 
Pointer to the CSV file to read * @param OptionsManagerInterface $optionsManager * @param GlobalFunctionsHelper $globalFunctionsHelper * @return RowIterator */ private function createRowIterator($filePointer, $optionsManager, $globalFunctionsHelper) { $encodingHelper = $this->helperFactory->createEncodingHelper($globalFunctionsHelper); return new RowIterator($filePointer, $optionsManager, $encodingHelper, $this, $globalFunctionsHelper); } /** * @param Cell[] $cells * @return Row */ public function createRow(array $cells = []) { return new Row($cells, null); } /** * @param mixed $cellValue * @return Cell */ public function createCell($cellValue) { return new Cell($cellValue); } /** * @param array $cellValues * @return Row */ public function createRowFromArray(array $cellValues = []) { $cells = \array_map(function ($cellValue) { return $this->createCell($cellValue); }, $cellValues); return $this->createRow($cells); } } CSV/SheetIterator.php 0000644 00000003666 15152657741 0010522 0 ustar 00 <?php namespace Box\Spout\Reader\CSV; use Box\Spout\Reader\IteratorInterface; /** * Class SheetIterator * Iterate over CSV unique "sheet". 
*/ class SheetIterator implements IteratorInterface { /** @var \Box\Spout\Reader\CSV\Sheet The CSV unique "sheet" */ protected $sheet; /** @var bool Whether the unique "sheet" has already been read */ protected $hasReadUniqueSheet = false; /** * @param Sheet $sheet Corresponding unique sheet */ public function __construct($sheet) { $this->sheet = $sheet; } /** * Rewind the Iterator to the first element * @see http://php.net/manual/en/iterator.rewind.php * * @return void */ #[\ReturnTypeWillChange] public function rewind() { $this->hasReadUniqueSheet = false; } /** * Checks if current position is valid * @see http://php.net/manual/en/iterator.valid.php * * @return bool */ #[\ReturnTypeWillChange] public function valid() { return (!$this->hasReadUniqueSheet); } /** * Move forward to next element * @see http://php.net/manual/en/iterator.next.php * * @return void */ #[\ReturnTypeWillChange] public function next() { $this->hasReadUniqueSheet = true; } /** * Return the current element * @see http://php.net/manual/en/iterator.current.php * * @return \Box\Spout\Reader\CSV\Sheet */ #[\ReturnTypeWillChange] public function current() { return $this->sheet; } /** * Return the key of the current element * @see http://php.net/manual/en/iterator.key.php * * @return int */ #[\ReturnTypeWillChange] public function key() { return 1; } /** * Cleans up what was created to iterate over the object. * * @return void */ public function end() { // do nothing } } CSV/RowIterator.php 0000644 00000021136 15152657741 0010211 0 ustar 00 <?php namespace Box\Spout\Reader\CSV; use Box\Spout\Common\Entity\Row; use Box\Spout\Common\Helper\EncodingHelper; use Box\Spout\Common\Helper\GlobalFunctionsHelper; use Box\Spout\Common\Manager\OptionsManagerInterface; use Box\Spout\Reader\Common\Entity\Options; use Box\Spout\Reader\CSV\Creator\InternalEntityFactory; use Box\Spout\Reader\IteratorInterface; /** * Class RowIterator * Iterate over CSV rows. 
*/ class RowIterator implements IteratorInterface { /** * Value passed to fgetcsv. 0 means "unlimited" (slightly slower but accomodates for very long lines). */ const MAX_READ_BYTES_PER_LINE = 0; /** @var resource Pointer to the CSV file to read */ protected $filePointer; /** @var int Number of read rows */ protected $numReadRows = 0; /** @var Row|null Buffer used to store the current row, while checking if there are more rows to read */ protected $rowBuffer; /** @var bool Indicates whether all rows have been read */ protected $hasReachedEndOfFile = false; /** @var string Defines the character used to delimit fields (one character only) */ protected $fieldDelimiter; /** @var string Defines the character used to enclose fields (one character only) */ protected $fieldEnclosure; /** @var string Encoding of the CSV file to be read */ protected $encoding; /** @var bool Whether empty rows should be returned or skipped */ protected $shouldPreserveEmptyRows; /** @var \Box\Spout\Common\Helper\EncodingHelper Helper to work with different encodings */ protected $encodingHelper; /** @var \Box\Spout\Reader\CSV\Creator\InternalEntityFactory Factory to create entities */ protected $entityFactory; /** @var \Box\Spout\Common\Helper\GlobalFunctionsHelper Helper to work with global functions */ protected $globalFunctionsHelper; /** * @param resource $filePointer Pointer to the CSV file to read * @param OptionsManagerInterface $optionsManager * @param EncodingHelper $encodingHelper * @param InternalEntityFactory $entityFactory * @param GlobalFunctionsHelper $globalFunctionsHelper */ public function __construct( $filePointer, OptionsManagerInterface $optionsManager, EncodingHelper $encodingHelper, InternalEntityFactory $entityFactory, GlobalFunctionsHelper $globalFunctionsHelper ) { $this->filePointer = $filePointer; $this->fieldDelimiter = $optionsManager->getOption(Options::FIELD_DELIMITER); $this->fieldEnclosure = $optionsManager->getOption(Options::FIELD_ENCLOSURE); $this->encoding 
= $optionsManager->getOption(Options::ENCODING); $this->shouldPreserveEmptyRows = $optionsManager->getOption(Options::SHOULD_PRESERVE_EMPTY_ROWS); $this->encodingHelper = $encodingHelper; $this->entityFactory = $entityFactory; $this->globalFunctionsHelper = $globalFunctionsHelper; } /** * Rewind the Iterator to the first element * @see http://php.net/manual/en/iterator.rewind.php * * @return void */ #[\ReturnTypeWillChange] public function rewind() { $this->rewindAndSkipBom(); $this->numReadRows = 0; $this->rowBuffer = null; $this->next(); } /** * This rewinds and skips the BOM if inserted at the beginning of the file * by moving the file pointer after it, so that it is not read. * * @return void */ protected function rewindAndSkipBom() { $byteOffsetToSkipBom = $this->encodingHelper->getBytesOffsetToSkipBOM($this->filePointer, $this->encoding); // sets the cursor after the BOM (0 means no BOM, so rewind it) $this->globalFunctionsHelper->fseek($this->filePointer, $byteOffsetToSkipBom); } /** * Checks if current position is valid * @see http://php.net/manual/en/iterator.valid.php * * @return bool */ #[\ReturnTypeWillChange] public function valid() { return ($this->filePointer && !$this->hasReachedEndOfFile); } /** * Move forward to next element. Reads data for the next unprocessed row. 
* @see http://php.net/manual/en/iterator.next.php * * @throws \Box\Spout\Common\Exception\EncodingConversionException If unable to convert data to UTF-8 * @return void */ #[\ReturnTypeWillChange] public function next() { $this->hasReachedEndOfFile = $this->globalFunctionsHelper->feof($this->filePointer); if (!$this->hasReachedEndOfFile) { $this->readDataForNextRow(); } } /** * @throws \Box\Spout\Common\Exception\EncodingConversionException If unable to convert data to UTF-8 * @return void */ protected function readDataForNextRow() { do { $rowData = $this->getNextUTF8EncodedRow(); } while ($this->shouldReadNextRow($rowData)); if ($rowData !== false) { // str_replace will replace NULL values by empty strings $rowDataBufferAsArray = \str_replace(null, null, $rowData); $this->rowBuffer = $this->entityFactory->createRowFromArray($rowDataBufferAsArray); $this->numReadRows++; } else { // If we reach this point, it means end of file was reached. // This happens when the last lines are empty lines. $this->hasReachedEndOfFile = true; } } /** * @param array|bool $currentRowData * @return bool Whether the data for the current row can be returned or if we need to keep reading */ protected function shouldReadNextRow($currentRowData) { $hasSuccessfullyFetchedRowData = ($currentRowData !== false); $hasNowReachedEndOfFile = $this->globalFunctionsHelper->feof($this->filePointer); $isEmptyLine = $this->isEmptyLine($currentRowData); return ( (!$hasSuccessfullyFetchedRowData && !$hasNowReachedEndOfFile) || (!$this->shouldPreserveEmptyRows && $isEmptyLine) ); } /** * Returns the next row, converted if necessary to UTF-8. 
* As fgetcsv() does not manage correctly encoding for non UTF-8 data, * we remove manually whitespace with ltrim or rtrim (depending on the order of the bytes) * * @throws \Box\Spout\Common\Exception\EncodingConversionException If unable to convert data to UTF-8 * @return array|false The row for the current file pointer, encoded in UTF-8 or FALSE if nothing to read */ protected function getNextUTF8EncodedRow() { $encodedRowData = $this->globalFunctionsHelper->fgetcsv($this->filePointer, self::MAX_READ_BYTES_PER_LINE, $this->fieldDelimiter, $this->fieldEnclosure); if ($encodedRowData === false) { return false; } foreach ($encodedRowData as $cellIndex => $cellValue) { switch ($this->encoding) { case EncodingHelper::ENCODING_UTF16_LE: case EncodingHelper::ENCODING_UTF32_LE: // remove whitespace from the beginning of a string as fgetcsv() add extra whitespace when it try to explode non UTF-8 data $cellValue = \ltrim($cellValue); break; case EncodingHelper::ENCODING_UTF16_BE: case EncodingHelper::ENCODING_UTF32_BE: // remove whitespace from the end of a string as fgetcsv() add extra whitespace when it try to explode non UTF-8 data $cellValue = \rtrim($cellValue); break; } $encodedRowData[$cellIndex] = $this->encodingHelper->attemptConversionToUTF8($cellValue, $this->encoding); } return $encodedRowData; } /** * @param array|bool $lineData Array containing the cells value for the line * @return bool Whether the given line is empty */ protected function isEmptyLine($lineData) { return (\is_array($lineData) && \count($lineData) === 1 && $lineData[0] === null); } /** * Return the current element from the buffer * @see http://php.net/manual/en/iterator.current.php * * @return Row|null */ #[\ReturnTypeWillChange] public function current() { return $this->rowBuffer; } /** * Return the key of the current element * @see http://php.net/manual/en/iterator.key.php * * @return int */ #[\ReturnTypeWillChange] public function key() { return $this->numReadRows; } /** * Cleans up what 
was created to iterate over the object. * * @return void */ public function end() { // do nothing } } CSV/Manager/OptionsManager.php 0000644 00000002024 15152657741 0012223 0 ustar 00 <?php namespace Box\Spout\Reader\CSV\Manager; use Box\Spout\Common\Helper\EncodingHelper; use Box\Spout\Common\Manager\OptionsManagerAbstract; use Box\Spout\Reader\Common\Entity\Options; /** * Class OptionsManager * CSV Reader options manager */ class OptionsManager extends OptionsManagerAbstract { /** * {@inheritdoc} */ protected function getSupportedOptions() { return [ Options::SHOULD_FORMAT_DATES, Options::SHOULD_PRESERVE_EMPTY_ROWS, Options::FIELD_DELIMITER, Options::FIELD_ENCLOSURE, Options::ENCODING, ]; } /** * {@inheritdoc} */ protected function setDefaultOptions() { $this->setOption(Options::SHOULD_FORMAT_DATES, false); $this->setOption(Options::SHOULD_PRESERVE_EMPTY_ROWS, false); $this->setOption(Options::FIELD_DELIMITER, ','); $this->setOption(Options::FIELD_ENCLOSURE, '"'); $this->setOption(Options::ENCODING, EncodingHelper::ENCODING_UTF8); } } CSV/Sheet.php 0000644 00000002221 15152657741 0006772 0 ustar 00 <?php namespace Box\Spout\Reader\CSV; use Box\Spout\Reader\SheetInterface; /** * Class Sheet */ class Sheet implements SheetInterface { /** @var \Box\Spout\Reader\CSV\RowIterator To iterate over the CSV's rows */ protected $rowIterator; /** * @param RowIterator $rowIterator Corresponding row iterator */ public function __construct(RowIterator $rowIterator) { $this->rowIterator = $rowIterator; } /** * @return \Box\Spout\Reader\CSV\RowIterator */ public function getRowIterator() { return $this->rowIterator; } /** * @return int Index of the sheet */ public function getIndex() { return 0; } /** * @return string Name of the sheet - empty string since CSV does not support that */ public function getName() { return ''; } /** * @return bool Always TRUE as there is only one sheet */ public function isActive() { return true; } /** * @return bool Always TRUE as the only sheet is 
always visible */ public function isVisible() { return true; } } Exception/SharedStringNotFoundException.php 0000644 00000000235 15152657741 0015161 0 ustar 00 <?php namespace Box\Spout\Reader\Exception; /** * Class SharedStringNotFoundException */ class SharedStringNotFoundException extends ReaderException { } Exception/ReaderNotOpenedException.php 0000644 00000000223 15152657741 0014122 0 ustar 00 <?php namespace Box\Spout\Reader\Exception; /** * Class ReaderNotOpenedException */ class ReaderNotOpenedException extends ReaderException { } Exception/IteratorNotRewindableException.php 0000644 00000000237 15152657741 0015360 0 ustar 00 <?php namespace Box\Spout\Reader\Exception; /** * Class IteratorNotRewindableException */ class IteratorNotRewindableException extends ReaderException { } Exception/XMLProcessingException.php 0000644 00000000217 15152657741 0013604 0 ustar 00 <?php namespace Box\Spout\Reader\Exception; /** * Class XMLProcessingException */ class XMLProcessingException extends ReaderException { } Exception/NoSheetsFoundException.php 0000644 00000000217 15152657741 0013633 0 ustar 00 <?php namespace Box\Spout\Reader\Exception; /** * Class NoSheetsFoundException */ class NoSheetsFoundException extends ReaderException { } Exception/InvalidValueException.php 0000644 00000001261 15152657741 0013472 0 ustar 00 <?php namespace Box\Spout\Reader\Exception; use Throwable; /** * Class InvalidValueException */ class InvalidValueException extends ReaderException { /** @var mixed */ private $invalidValue; /** * @param mixed $invalidValue * @param string $message * @param int $code * @param Throwable|null $previous */ public function __construct($invalidValue, $message = '', $code = 0, Throwable $previous = null) { $this->invalidValue = $invalidValue; parent::__construct($message, $code, $previous); } /** * @return mixed */ public function getInvalidValue() { return $this->invalidValue; } } Exception/ReaderException.php 0000644 00000000311 15152657741 0012304 0 ustar 
00 <?php namespace Box\Spout\Reader\Exception; use Box\Spout\Common\Exception\SpoutException; /** * Class ReaderException * * @abstract */ abstract class ReaderException extends SpoutException { } XLSX/Reader.php 0000644 00000010021 15152657741 0007264 0 ustar 00 <?php namespace Box\Spout\Reader\XLSX; use Box\Spout\Common\Exception\IOException; use Box\Spout\Common\Helper\GlobalFunctionsHelper; use Box\Spout\Common\Manager\OptionsManagerInterface; use Box\Spout\Reader\Common\Creator\InternalEntityFactoryInterface; use Box\Spout\Reader\Common\Entity\Options; use Box\Spout\Reader\ReaderAbstract; use Box\Spout\Reader\XLSX\Creator\InternalEntityFactory; use Box\Spout\Reader\XLSX\Creator\ManagerFactory; /** * Class Reader * This class provides support to read data from a XLSX file */ class Reader extends ReaderAbstract { /** @var ManagerFactory */ protected $managerFactory; /** @var \ZipArchive */ protected $zip; /** @var \Box\Spout\Reader\XLSX\Manager\SharedStringsManager Manages shared strings */ protected $sharedStringsManager; /** @var SheetIterator To iterator over the XLSX sheets */ protected $sheetIterator; /** * @param OptionsManagerInterface $optionsManager * @param GlobalFunctionsHelper $globalFunctionsHelper * @param InternalEntityFactoryInterface $entityFactory * @param ManagerFactory $managerFactory */ public function __construct( OptionsManagerInterface $optionsManager, GlobalFunctionsHelper $globalFunctionsHelper, InternalEntityFactoryInterface $entityFactory, ManagerFactory $managerFactory ) { parent::__construct($optionsManager, $globalFunctionsHelper, $entityFactory); $this->managerFactory = $managerFactory; } /** * @param string $tempFolder Temporary folder where the temporary files will be created * @return Reader */ public function setTempFolder($tempFolder) { $this->optionsManager->setOption(Options::TEMP_FOLDER, $tempFolder); return $this; } /** * Returns whether stream wrappers are supported * * @return bool */ protected function 
doesSupportStreamWrapper() { return false; } /** * Opens the file at the given file path to make it ready to be read. * It also parses the sharedStrings.xml file to get all the shared strings available in memory * and fetches all the available sheets. * * @param string $filePath Path of the file to be read * @throws \Box\Spout\Common\Exception\IOException If the file at the given path or its content cannot be read * @throws \Box\Spout\Reader\Exception\NoSheetsFoundException If there are no sheets in the file * @return void */ protected function openReader($filePath) { /** @var InternalEntityFactory $entityFactory */ $entityFactory = $this->entityFactory; $this->zip = $entityFactory->createZipArchive(); if ($this->zip->open($filePath) === true) { $tempFolder = $this->optionsManager->getOption(Options::TEMP_FOLDER); $this->sharedStringsManager = $this->managerFactory->createSharedStringsManager($filePath, $tempFolder, $entityFactory); if ($this->sharedStringsManager->hasSharedStrings()) { // Extracts all the strings from the sheets for easy access in the future $this->sharedStringsManager->extractSharedStrings(); } $this->sheetIterator = $entityFactory->createSheetIterator( $filePath, $this->optionsManager, $this->sharedStringsManager ); } else { throw new IOException("Could not open $filePath for reading."); } } /** * Returns an iterator to iterate over sheets. * * @return SheetIterator To iterate over sheets */ protected function getConcreteSheetIterator() { return $this->sheetIterator; } /** * Closes the reader. To be used after reading the file. 
* * @return void */ protected function closeReader() { if ($this->zip) { $this->zip->close(); } if ($this->sharedStringsManager) { $this->sharedStringsManager->cleanup(); } } } XLSX/Creator/HelperFactory.php 0000644 00000002456 15152657741 0012245 0 ustar 00 <?php namespace Box\Spout\Reader\XLSX\Creator; use Box\Spout\Common\Helper\Escaper; use Box\Spout\Reader\XLSX\Helper\CellValueFormatter; use Box\Spout\Reader\XLSX\Manager\SharedStringsManager; use Box\Spout\Reader\XLSX\Manager\StyleManager; /** * Class HelperFactory * Factory to create helpers */ class HelperFactory extends \Box\Spout\Common\Creator\HelperFactory { /** * @param SharedStringsManager $sharedStringsManager Manages shared strings * @param StyleManager $styleManager Manages styles * @param bool $shouldFormatDates Whether date/time values should be returned as PHP objects or be formatted as strings * @param bool $shouldUse1904Dates Whether date/time values should use a calendar starting in 1904 instead of 1900 * @return CellValueFormatter */ public function createCellValueFormatter($sharedStringsManager, $styleManager, $shouldFormatDates, $shouldUse1904Dates) { $escaper = $this->createStringsEscaper(); return new CellValueFormatter($sharedStringsManager, $styleManager, $shouldFormatDates, $shouldUse1904Dates, $escaper); } /** * @return Escaper\XLSX */ public function createStringsEscaper() { /* @noinspection PhpUnnecessaryFullyQualifiedNameInspection */ return new Escaper\XLSX(); } } XLSX/Creator/InternalEntityFactory.php 0000644 00000011765 15152657741 0014002 0 ustar 00 <?php namespace Box\Spout\Reader\XLSX\Creator; use Box\Spout\Common\Entity\Cell; use Box\Spout\Common\Entity\Row; use Box\Spout\Reader\Common\Creator\InternalEntityFactoryInterface; use Box\Spout\Reader\Common\Entity\Options; use Box\Spout\Reader\Common\XMLProcessor; use Box\Spout\Reader\Wrapper\XMLReader; use Box\Spout\Reader\XLSX\Manager\SharedStringsManager; use Box\Spout\Reader\XLSX\RowIterator; use Box\Spout\Reader\XLSX\Sheet; 
use Box\Spout\Reader\XLSX\SheetIterator;

/**
 * Class InternalEntityFactory
 * Builds the entities (sheets, rows, cells, XML/ZIP wrappers) used by the XLSX reader.
 */
class InternalEntityFactory implements InternalEntityFactoryInterface
{
    /** @var HelperFactory Factory used to create helpers */
    private $helperFactory;

    /** @var ManagerFactory Factory used to create managers */
    private $managerFactory;

    /**
     * @param ManagerFactory $managerFactory
     * @param HelperFactory $helperFactory
     */
    public function __construct(ManagerFactory $managerFactory, HelperFactory $helperFactory)
    {
        $this->helperFactory = $helperFactory;
        $this->managerFactory = $managerFactory;
    }

    /**
     * Creates the iterator over the sheets of the given file.
     *
     * @param string $filePath Path of the file to be read
     * @param \Box\Spout\Common\Manager\OptionsManagerInterface $optionsManager Reader's options manager
     * @param SharedStringsManager $sharedStringsManager Manages shared strings
     * @return SheetIterator
     */
    public function createSheetIterator($filePath, $optionsManager, $sharedStringsManager)
    {
        return new SheetIterator(
            $this->managerFactory->createSheetManager($filePath, $optionsManager, $sharedStringsManager, $this)
        );
    }

    /**
     * Creates a sheet together with the row iterator that reads its data.
     *
     * @param string $filePath Path of the XLSX file being read
     * @param string $sheetDataXMLFilePath Path of the sheet data XML file as in [Content_Types].xml
     * @param int $sheetIndex Index of the sheet, based on order in the workbook (zero-based)
     * @param string $sheetName Name of the sheet
     * @param bool $isSheetActive Whether the sheet was defined as active
     * @param bool $isSheetVisible Whether the sheet is visible
     * @param \Box\Spout\Common\Manager\OptionsManagerInterface $optionsManager Reader's options manager
     * @param SharedStringsManager $sharedStringsManager Manages shared strings
     * @return Sheet
     */
    public function createSheet(
        $filePath,
        $sheetDataXMLFilePath,
        $sheetIndex,
        $sheetName,
        $isSheetActive,
        $isSheetVisible,
        $optionsManager,
        $sharedStringsManager
    ) {
        return new Sheet(
            $this->createRowIterator($filePath, $sheetDataXMLFilePath, $optionsManager, $sharedStringsManager),
            $sheetIndex,
            $sheetName,
            $isSheetActive,
            $isSheetVisible
        );
    }

    /**
     * Assembles a row iterator and all the collaborators it needs.
     *
     * @param string $filePath Path of the XLSX file being read
     * @param string $sheetDataXMLFilePath Path of the sheet data XML file as in [Content_Types].xml
     * @param \Box\Spout\Common\Manager\OptionsManagerInterface $optionsManager Reader's options manager
     * @param SharedStringsManager $sharedStringsManager Manages shared strings
     * @return RowIterator
     */
    private function createRowIterator($filePath, $sheetDataXMLFilePath, $optionsManager, $sharedStringsManager)
    {
        // Collaborators are created in the same order as the options are read, one after the other.
        $reader = $this->createXMLReader();
        $processor = $this->createXMLProcessor($reader);
        $styles = $this->managerFactory->createStyleManager($filePath, $this);
        $rows = $this->managerFactory->createRowManager($this);

        $formatter = $this->helperFactory->createCellValueFormatter(
            $sharedStringsManager,
            $styles,
            $optionsManager->getOption(Options::SHOULD_FORMAT_DATES),
            $optionsManager->getOption(Options::SHOULD_USE_1904_DATES)
        );

        return new RowIterator(
            $filePath,
            $sheetDataXMLFilePath,
            $optionsManager->getOption(Options::SHOULD_PRESERVE_EMPTY_ROWS),
            $reader,
            $processor,
            $formatter,
            $rows,
            $this
        );
    }

    /**
     * Creates a row holding the given cells; no style is attached (null is passed).
     *
     * @param Cell[] $cells
     * @return Row
     */
    public function createRow(array $cells = [])
    {
        return new Row($cells, null);
    }

    /**
     * @param mixed $cellValue Value the new cell is created with
     * @return Cell
     */
    public function createCell($cellValue)
    {
        return new Cell($cellValue);
    }

    /**
     * @return \ZipArchive
     */
    public function createZipArchive()
    {
        return new \ZipArchive();
    }

    /**
     * @return XMLReader
     */
    public function createXMLReader()
    {
        return new XMLReader();
    }

    /**
     * @param XMLReader $xmlReader Reader the processor will drive
     * @return XMLProcessor
     */
    public function createXMLProcessor($xmlReader)
    {
        return new XMLProcessor($xmlReader);
    }
}
XLSX/Creator/ManagerFactory.php 0000644 00000007420 15152657741 0012374 0 ustar 00 <?php namespace
Box\Spout\Reader\XLSX\Creator;

use Box\Spout\Reader\Common\Manager\RowManager;
use Box\Spout\Reader\XLSX\Manager\SharedStringsCaching\CachingStrategyFactory;
use Box\Spout\Reader\XLSX\Manager\SharedStringsManager;
use Box\Spout\Reader\XLSX\Manager\SheetManager;
use Box\Spout\Reader\XLSX\Manager\StyleManager;
use Box\Spout\Reader\XLSX\Manager\WorkbookRelationshipsManager;

/**
 * Class ManagerFactory
 * Factory to create managers
 */
class ManagerFactory
{
    /** @var HelperFactory */
    private $helperFactory;

    /** @var CachingStrategyFactory */
    private $cachingStrategyFactory;

    /** @var WorkbookRelationshipsManager|null Most recently created relationships manager */
    private $cachedWorkbookRelationshipsManager;

    /** @var string|null Path of the file the cached relationships manager was created for */
    private $cachedWorkbookRelationshipsManagerFilePath;

    /**
     * @param HelperFactory $helperFactory Factory to create helpers
     * @param CachingStrategyFactory $cachingStrategyFactory Factory to create shared strings caching strategies
     */
    public function __construct(HelperFactory $helperFactory, CachingStrategyFactory $cachingStrategyFactory)
    {
        $this->helperFactory = $helperFactory;
        $this->cachingStrategyFactory = $cachingStrategyFactory;
    }

    /**
     * @param string $filePath Path of the XLSX file being read
     * @param string $tempFolder Temporary folder where the temporary files to store shared strings will be stored
     * @param InternalEntityFactory $entityFactory Factory to create entities
     * @return SharedStringsManager
     */
    public function createSharedStringsManager($filePath, $tempFolder, $entityFactory)
    {
        $workbookRelationshipsManager = $this->createWorkbookRelationshipsManager($filePath, $entityFactory);

        return new SharedStringsManager(
            $filePath,
            $tempFolder,
            $workbookRelationshipsManager,
            $entityFactory,
            $this->helperFactory,
            $this->cachingStrategyFactory
        );
    }

    /**
     * Returns the relationships manager for the given file, reusing the cached
     * instance only when it was created for that same file path.
     *
     * The manager is constructed with the file path, so handing out an instance
     * built for another path would tie the caller to the wrong file when this
     * factory is used for more than one file.
     *
     * @param string $filePath Path of the XLSX file being read
     * @param InternalEntityFactory $entityFactory Factory to create entities
     * @return WorkbookRelationshipsManager
     */
    private function createWorkbookRelationshipsManager($filePath, $entityFactory)
    {
        $isCacheUsable = isset($this->cachedWorkbookRelationshipsManager)
            && $this->cachedWorkbookRelationshipsManagerFilePath === $filePath;

        if (!$isCacheUsable) {
            $this->cachedWorkbookRelationshipsManager = new WorkbookRelationshipsManager($filePath, $entityFactory);
            $this->cachedWorkbookRelationshipsManagerFilePath = $filePath;
        }

        return $this->cachedWorkbookRelationshipsManager;
    }

    /**
     * @param string $filePath Path of the XLSX file being read
     * @param \Box\Spout\Common\Manager\OptionsManagerInterface $optionsManager Reader's options manager
     * @param \Box\Spout\Reader\XLSX\Manager\SharedStringsManager $sharedStringsManager Manages shared strings
     * @param InternalEntityFactory $entityFactory Factory to create entities
     * @return SheetManager
     */
    public function createSheetManager($filePath, $optionsManager, $sharedStringsManager, $entityFactory)
    {
        $escaper = $this->helperFactory->createStringsEscaper();

        return new SheetManager($filePath, $optionsManager, $sharedStringsManager, $escaper, $entityFactory);
    }

    /**
     * @param string $filePath Path of the XLSX file being read
     * @param InternalEntityFactory $entityFactory Factory to create entities
     * @return StyleManager
     */
    public function createStyleManager($filePath, $entityFactory)
    {
        $workbookRelationshipsManager = $this->createWorkbookRelationshipsManager($filePath, $entityFactory);

        return new StyleManager($filePath, $workbookRelationshipsManager, $entityFactory);
    }

    /**
     * @param InternalEntityFactory $entityFactory Factory to create entities
     * @return RowManager
     */
    public function createRowManager($entityFactory)
    {
        return new RowManager($entityFactory);
    }
}
XLSX/SheetIterator.php 0000644 00000005563 15152657741 0010663 0 ustar 00 <?php namespace Box\Spout\Reader\XLSX; use Box\Spout\Reader\Exception\NoSheetsFoundException; use Box\Spout\Reader\IteratorInterface; use Box\Spout\Reader\XLSX\Manager\SheetManager; /** * Class SheetIterator * Iterate over XLSX sheet.
*/
class SheetIterator implements IteratorInterface
{
    /** @var \Box\Spout\Reader\XLSX\Sheet[] The list of sheet present in the file */
    protected $sheets;

    /**
     * Starts at 0 so that the iterator is in a usable state even when it is
     * queried before rewind() has been called.
     *
     * @var int The index of the sheet being read (zero-based)
     */
    protected $currentSheetIndex = 0;

    /**
     * @param SheetManager $sheetManager Manages sheets
     * @throws \Box\Spout\Reader\Exception\NoSheetsFoundException If there are no sheets in the file
     */
    public function __construct($sheetManager)
    {
        // Fetch all available sheets
        $this->sheets = $sheetManager->getSheets();

        if (\count($this->sheets) === 0) {
            throw new NoSheetsFoundException('The file must contain at least one sheet.');
        }
    }

    /**
     * Rewind the Iterator to the first element
     * @see http://php.net/manual/en/iterator.rewind.php
     *
     * @return void
     */
    #[\ReturnTypeWillChange]
    public function rewind()
    {
        $this->currentSheetIndex = 0;
    }

    /**
     * Checks if current position is valid
     * @see http://php.net/manual/en/iterator.valid.php
     *
     * @return bool
     */
    #[\ReturnTypeWillChange]
    public function valid()
    {
        return ($this->currentSheetIndex < \count($this->sheets));
    }

    /**
     * Move forward to next element.
     * The row iterator of the sheet being left is ended before moving on.
     * @see http://php.net/manual/en/iterator.next.php
     *
     * @return void
     */
    #[\ReturnTypeWillChange]
    public function next()
    {
        // Using isset here because it is way faster than array_key_exists...
        if (isset($this->sheets[$this->currentSheetIndex])) {
            $currentSheet = $this->sheets[$this->currentSheetIndex];
            $currentSheet->getRowIterator()->end();

            $this->currentSheetIndex++;
        }
    }

    /**
     * Return the current element
     * @see http://php.net/manual/en/iterator.current.php
     *
     * @return \Box\Spout\Reader\XLSX\Sheet|null The current sheet, or null when the position is not valid
     */
    #[\ReturnTypeWillChange]
    public function current()
    {
        // "??" yields the same null as an out-of-range read would, without emitting a warning
        return $this->sheets[$this->currentSheetIndex] ?? null;
    }

    /**
     * Return the key of the current element (one-based: the zero-based sheet index plus one)
     * @see http://php.net/manual/en/iterator.key.php
     *
     * @return int
     */
    #[\ReturnTypeWillChange]
    public function key()
    {
        return $this->currentSheetIndex + 1;
    }

    /**
     * Cleans up what was created to iterate over the object.
     *
     * @return void
     */
    #[\ReturnTypeWillChange]
    public function end()
    {
        // make sure we are not leaking memory in case the iteration stopped before the end
        foreach ($this->sheets as $sheet) {
            $sheet->getRowIterator()->end();
        }
    }
}
XLSX/RowIterator.php 0000644 00000037330 15152657741 0010357 0 ustar 00 <?php namespace Box\Spout\Reader\XLSX; use Box\Spout\Common\Entity\Cell; use Box\Spout\Common\Entity\Row; use Box\Spout\Common\Exception\IOException; use Box\Spout\Reader\Common\Manager\RowManager; use Box\Spout\Reader\Common\XMLProcessor; use Box\Spout\Reader\Exception\InvalidValueException; use Box\Spout\Reader\Exception\XMLProcessingException; use Box\Spout\Reader\IteratorInterface; use Box\Spout\Reader\Wrapper\XMLReader; use Box\Spout\Reader\XLSX\Creator\InternalEntityFactory; use Box\Spout\Reader\XLSX\Helper\CellHelper; use Box\Spout\Reader\XLSX\Helper\CellValueFormatter; /** * Class RowIterator */ class RowIterator implements IteratorInterface { /** Definition of XML nodes names used to parse data */ const XML_NODE_DIMENSION = 'dimension'; const XML_NODE_WORKSHEET = 'worksheet'; const XML_NODE_ROW = 'row'; const XML_NODE_CELL = 'c'; /** Definition of XML attributes used to parse data */ const XML_ATTRIBUTE_REF = 'ref'; const XML_ATTRIBUTE_SPANS = 'spans'; const XML_ATTRIBUTE_ROW_INDEX = 'r'; const XML_ATTRIBUTE_CELL_INDEX = 'r'; /** @var string Path of the XLSX file being read */ protected $filePath; /** @var string Path of the sheet data XML file as in [Content_Types].xml */ protected $sheetDataXMLFilePath; /** @var \Box\Spout\Reader\Wrapper\XMLReader The XMLReader object that will help read sheet's XML data */ protected $xmlReader; /** @var \Box\Spout\Reader\Common\XMLProcessor Helper Object to process XML nodes */ protected $xmlProcessor; /** @var Helper\CellValueFormatter Helper to format cell values */ protected $cellValueFormatter; /** @var \Box\Spout\Reader\Common\Manager\RowManager Manages rows */ protected $rowManager; /** @var
\Box\Spout\Reader\XLSX\Creator\InternalEntityFactory Factory to create entities */
    protected $entityFactory;

    /**
     * TODO: This variable can be deleted when row indices get preserved
     * @var int Number of read rows
     */
    protected $numReadRows = 0;

    /** @var Row Contains the row currently processed */
    protected $currentlyProcessedRow;

    /** @var Row|null Buffer used to store the current row, while checking if there are more rows to read */
    protected $rowBuffer;

    /** @var bool Indicates whether all rows have been read */
    protected $hasReachedEndOfFile = false;

    /** @var int The number of columns the sheet has (0 meaning undefined) */
    protected $numColumns = 0;

    /** @var bool Whether empty rows should be returned or skipped */
    protected $shouldPreserveEmptyRows;

    /** @var int Last row index processed (one-based) */
    protected $lastRowIndexProcessed = 0;

    /** @var int Row index to be processed next (one-based) */
    protected $nextRowIndexToBeProcessed = 0;

    /** @var int Last column index processed (zero-based) */
    protected $lastColumnIndexProcessed = -1;

    /**
     * @param string $filePath Path of the XLSX file being read
     * @param string $sheetDataXMLFilePath Path of the sheet data XML file as in [Content_Types].xml
     * @param bool $shouldPreserveEmptyRows Whether empty rows should be preserved
     * @param XMLReader $xmlReader XML Reader
     * @param XMLProcessor $xmlProcessor Helper to process XML files
     * @param CellValueFormatter $cellValueFormatter Helper to format cell values
     * @param RowManager $rowManager Manages rows
     * @param InternalEntityFactory $entityFactory Factory to create entities
     */
    public function __construct(
        $filePath,
        $sheetDataXMLFilePath,
        $shouldPreserveEmptyRows,
        $xmlReader,
        XMLProcessor $xmlProcessor,
        CellValueFormatter $cellValueFormatter,
        RowManager $rowManager,
        InternalEntityFactory $entityFactory
    ) {
        $this->filePath = $filePath;
        $this->sheetDataXMLFilePath = $this->normalizeSheetDataXMLFilePath($sheetDataXMLFilePath);
        $this->shouldPreserveEmptyRows = $shouldPreserveEmptyRows;
        $this->xmlReader = $xmlReader;
        $this->cellValueFormatter = $cellValueFormatter;
        $this->rowManager = $rowManager;
        $this->entityFactory = $entityFactory;

        // Register all callbacks to process different nodes when reading the XML file
        $this->xmlProcessor = $xmlProcessor;
        $this->xmlProcessor->registerCallback(self::XML_NODE_DIMENSION, XMLProcessor::NODE_TYPE_START, [$this, 'processDimensionStartingNode']);
        $this->xmlProcessor->registerCallback(self::XML_NODE_ROW, XMLProcessor::NODE_TYPE_START, [$this, 'processRowStartingNode']);
        $this->xmlProcessor->registerCallback(self::XML_NODE_CELL, XMLProcessor::NODE_TYPE_START, [$this, 'processCellStartingNode']);
        $this->xmlProcessor->registerCallback(self::XML_NODE_ROW, XMLProcessor::NODE_TYPE_END, [$this, 'processRowEndingNode']);
        $this->xmlProcessor->registerCallback(self::XML_NODE_WORKSHEET, XMLProcessor::NODE_TYPE_END, [$this, 'processWorksheetEndingNode']);
    }

    /**
     * @param string $sheetDataXMLFilePath Path of the sheet data XML file as in [Content_Types].xml
     * @return string Path of the XML file containing the sheet data,
     *                without the leading slash.
     */
    protected function normalizeSheetDataXMLFilePath($sheetDataXMLFilePath)
    {
        return \ltrim($sheetDataXMLFilePath, '/');
    }

    /**
     * Rewind the Iterator to the first element.
     * Closes and re-opens the XMLReader on the sheet data, resets all the
     * counters and reads the first row.
     * @see http://php.net/manual/en/iterator.rewind.php
     *
     * @throws \Box\Spout\Common\Exception\IOException If the sheet data XML cannot be read
     * @return void
     */
    #[\ReturnTypeWillChange]
    public function rewind()
    {
        $this->xmlReader->close();

        if ($this->xmlReader->openFileInZip($this->filePath, $this->sheetDataXMLFilePath) === false) {
            throw new IOException("Could not open \"{$this->sheetDataXMLFilePath}\".");
        }

        $this->numReadRows = 0;
        $this->lastRowIndexProcessed = 0;
        $this->nextRowIndexToBeProcessed = 0;
        $this->rowBuffer = null;
        $this->hasReachedEndOfFile = false;
        $this->numColumns = 0;

        $this->next();
    }

    /**
     * Checks if current position is valid
     * @see http://php.net/manual/en/iterator.valid.php
     *
     * @return bool
     */
    #[\ReturnTypeWillChange]
    public function valid()
    {
        return (!$this->hasReachedEndOfFile);
    }

    /**
     * Move forward to next element. Reads data describing the next unprocessed row.
     * @see http://php.net/manual/en/iterator.next.php
     *
     * @throws \Box\Spout\Reader\Exception\SharedStringNotFoundException If a shared string was not found
     * @throws \Box\Spout\Common\Exception\IOException If unable to read the sheet data XML
     * @return void
     */
    #[\ReturnTypeWillChange]
    public function next()
    {
        $this->nextRowIndexToBeProcessed++;

        if ($this->doesNeedDataForNextRowToBeProcessed()) {
            $this->readDataForNextRow();
        }
    }

    /**
     * Returns whether we need data for the next row to be processed.
     * We don't need to read data if:
     *   we have already read at least one row
     *     AND
     *   we need to preserve empty rows
     *     AND
     *   the last row that was read is not the row that need to be processed
     *   (i.e. if we need to return empty rows)
     *
     * @return bool Whether we need data for the next row to be processed.
     */
    protected function doesNeedDataForNextRowToBeProcessed()
    {
        $hasReadAtLeastOneRow = ($this->lastRowIndexProcessed !== 0);

        return (
            !$hasReadAtLeastOneRow ||
            !$this->shouldPreserveEmptyRows ||
            $this->lastRowIndexProcessed < $this->nextRowIndexToBeProcessed
        );
    }

    /**
     * Reads the XML until a callback stops the processor, then stores the
     * row that was built in the buffer.
     *
     * @throws \Box\Spout\Reader\Exception\SharedStringNotFoundException If a shared string was not found
     * @throws \Box\Spout\Common\Exception\IOException If unable to read the sheet data XML
     * @return void
     */
    protected function readDataForNextRow()
    {
        $this->currentlyProcessedRow = $this->entityFactory->createRow();

        try {
            $this->xmlProcessor->readUntilStopped();
        } catch (XMLProcessingException $exception) {
            throw new IOException("The {$this->sheetDataXMLFilePath} file cannot be read. [{$exception->getMessage()}]");
        }

        $this->rowBuffer = $this->currentlyProcessedRow;
    }

    /**
     * Reads the sheet dimensions, when they describe a range, to learn the number of columns.
     *
     * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<dimension>" starting node
     * @return int A return code that indicates what action should the processor take next
     */
    protected function processDimensionStartingNode($xmlReader)
    {
        // Read dimensions of the sheet
        $dimensionRef = $xmlReader->getAttribute(self::XML_ATTRIBUTE_REF); // returns 'A1:M13' for instance (or 'A1' for empty sheet)

        // The attribute may be missing: only run the regex on an actual string
        // (passing null to preg_match() is deprecated as of PHP 8.1).
        if (\is_string($dimensionRef) && \preg_match('/[A-Z]+\d+:([A-Z]+\d+)/', $dimensionRef, $matches)) {
            $this->numColumns = CellHelper::getColumnIndexFromCellIndex($matches[1]) + 1;
        }

        return XMLProcessor::PROCESSING_CONTINUE;
    }

    /**
     * Prepares the row being built: records its index and pre-fills it with empty cells.
     *
     * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<row>" starting node
     * @return int A return code that indicates what action should the processor take next
     */
    protected function processRowStartingNode($xmlReader)
    {
        // Reset index of the last processed column
        $this->lastColumnIndexProcessed = -1;

        // Mark the last processed row as the one currently being read
        $this->lastRowIndexProcessed = $this->getRowIndex($xmlReader);

        // Read spans info if present
        $numberOfColumnsForRow = $this->numColumns;
        $spans = $xmlReader->getAttribute(self::XML_ATTRIBUTE_SPANS); // returns '1:5' for instance
        if ($spans) {
            $numberOfColumnsForRow = $this->getNumberOfColumnsFromSpans($spans, $numberOfColumnsForRow);
        }

        // One distinct Cell per column: array_fill() would store the very same
        // object at every index, so changing one empty cell would change them all.
        $cells = [];
        for ($columnIndex = 0; $columnIndex < $numberOfColumnsForRow; $columnIndex++) {
            $cells[] = $this->entityFactory->createCell('');
        }
        $this->currentlyProcessedRow->setCells($cells);

        return XMLProcessor::PROCESSING_CONTINUE;
    }

    /**
     * Extracts the number of columns from a "spans" attribute value.
     * The value is read as one or several "first:last" ranges ('1:5' or '1:3 5:8');
     * the largest "last" value is used.
     *
     * @param string $spans Value of the "spans" attribute
     * @param int $defaultNumberOfColumns Value returned when no range can be found in $spans
     * @return int Number of columns the row should be initialized with
     */
    private function getNumberOfColumnsFromSpans($spans, $defaultNumberOfColumns)
    {
        if (!\preg_match_all('/\d+:(\d+)/', $spans, $matches)) {
            return $defaultNumberOfColumns;
        }

        return \max(\array_map('intval', $matches[1]));
    }

    /**
     * Builds the cell found on the current node and stores it at its column index.
     *
     * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<cell>" starting node
     * @return int A return code that indicates what action should the processor take next
     */
    protected function processCellStartingNode($xmlReader)
    {
        $currentColumnIndex = $this->getColumnIndex($xmlReader);

        // NOTE: expand() will automatically decode all XML entities of the child nodes
        $node = $xmlReader->expand();
        $cell = $this->getCell($node);

        $this->currentlyProcessedRow->setCellAtIndex($cell, $currentColumnIndex);
        $this->lastColumnIndexProcessed = $currentColumnIndex;

        return XMLProcessor::PROCESSING_CONTINUE;
    }

    /**
     * Decides whether the row that just ended is returned (processing stops) or skipped.
     *
     * @return int A return code that indicates what action should the processor take next
     */
    protected function processRowEndingNode()
    {
        // if the fetched row is empty and we don't want to preserve it..,
        if (!$this->shouldPreserveEmptyRows && $this->rowManager->isEmpty($this->currentlyProcessedRow)) {
            // ... skip it
            return XMLProcessor::PROCESSING_CONTINUE;
        }

        $this->numReadRows++;

        // If needed, we fill the empty cells
        if ($this->numColumns === 0) {
            $this->currentlyProcessedRow = $this->rowManager->fillMissingIndexesWithEmptyCells($this->currentlyProcessedRow);
        }

        // at this point, we have all the data we need for the row
        // so that we can populate the buffer
        return XMLProcessor::PROCESSING_STOP;
    }

    /**
     * @return int A return code that indicates what action should the processor take next
     */
    protected function processWorksheetEndingNode()
    {
        // The closing "</worksheet>" marks the end of the file
        $this->hasReachedEndOfFile = true;

        return XMLProcessor::PROCESSING_STOP;
    }

    /**
     * Returns the index of the row, read from its "r" attribute, or the index
     * following the last processed row when the attribute is absent.
     *
     * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<row>" node
     * @return int Row index
     */
    protected function getRowIndex($xmlReader)
    {
        // Get "r" attribute if present (from something like <row r="3"...>
        $currentRowIndex = $xmlReader->getAttribute(self::XML_ATTRIBUTE_ROW_INDEX);

        return ($currentRowIndex !== null) ?
            (int) $currentRowIndex :
            $this->lastRowIndexProcessed + 1;
    }

    /**
     * Returns the index of the cell's column, derived from its "r" attribute, or
     * the index following the last processed column when the attribute is absent.
     *
     * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<c>" node
     * @throws \Box\Spout\Common\Exception\InvalidArgumentException When the given cell index is invalid
     * @return int Column index
     */
    protected function getColumnIndex($xmlReader)
    {
        // Get "r" attribute if present (from something like <c r="A1"...>
        $currentCellIndex = $xmlReader->getAttribute(self::XML_ATTRIBUTE_CELL_INDEX);

        return ($currentCellIndex !== null) ?
            CellHelper::getColumnIndexFromCellIndex($currentCellIndex) :
            $this->lastColumnIndexProcessed + 1;
    }

    /**
     * Returns the cell holding the formatted value associated to the given XML node.
     * When the value is reported as invalid, the cell carries the invalid value
     * and is flagged as an error cell instead.
     *
     * @param \DOMNode $node
     * @return Cell The cell set with the value associated with the node
     */
    protected function getCell($node)
    {
        try {
            $cellValue = $this->cellValueFormatter->extractAndFormatNodeValue($node);
            $cell = $this->entityFactory->createCell($cellValue);
        } catch (InvalidValueException $exception) {
            $cell = $this->entityFactory->createCell($exception->getInvalidValue());
            $cell->setType(Cell::TYPE_ERROR);
        }

        return $cell;
    }

    /**
     * Return the current element, either an empty row or from the buffer.
     * @see http://php.net/manual/en/iterator.current.php
     *
     * @return Row|null
     */
    #[\ReturnTypeWillChange]
    public function current()
    {
        $rowToBeProcessed = $this->rowBuffer;

        if ($this->shouldPreserveEmptyRows) {
            // when we need to preserve empty rows, we will either return
            // an empty row or the last row read. This depends whether the
            // index of last row that was read matches the index of the last
            // row whose value should be returned.
            if ($this->lastRowIndexProcessed !== $this->nextRowIndexToBeProcessed) {
                // return empty row if mismatch between last processed row
                // and the row that needs to be returned
                $rowToBeProcessed = $this->entityFactory->createRow();
            }
        }

        return $rowToBeProcessed;
    }

    /**
     * Return the key of the current element. Here, the row index.
     * @see http://php.net/manual/en/iterator.key.php
     *
     * @return int
     */
    #[\ReturnTypeWillChange]
    public function key()
    {
        // TODO: This should return $this->nextRowIndexToBeProcessed
        //       but to avoid a breaking change, the return value for
        //       this function has been kept as the number of rows read.
        return $this->shouldPreserveEmptyRows ?
            $this->nextRowIndexToBeProcessed :
            $this->numReadRows;
    }

    /**
     * Cleans up what was created to iterate over the object.
* * @return void */ #[\ReturnTypeWillChange] public function end() { $this->xmlReader->close(); } }
XLSX/Helper/CellValueFormatter.php 0000644 00000026064 15152657741 0013057 0 ustar 00 <?php

namespace Box\Spout\Reader\XLSX\Helper;

use Box\Spout\Reader\Exception\InvalidValueException;
use Box\Spout\Reader\XLSX\Manager\SharedStringsManager;
use Box\Spout\Reader\XLSX\Manager\StyleManager;

/**
 * Class CellValueFormatter
 * Turns the XML node of a cell into the PHP value it represents.
 */
class CellValueFormatter
{
    /** Definition of all possible cell types */
    const CELL_TYPE_INLINE_STRING = 'inlineStr';
    const CELL_TYPE_STR = 'str';
    const CELL_TYPE_SHARED_STRING = 's';
    const CELL_TYPE_BOOLEAN = 'b';
    const CELL_TYPE_NUMERIC = 'n';
    const CELL_TYPE_DATE = 'd';
    const CELL_TYPE_ERROR = 'e';

    /** Definition of XML nodes names used to parse data */
    const XML_NODE_VALUE = 'v';
    const XML_NODE_INLINE_STRING_VALUE = 't';

    /** Definition of XML attributes used to parse data */
    const XML_ATTRIBUTE_TYPE = 't';
    const XML_ATTRIBUTE_STYLE_ID = 's';

    /** Constants used for date formatting */
    const NUM_SECONDS_IN_ONE_DAY = 86400;

    /** @var SharedStringsManager Manages shared strings */
    protected $sharedStringsManager;

    /** @var StyleManager Manages styles */
    protected $styleManager;

    /** @var bool Whether date/time values should be returned as PHP objects or be formatted as strings */
    protected $shouldFormatDates;

    /** @var bool Whether date/time values should use a calendar starting in 1904 instead of 1900 */
    protected $shouldUse1904Dates;

    /** @var \Box\Spout\Common\Helper\Escaper\XLSX Used to unescape XML data */
    protected $escaper;

    /**
     * @param SharedStringsManager $sharedStringsManager Manages shared strings
     * @param StyleManager $styleManager Manages styles
     * @param bool $shouldFormatDates Whether date/time values should be returned as PHP objects or be formatted as strings
     * @param bool $shouldUse1904Dates Whether date/time values should use a calendar starting in 1904 instead of 1900
     * @param \Box\Spout\Common\Helper\Escaper\XLSX $escaper Used to unescape XML data
     */
    public function __construct($sharedStringsManager, $styleManager, $shouldFormatDates, $shouldUse1904Dates, $escaper)
    {
        $this->sharedStringsManager = $sharedStringsManager;
        $this->styleManager = $styleManager;
        $this->shouldFormatDates = $shouldFormatDates;
        $this->shouldUse1904Dates = $shouldUse1904Dates;
        $this->escaper = $escaper;
    }

    /**
     * Reads the type ("t") and style ("s") attributes of the node and dispatches
     * to the formatter matching the cell type.
     *
     * @param \DOMNode $node
     * @throws InvalidValueException If the value is not valid
     * @return string|int|float|bool|\DateTime The value associated with the cell
     */
    public function extractAndFormatNodeValue($node)
    {
        // A cell without an explicit type is treated as numeric ("n")
        $type = $node->getAttribute(self::XML_ATTRIBUTE_TYPE) ?: self::CELL_TYPE_NUMERIC;
        $styleId = (int) $node->getAttribute(self::XML_ATTRIBUTE_STYLE_ID);
        $rawValue = $this->getVNodeValue($node);

        // Inline strings do not store their value in the "v" node, so an empty
        // "v" value only means "empty cell" for the other types.
        $isInlineString = ($type === self::CELL_TYPE_INLINE_STRING);
        if (!$isInlineString && $rawValue === '') {
            return $rawValue;
        }

        switch ($type) {
            case self::CELL_TYPE_INLINE_STRING:
                return $this->formatInlineStringCellValue($node);
            case self::CELL_TYPE_SHARED_STRING:
                return $this->formatSharedStringCellValue($rawValue);
            case self::CELL_TYPE_STR:
                return $this->formatStrCellValue($rawValue);
            case self::CELL_TYPE_BOOLEAN:
                return $this->formatBooleanCellValue($rawValue);
            case self::CELL_TYPE_NUMERIC:
                return $this->formatNumericCellValue($rawValue, $styleId);
            case self::CELL_TYPE_DATE:
                return $this->formatDateCellValue($rawValue);
            default:
                // any other type (including the error type) is reported as invalid
                throw new InvalidValueException($rawValue);
        }
    }

    /**
     * Returns the text of the first "v" node nested in the given node.
     *
     * @param \DOMNode $node
     * @return string The nested value, or an empty string when there is no "v" node
     */
    protected function getVNodeValue($node)
    {
        $valueNode = $node->getElementsByTagName(self::XML_NODE_VALUE)->item(0);
        if ($valueNode === null) {
            return '';
        }

        return $valueNode->nodeValue;
    }

    /**
     * Returns the value of an inline string cell: the unescaped content of all
     * its nested "t" nodes, concatenated in document order.
     *
     * @param \DOMNode $node
     * @return string The value associated with the cell
     */
    protected function formatInlineStringCellValue($node)
    {
        // inline strings are formatted this way (they can contain any number of <t> nodes):
        // <c r="A1" t="inlineStr"><is><t>[INLINE_STRING]</t><t>[INLINE_STRING_2]</t></is></c>
        $inlineString = '';
        foreach ($node->getElementsByTagName(self::XML_NODE_INLINE_STRING_VALUE) as $textNode) {
            $inlineString .= $this->escaper->unescape($textNode->nodeValue);
        }

        return $inlineString;
    }

    /**
     * Returns the unescaped shared string stored at the index given by the node value.
     *
     * @param string $nodeValue
     * @return string The value associated with the cell
     */
    protected function formatSharedStringCellValue($nodeValue)
    {
        // shared strings are formatted this way:
        // <c r="A1" t="s"><v>[SHARED_STRING_INDEX]</v></c>
        return $this->escaper->unescape(
            $this->sharedStringsManager->getStringAtIndex((int) $nodeValue)
        );
    }

    /**
     * Returns the trimmed, unescaped string stored directly in the value node.
     *
     * @param string $nodeValue
     * @return string The value associated with the cell
     */
    protected function formatStrCellValue($nodeValue)
    {
        return $this->escaper->unescape(\trim($nodeValue));
    }

    /**
     * Returns the numeric value of the cell. When the cell style says the number
     * is a date, the value is handled as an Excel timestamp instead.
     *
     * @param string $nodeValue
     * @param int $cellStyleId 0 being the default style
     * @return int|float|\DateTime|string The value associated with the cell
     */
    protected function formatNumericCellValue($nodeValue, $cellStyleId)
    {
        // Numeric values can represent numbers as well as timestamps:
        // the style of the cell tells which one it is.
        if ($this->styleManager->shouldFormatNumericValueAsDate($cellStyleId)) {
            return $this->formatExcelTimestampValue((float) $nodeValue, $cellStyleId);
        }

        $asInt = (int) $nodeValue;
        $asFloat = (float) $nodeValue;

        // keep an int when converting to int loses nothing
        if ((float) $asInt === $asFloat) {
            return $asInt;
        }

        return $asFloat;
    }

    /**
     * Returns the date value associated to the given timestamp, after checking
     * that the timestamp lies within the supported range.
     * NOTE: The timestamp is a float representing the number of days since the base Excel date.
     *
     * @see ECMA-376 Part 1 - §18.17.4
     *
     * @param float $nodeValue
     * @param int $cellStyleId 0 being the default style
     * @throws InvalidValueException If the value is not a valid timestamp
     * @return \DateTime|string The value associated with the cell
     */
    protected function formatExcelTimestampValue($nodeValue, $cellStyleId)
    {
        if (!$this->isValidTimestampValue($nodeValue)) {
            throw new InvalidValueException($nodeValue);
        }

        return $this->formatExcelTimestampValueAsDateTimeValue($nodeValue, $cellStyleId);
    }

    /**
     * Returns whether the given timestamp lies within the boundaries accepted
     * for the date system in use (1900 or 1904).
     * @see ECMA-376 Part 1 - §18.17.4 - this specifies the timestamp boundaries.
     *
     * @param float $timestampValue
     * @return bool
     */
    protected function isValidTimestampValue($timestampValue)
    {
        // @NOTE: some versions of Excel don't support negative dates (e.g. Excel for Mac 2011)
        if ($this->shouldUse1904Dates) {
            $lowerBound = -695055;
            $upperBound = 2957003.9999884;
        } else {
            $lowerBound = -693593;
            $upperBound = 2958465.9999884;
        }

        return ($timestampValue >= $lowerBound && $timestampValue <= $upperBound);
    }

    /**
     * Converts the given timestamp into a date: the integer part is added as days
     * and the fractional part as seconds to the base date of the date system in use.
     *
     * @param float $nodeValue
     * @param int $cellStyleId 0 being the default style
     * @return \DateTime|string A DateTime, or the date formatted with the cell's number format when dates should be formatted
     */
    protected function formatExcelTimestampValueAsDateTimeValue($nodeValue, $cellStyleId)
    {
        $excelBaseDate = $this->shouldUse1904Dates ? '1904-01-01' : '1899-12-30';

        $wholeDays = (int) $nodeValue;
        $dayFraction = \fmod($nodeValue, 1);
        $seconds = \round($dayFraction * self::NUM_SECONDS_IN_ONE_DAY, 0);

        $dateTime = \DateTime::createFromFormat('|Y-m-d', $excelBaseDate);
        $dateTime->modify('+' . $wholeDays . 'days');
        $dateTime->modify('+' . $seconds . 'seconds');

        if (!$this->shouldFormatDates) {
            return $dateTime;
        }

        $numberFormatCode = $this->styleManager->getNumberFormatCode($cellStyleId);

        return $dateTime->format(DateFormatHelper::toPHPDateFormat($numberFormatCode));
    }

    /**
     * Returns the node value cast to a boolean.
     *
     * @param string $nodeValue
     * @return bool The value associated with the cell
     */
    protected function formatBooleanCellValue($nodeValue)
    {
        return (bool) $nodeValue;
    }

    /**
     * Returns the value of a date cell: the stored string as is when dates should
     * be formatted, a DateTime built from it otherwise.
     * @see ECMA-376 Part 1 - §18.17.4
     *
     * @param string $nodeValue ISO 8601 Date string
     * @throws InvalidValueException If a DateTime cannot be built from the value
     * @return \DateTime|string The value associated with the cell
     */
    protected function formatDateCellValue($nodeValue)
    {
        if ($this->shouldFormatDates) {
            return $nodeValue;
        }

        // Mitigate thrown Exception on invalid date-time format (http://php.net/manual/en/datetime.construct.php)
        try {
            return new \DateTime($nodeValue);
        } catch (\Exception $e) {
            throw new InvalidValueException($nodeValue);
        }
    }
}
XLSX/Helper/DateFormatHelper.php 0000644 00000013346 15152657741 0012504 0 ustar 00 <?php namespace Box\Spout\Reader\XLSX\Helper; /** * Class DateFormatHelper * This class provides helper functions to format Excel dates */ class DateFormatHelper { const KEY_GENERAL = 'general'; const KEY_HOUR_12 = '12h'; const KEY_HOUR_24 = '24h'; /** * This map is used to replace Excel format characters by their PHP equivalent. * Keys should be ordered from longest to smallest. * * @var array Mapping between Excel format characters and PHP format characters */ private static $excelDateFormatToPHPDateFormatMapping = [ self::KEY_GENERAL => [ // Time 'am/pm' => 'A', // Uppercase Ante meridiem and Post meridiem ':mm' => ':i', // Minutes with leading zeros - if preceded by a ":" (otherwise month) 'mm:' => 'i:', // Minutes with leading zeros - if followed by a ":" (otherwise month) 'ss' => 's', // Seconds, with leading zeros '.s' => '', // Ignore (fractional seconds format does not exist in PHP) // Date 'e' => 'Y', // Full numeric representation of a year, 4 digits 'yyyy' => 'Y', // Full numeric representation of a year, 4 digits 'yy' => 'y', // Two digit representation of a year 'mmmmm' => 'M', // Short textual representation of a month, three letters ("mmmmm" should only contain the 1st letter...)
'mmmm' => 'F', // Full textual representation of a month 'mmm' => 'M', // Short textual representation of a month, three letters 'mm' => 'm', // Numeric representation of a month, with leading zeros 'm' => 'n', // Numeric representation of a month, without leading zeros 'dddd' => 'l', // Full textual representation of the day of the week 'ddd' => 'D', // Textual representation of a day, three letters 'dd' => 'd', // Day of the month, 2 digits with leading zeros 'd' => 'j', // Day of the month without leading zeros ], self::KEY_HOUR_12 => [ 'hh' => 'h', // 12-hour format of an hour without leading zeros 'h' => 'g', // 12-hour format of an hour without leading zeros ], self::KEY_HOUR_24 => [ 'hh' => 'H', // 24-hour hours with leading zero 'h' => 'G', // 24-hour format of an hour without leading zeros ], ]; /** * Converts the given Excel date format to a format understandable by the PHP date function. * * @param string $excelDateFormat Excel date format * @return string PHP date format (as defined here: http://php.net/manual/en/function.date.php) */ public static function toPHPDateFormat($excelDateFormat) { // Remove brackets potentially present at the beginning of the format string // and text portion of the format at the end of it (starting with ";") // See §18.8.31 of ECMA-376 for more detail. $dateFormat = \preg_replace('/^(?:\[\$[^\]]+?\])?([^;]*).*/', '$1', $excelDateFormat); // Double quotes are used to escape characters that must not be interpreted. // For instance, ["Day " dd] should result in "Day 13" and we should not try to interpret "D", "a", "y" // By exploding the format string using double quote as a delimiter, we can get all parts // that must be transformed (even indexes) and all parts that must not be (odd indexes). 
$dateFormatParts = \explode('"', $dateFormat); foreach ($dateFormatParts as $partIndex => $dateFormatPart) { // do not look at odd indexes if ($partIndex % 2 === 1) { continue; } // Make sure all characters are lowercase, as the mapping table is using lowercase characters $transformedPart = \strtolower($dateFormatPart); // Remove escapes related to non-format characters $transformedPart = \str_replace('\\', '', $transformedPart); // Apply general transformation first... $transformedPart = \strtr($transformedPart, self::$excelDateFormatToPHPDateFormatMapping[self::KEY_GENERAL]); // ... then apply hour transformation, for 12-hour or 24-hour format if (self::has12HourFormatMarker($dateFormatPart)) { $transformedPart = \strtr($transformedPart, self::$excelDateFormatToPHPDateFormatMapping[self::KEY_HOUR_12]); } else { $transformedPart = \strtr($transformedPart, self::$excelDateFormatToPHPDateFormatMapping[self::KEY_HOUR_24]); } // overwrite the parts array with the new transformed part $dateFormatParts[$partIndex] = $transformedPart; } // Merge all transformed parts back together $phpDateFormat = \implode('"', $dateFormatParts); // Finally, to have the date format compatible with the DateTime::format() function, we need to escape // all characters that are inside double quotes (and double quotes must be removed). // For instance, ["Day " dd] should become [\D\a\y\ dd] $phpDateFormat = \preg_replace_callback('/"(.+?)"/', function ($matches) { $stringToEscape = $matches[1]; $letters = \preg_split('//u', $stringToEscape, -1, PREG_SPLIT_NO_EMPTY); return '\\' . 
\implode('\\', $letters); }, $phpDateFormat); return $phpDateFormat; } /** * @param string $excelDateFormat Date format as defined by Excel * @return bool Whether the given date format has the 12-hour format marker */ private static function has12HourFormatMarker($excelDateFormat) { return (\stripos($excelDateFormat, 'am/pm') !== false); } } XLSX/Helper/CellHelper.php 0000644 00000006536 15152657741 0011340 0 ustar 00 <?php namespace Box\Spout\Reader\XLSX\Helper; use Box\Spout\Common\Exception\InvalidArgumentException; /** * Class CellHelper * This class provides helper functions when working with cells */ class CellHelper { // Using ord() is super slow... Using a pre-computed hash table instead. private static $columnLetterToIndexMapping = [ 'A' => 0, 'B' => 1, 'C' => 2, 'D' => 3, 'E' => 4, 'F' => 5, 'G' => 6, 'H' => 7, 'I' => 8, 'J' => 9, 'K' => 10, 'L' => 11, 'M' => 12, 'N' => 13, 'O' => 14, 'P' => 15, 'Q' => 16, 'R' => 17, 'S' => 18, 'T' => 19, 'U' => 20, 'V' => 21, 'W' => 22, 'X' => 23, 'Y' => 24, 'Z' => 25, ]; /** * Returns the base 10 column index associated to the cell index (base 26). * Excel uses A to Z letters for column indexing, where A is the 1st column, * Z is the 26th and AA is the 27th. * The mapping is zero based, so that A1 maps to 0, B2 maps to 1, Z13 to 25 and AA4 to 26. * * @param string $cellIndex The Excel cell index ('A1', 'BC13', ...) * @throws \Box\Spout\Common\Exception\InvalidArgumentException When the given cell index is invalid * @return int */ public static function getColumnIndexFromCellIndex($cellIndex) { if (!self::isValidCellIndex($cellIndex)) { throw new InvalidArgumentException('Cannot get column index from an invalid cell index.'); } $columnIndex = 0; // Remove row information $columnLetters = \preg_replace('/\d/', '', $cellIndex); // strlen() is super slow too... Using isset() is way faster and not too unreadable, // since we checked before that there are between 1 and 3 letters. $columnLength = isset($columnLetters[1]) ? 
(isset($columnLetters[2]) ? 3 : 2) : 1; // Looping over the different letters of the column is slower than this method. // Also, not using the pow() function because it's slooooow... switch ($columnLength) { case 1: $columnIndex = (self::$columnLetterToIndexMapping[$columnLetters]); break; case 2: $firstLetterIndex = (self::$columnLetterToIndexMapping[$columnLetters[0]] + 1) * 26; $secondLetterIndex = self::$columnLetterToIndexMapping[$columnLetters[1]]; $columnIndex = $firstLetterIndex + $secondLetterIndex; break; case 3: $firstLetterIndex = (self::$columnLetterToIndexMapping[$columnLetters[0]] + 1) * 676; $secondLetterIndex = (self::$columnLetterToIndexMapping[$columnLetters[1]] + 1) * 26; $thirdLetterIndex = self::$columnLetterToIndexMapping[$columnLetters[2]]; $columnIndex = $firstLetterIndex + $secondLetterIndex + $thirdLetterIndex; break; } return $columnIndex; } /** * Returns whether a cell index is valid, in an Excel world. * To be valid, the cell index should start with capital letters and be followed by numbers. * There can only be 3 letters, as there can only be 16,384 rows, which is equivalent to 'XFE'. * * @param string $cellIndex The Excel cell index ('A1', 'BC13', ...) 
* @return bool */ protected static function isValidCellIndex($cellIndex) { return (\preg_match('/^[A-Z]{1,3}\d+$/', $cellIndex) === 1); } } XLSX/Manager/SharedStringsManager.php 0000644 00000023117 15152657741 0013521 0 ustar 00 <?php namespace Box\Spout\Reader\XLSX\Manager; use Box\Spout\Common\Exception\IOException; use Box\Spout\Reader\Exception\XMLProcessingException; use Box\Spout\Reader\Wrapper\XMLReader; use Box\Spout\Reader\XLSX\Creator\HelperFactory; use Box\Spout\Reader\XLSX\Creator\InternalEntityFactory; use Box\Spout\Reader\XLSX\Manager\SharedStringsCaching\CachingStrategyFactory; use Box\Spout\Reader\XLSX\Manager\SharedStringsCaching\CachingStrategyInterface; /** * Class SharedStringsManager * This class manages the shared strings defined in the associated XML file */ class SharedStringsManager { /** Definition of XML nodes names used to parse data */ const XML_NODE_SST = 'sst'; const XML_NODE_SI = 'si'; const XML_NODE_R = 'r'; const XML_NODE_T = 't'; /** Definition of XML attributes used to parse data */ const XML_ATTRIBUTE_COUNT = 'count'; const XML_ATTRIBUTE_UNIQUE_COUNT = 'uniqueCount'; const XML_ATTRIBUTE_XML_SPACE = 'xml:space'; const XML_ATTRIBUTE_VALUE_PRESERVE = 'preserve'; /** @var string Path of the XLSX file being read */ protected $filePath; /** @var string Temporary folder where the temporary files to store shared strings will be stored */ protected $tempFolder; /** @var WorkbookRelationshipsManager Helps retrieving workbook relationships */ protected $workbookRelationshipsManager; /** @var InternalEntityFactory Factory to create entities */ protected $entityFactory; /** @var HelperFactory Factory to create helpers */ protected $helperFactory; /** @var CachingStrategyFactory Factory to create shared strings caching strategies */ protected $cachingStrategyFactory; /** @var CachingStrategyInterface The best caching strategy for storing shared strings */ protected $cachingStrategy; /** * @param string $filePath Path of the XLSX file being 
read
     * @param string $tempFolder Temporary folder where the temporary files to store shared strings will be stored
     * @param WorkbookRelationshipsManager $workbookRelationshipsManager Helps retrieving workbook relationships
     * @param InternalEntityFactory $entityFactory Factory to create entities
     * @param HelperFactory $helperFactory Factory to create helpers
     * @param CachingStrategyFactory $cachingStrategyFactory Factory to create shared strings caching strategies
     */
    public function __construct(
        $filePath,
        $tempFolder,
        $workbookRelationshipsManager,
        $entityFactory,
        $helperFactory,
        $cachingStrategyFactory
    ) {
        // Locations on disk
        $this->filePath = $filePath;
        $this->tempFolder = $tempFolder;

        // Collaborators
        $this->workbookRelationshipsManager = $workbookRelationshipsManager;
        $this->cachingStrategyFactory = $cachingStrategyFactory;
        $this->helperFactory = $helperFactory;
        $this->entityFactory = $entityFactory;
    }

    /**
     * Tells whether the workbook relationships reference a shared strings XML file.
     *
     * @return bool
     */
    public function hasSharedStrings()
    {
        $relationshipsManager = $this->workbookRelationshipsManager;

        return $relationshipsManager->hasSharedStringsXMLFile();
    }

    /**
     * Builds an in-memory array containing all the shared strings of the sheet.
     * All the strings are stored in a XML file, located at 'xl/sharedStrings.xml'.
     * It is then accessed by the sheet data, via the string index in the built table.
     *
     * More documentation available here: http://msdn.microsoft.com/en-us/library/office/gg278314.aspx
     *
     * The XML file can be really big with sheets containing a lot of data. That is why
     * we need to use a XML reader that provides streaming like the XMLReader library.
*
     * @throws \Box\Spout\Common\Exception\IOException If shared strings XML file can't be read
     * @return void
     */
    public function extractSharedStrings()
    {
        $sharedStringsXMLFilePath = $this->workbookRelationshipsManager->getSharedStringsXMLFilePath();
        $xmlReader = $this->entityFactory->createXMLReader();
        $sharedStringIndex = 0;

        if ($xmlReader->openFileInZip($this->filePath, $sharedStringsXMLFilePath) === false) {
            throw new IOException('Could not open "' . $sharedStringsXMLFilePath . '".');
        }

        try {
            // The unique count (when available) drives the choice of the caching strategy.
            $sharedStringsUniqueCount = $this->getSharedStringsUniqueCount($xmlReader);
            $this->cachingStrategy = $this->getBestSharedStringsCachingStrategy($sharedStringsUniqueCount);

            $xmlReader->readUntilNodeFound(self::XML_NODE_SI);

            while ($xmlReader->getCurrentNodeName() === self::XML_NODE_SI) {
                $this->processSharedStringsItem($xmlReader, $sharedStringIndex);
                $sharedStringIndex++;

                // jump to the next '<si>' tag
                $xmlReader->next(self::XML_NODE_SI);
            }

            $this->cachingStrategy->closeCache();
        } catch (XMLProcessingException $exception) {
            throw new IOException("The sharedStrings.xml file is invalid and cannot be read. [{$exception->getMessage()}]");
        } finally {
            // Release the reader on every exit path: previously it stayed open
            // whenever processing failed and the exception was rethrown above.
            $xmlReader->close();
        }
    }

    /**
     * Returns the shared strings unique count, as specified in <sst> tag.
*
     * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader instance
     * @throws \Box\Spout\Common\Exception\IOException If sharedStrings.xml is invalid and can't be read
     * @return int|null Number of unique shared strings in the sharedStrings.xml file
     */
    protected function getSharedStringsUniqueCount($xmlReader)
    {
        $xmlReader->next(self::XML_NODE_SST);

        // Keep reading while positioned on a node named "sst" that is not an element
        // (skips any DOCTYPE), so that we end up on the actual "sst" ELEMENT.
        while (
            $xmlReader->getCurrentNodeName() === self::XML_NODE_SST
            && $xmlReader->nodeType !== XMLReader::ELEMENT
        ) {
            $xmlReader->read();
        }

        $rawCount = $xmlReader->getAttribute(self::XML_ATTRIBUTE_UNIQUE_COUNT);

        if ($rawCount === null) {
            // some software do not add the "uniqueCount" attribute but only use the "count" one
            // @see https://github.com/box/spout/issues/254
            $rawCount = $xmlReader->getAttribute(self::XML_ATTRIBUTE_COUNT);
        }

        if ($rawCount === null) {
            return null;
        }

        return (int) $rawCount;
    }

    /**
     * Asks the caching strategy factory for the strategy best suited to the
     * given number of shared strings.
     *
     * @param int|null $sharedStringsUniqueCount Number of unique shared strings (NULL if unknown)
     * @return CachingStrategyInterface
     */
    protected function getBestSharedStringsCachingStrategy($sharedStringsUniqueCount)
    {
        $strategyFactory = $this->cachingStrategyFactory;

        return $strategyFactory->createBestCachingStrategy(
            $sharedStringsUniqueCount,
            $this->tempFolder,
            $this->helperFactory
        );
    }

    /**
     * Processes the shared strings item XML node which the given XML reader is positioned on.
*
     * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XML Reader positioned on a "<si>" node
     * @param int $sharedStringIndex Index of the processed shared strings item
     * @return void
     */
    protected function processSharedStringsItem($xmlReader, $sharedStringIndex)
    {
        // NOTE: expand() will automatically decode all XML entities of the child nodes
        $siNode = $xmlReader->expand();

        $concatenatedText = '';

        foreach ($siNode->getElementsByTagName(self::XML_NODE_T) as $textNode) {
            if (!$this->shouldExtractTextNodeValue($textNode)) {
                continue;
            }

            $text = $textNode->nodeValue;

            // Surrounding whitespace is dropped unless the node explicitly asks to keep it.
            if (!$this->shouldPreserveWhitespace($textNode)) {
                $text = \trim($text);
            }

            $concatenatedText .= $text;
        }

        $this->cachingStrategy->addStringForIndex($concatenatedText, $sharedStringIndex);
    }

    /**
     * Tells whether the value of the given text node is part of the shared string.
     * Some text nodes are part of a node describing the pronunciation for instance,
     * so only the nodes whose direct parent is "<si>" or "<r>" are considered.
     *
     * @param \DOMElement $textNode Text node to check
     * @return bool Whether the given text node's value must be extracted
     */
    protected function shouldExtractTextNodeValue($textNode)
    {
        $acceptedParents = [self::XML_NODE_SI, self::XML_NODE_R];

        return \in_array($textNode->parentNode->localName, $acceptedParents, true);
    }

    /**
     * Whitespace is preserved only when the text node carries the attribute 'xml:space="preserve"'.
     *
     * @param \DOMElement $textNode The text node element (<t>) whose whitespace may be preserved
     * @return bool Whether whitespace should be preserved
     */
    protected function shouldPreserveWhitespace($textNode)
    {
        return (self::XML_ATTRIBUTE_VALUE_PRESERVE === $textNode->getAttribute(self::XML_ATTRIBUTE_XML_SPACE));
    }

    /**
     * Returns the shared string at the given index, using the previously chosen caching strategy.
* * @param int $sharedStringIndex Index of the shared string in the sharedStrings.xml file * @throws \Box\Spout\Reader\Exception\SharedStringNotFoundException If no shared string found for the given index * @return string The shared string at the given index */ public function getStringAtIndex($sharedStringIndex) { return $this->cachingStrategy->getStringAtIndex($sharedStringIndex); } /** * Destroys the cache, freeing memory and removing any created artifacts * * @return void */ public function cleanup() { if ($this->cachingStrategy) { $this->cachingStrategy->clearCache(); } } } XLSX/Manager/OptionsManager.php 0000644 00000001625 15152657741 0012374 0 ustar 00 <?php namespace Box\Spout\Reader\XLSX\Manager; use Box\Spout\Common\Manager\OptionsManagerAbstract; use Box\Spout\Reader\Common\Entity\Options; /** * Class OptionsManager * XLSX Reader options manager */ class OptionsManager extends OptionsManagerAbstract { /** * {@inheritdoc} */ protected function getSupportedOptions() { return [ Options::TEMP_FOLDER, Options::SHOULD_FORMAT_DATES, Options::SHOULD_PRESERVE_EMPTY_ROWS, Options::SHOULD_USE_1904_DATES, ]; } /** * {@inheritdoc} */ protected function setDefaultOptions() { $this->setOption(Options::TEMP_FOLDER, \sys_get_temp_dir()); $this->setOption(Options::SHOULD_FORMAT_DATES, false); $this->setOption(Options::SHOULD_PRESERVE_EMPTY_ROWS, false); $this->setOption(Options::SHOULD_USE_1904_DATES, false); } } XLSX/Manager/StyleManager.php 0000644 00000031416 15152657741 0012042 0 ustar 00 <?php namespace Box\Spout\Reader\XLSX\Manager; use Box\Spout\Reader\XLSX\Creator\InternalEntityFactory; /** * Class StyleManager * This class manages XLSX styles */ class StyleManager { /** Nodes used to find relevant information in the styles XML file */ const XML_NODE_NUM_FMTS = 'numFmts'; const XML_NODE_NUM_FMT = 'numFmt'; const XML_NODE_CELL_XFS = 'cellXfs'; const XML_NODE_XF = 'xf'; /** Attributes used to find relevant information in the styles XML file */ const 
XML_ATTRIBUTE_NUM_FMT_ID = 'numFmtId'; const XML_ATTRIBUTE_FORMAT_CODE = 'formatCode'; const XML_ATTRIBUTE_APPLY_NUMBER_FORMAT = 'applyNumberFormat'; /** By convention, default style ID is 0 */ const DEFAULT_STYLE_ID = 0; const NUMBER_FORMAT_GENERAL = 'General'; /** * @see https://msdn.microsoft.com/en-us/library/ff529597(v=office.12).aspx * @var array Mapping between built-in numFmtId and the associated format - for dates only */ protected static $builtinNumFmtIdToNumFormatMapping = [ 14 => 'm/d/yyyy', // @NOTE: ECMA spec is 'mm-dd-yy' 15 => 'd-mmm-yy', 16 => 'd-mmm', 17 => 'mmm-yy', 18 => 'h:mm AM/PM', 19 => 'h:mm:ss AM/PM', 20 => 'h:mm', 21 => 'h:mm:ss', 22 => 'm/d/yyyy h:mm', // @NOTE: ECMA spec is 'm/d/yy h:mm', 45 => 'mm:ss', 46 => '[h]:mm:ss', 47 => 'mm:ss.0', // @NOTE: ECMA spec is 'mmss.0', ]; /** @var string Path of the XLSX file being read */ protected $filePath; /** @var bool Whether the XLSX file contains a styles XML file */ protected $hasStylesXMLFile; /** @var string|null Path of the styles XML file */ protected $stylesXMLFilePath; /** @var InternalEntityFactory Factory to create entities */ protected $entityFactory; /** @var array Array containing the IDs of built-in number formats indicating a date */ protected $builtinNumFmtIdIndicatingDates; /** @var array Array containing a mapping NUM_FMT_ID => FORMAT_CODE */ protected $customNumberFormats; /** @var array Array containing a mapping STYLE_ID => [STYLE_ATTRIBUTES] */ protected $stylesAttributes; /** @var array Cache containing a mapping NUM_FMT_ID => IS_DATE_FORMAT. 
Used to avoid lots of recalculations */
    protected $numFmtIdToIsDateFormatCache = [];

    /**
     * @param string $filePath Path of the XLSX file being read
     * @param WorkbookRelationshipsManager $workbookRelationshipsManager Helps retrieving workbook relationships
     * @param InternalEntityFactory $entityFactory Factory to create entities
     */
    public function __construct($filePath, $workbookRelationshipsManager, $entityFactory)
    {
        $this->filePath = $filePath;
        $this->entityFactory = $entityFactory;

        // The IDs of the built-in date formats are the keys of the built-in mapping.
        $this->builtinNumFmtIdIndicatingDates = \array_keys(self::$builtinNumFmtIdToNumFormatMapping);

        $hasStyles = $workbookRelationshipsManager->hasStylesXMLFile();
        $this->hasStylesXMLFile = $hasStyles;

        // The path is only looked up when the workbook actually references a styles file.
        if ($hasStyles) {
            $this->stylesXMLFilePath = $workbookRelationshipsManager->getStylesXMLFilePath();
        }
    }

    /**
     * Tells whether numeric values of cells using the given style
     * are timestamps that should be turned into dates.
     *
     * @param int $styleId Zero-based style ID
     * @return bool Whether the cell with the given cell should display a date instead of a numeric value
     */
    public function shouldFormatNumericValueAsDate($styleId)
    {
        // Without a styles XML file, nothing can indicate a date.
        if (!$this->hasStylesXMLFile) {
            return false;
        }

        $allStylesAttributes = $this->getStylesAttributes();

        // Default style (0) does not format numeric values as timestamps. Only custom styles do.
        if ($styleId === self::DEFAULT_STYLE_ID) {
            return false;
        }

        // A style ID missing from the styles.xml file means "format as numeric value".
        // Using isset here because it is way faster than array_key_exists...
        if (!isset($allStylesAttributes[$styleId])) {
            return false;
        }

        return $this->doesStyleIndicateDate($allStylesAttributes[$styleId]);
    }

    /**
     * Reads the styles.xml file and extract the relevant information from the file.
*
     * @return void
     */
    protected function extractRelevantInfo()
    {
        // Initialized up front, so that the lazy getters see the properties as set
        // even when the styles file cannot be opened.
        $this->customNumberFormats = [];
        $this->stylesAttributes = [];

        $xmlReader = $this->entityFactory->createXMLReader();

        if (!$xmlReader->openFileInZip($this->filePath, $this->stylesXMLFilePath)) {
            return;
        }

        try {
            while ($xmlReader->read()) {
                if ($xmlReader->isPositionedOnStartingNode(self::XML_NODE_NUM_FMTS)) {
                    $this->extractNumberFormats($xmlReader);
                } elseif ($xmlReader->isPositionedOnStartingNode(self::XML_NODE_CELL_XFS)) {
                    $this->extractStyleAttributes($xmlReader);
                }
            }
        } finally {
            // Release the reader even if reading the styles file fails midway;
            // previously it was only closed when the whole file was read successfully.
            $xmlReader->close();
        }
    }

    /**
     * Extracts number formats from the "numFmt" nodes.
     * Each format code is stored in the custom number formats mapping, keyed by its "numFmtId".
     * For simplicity, the styles attributes are kept in memory. This is possible thanks
     * to the reuse of formats. So 1 million cells should not use 1 million formats.
     *
     * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XML Reader positioned on the "numFmts" node
     * @return void
     */
    protected function extractNumberFormats($xmlReader)
    {
        while ($xmlReader->read()) {
            if ($xmlReader->isPositionedOnStartingNode(self::XML_NODE_NUM_FMT)) {
                $numFmtId = (int) ($xmlReader->getAttribute(self::XML_ATTRIBUTE_NUM_FMT_ID));
                $formatCode = $xmlReader->getAttribute(self::XML_ATTRIBUTE_FORMAT_CODE);
                $this->customNumberFormats[$numFmtId] = $formatCode;
            } elseif ($xmlReader->isPositionedOnEndingNode(self::XML_NODE_NUM_FMTS)) {
                // Once done reading "numFmts" node's children
                break;
            }
        }
    }

    /**
     * Extracts style attributes from the "xf" nodes, inside the "cellXfs" section.
     * For simplicity, the styles attributes are kept in memory. This is possible thanks
     * to the reuse of styles. So 1 million cells should not use 1 million styles.
*
     * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XML Reader positioned on the "cellXfs" node
     * @return void
     */
    protected function extractStyleAttributes($xmlReader)
    {
        while ($xmlReader->read()) {
            if ($xmlReader->isPositionedOnStartingNode(self::XML_NODE_XF)) {
                $numFmtId = $xmlReader->getAttribute(self::XML_ATTRIBUTE_NUM_FMT_ID);
                $normalizedNumFmtId = ($numFmtId !== null) ? (int) $numFmtId : null;

                $applyNumberFormat = $xmlReader->getAttribute(self::XML_ATTRIBUTE_APPLY_NUMBER_FORMAT);
                $normalizedApplyNumberFormat = null;

                if ($applyNumberFormat !== null) {
                    // A plain (bool) cast maps the string "false" to TRUE, so the attribute is parsed
                    // as a boolean literal ("1", "0", "true", "false"...) instead.
                    // Unrecognized values keep being handled by the former cast.
                    $parsedApplyNumberFormat = \filter_var(
                        $applyNumberFormat,
                        \FILTER_VALIDATE_BOOLEAN,
                        \FILTER_NULL_ON_FAILURE
                    );
                    $normalizedApplyNumberFormat = ($parsedApplyNumberFormat !== null)
                        ? $parsedApplyNumberFormat
                        : (bool) $applyNumberFormat;
                }

                // Styles are appended in document order: the array index is the zero-based style ID.
                $this->stylesAttributes[] = [
                    self::XML_ATTRIBUTE_NUM_FMT_ID => $normalizedNumFmtId,
                    self::XML_ATTRIBUTE_APPLY_NUMBER_FORMAT => $normalizedApplyNumberFormat,
                ];
            } elseif ($xmlReader->isPositionedOnEndingNode(self::XML_NODE_CELL_XFS)) {
                // Once done reading "cellXfs" node's children
                break;
            }
        }
    }

    /**
     * Returns the custom number formats, extracting them from the styles file on first use.
     *
     * @return array The custom number formats
     */
    protected function getCustomNumberFormats()
    {
        if (!isset($this->customNumberFormats)) {
            $this->extractRelevantInfo();
        }

        return $this->customNumberFormats;
    }

    /**
     * Returns the styles attributes, extracting them from the styles file on first use.
     *
     * @return array The styles attributes
     */
    protected function getStylesAttributes()
    {
        if (!isset($this->stylesAttributes)) {
            $this->extractRelevantInfo();
        }

        return $this->stylesAttributes;
    }

    /**
     * @param array $styleAttributes Array containing the style attributes (2 keys: "applyNumberFormat" and "numFmtId")
     * @return bool Whether the style with the given attributes indicates that the number is a date
     */
    protected function doesStyleIndicateDate($styleAttributes)
    {
        $applyNumberFormat = $styleAttributes[self::XML_ATTRIBUTE_APPLY_NUMBER_FORMAT];
        $numFmtId = $styleAttributes[self::XML_ATTRIBUTE_NUM_FMT_ID];

        // A style may apply a date format if it has:
        //  - "applyNumberFormat" attribute not set to "false"
        //  - "numFmtId" attribute set
        // This is a preliminary check, as having "numFmtId" set just means the style should apply a specific number format,
        // but this is not necessarily a date.
        if ($applyNumberFormat === false || $numFmtId === null) {
            return false;
        }

        return $this->doesNumFmtIdIndicateDate($numFmtId);
    }

    /**
     * Returns whether the number format ID indicates that the number is a date.
     * The result is cached to avoid recomputing the same thing over and over, as
     * "numFmtId" attributes can be shared between multiple styles.
     *
     * @param int $numFmtId
     * @return bool Whether the number format ID indicates that the number is a date
     */
    protected function doesNumFmtIdIndicateDate($numFmtId)
    {
        if (!isset($this->numFmtIdToIsDateFormatCache[$numFmtId])) {
            $formatCode = $this->getFormatCodeForNumFmtId($numFmtId);

            // A date is indicated either by a built-in date format ID or by a custom date format code.
            $this->numFmtIdToIsDateFormatCache[$numFmtId] = (
                $this->isNumFmtIdBuiltInDateFormat($numFmtId) ||
                $this->isFormatCodeCustomDateFormat($formatCode)
            );
        }

        return $this->numFmtIdToIsDateFormatCache[$numFmtId];
    }

    /**
     * @param int $numFmtId
     * @return string|null The custom number format or NULL if none defined for the given numFmtId
     */
    protected function getFormatCodeForNumFmtId($numFmtId)
    {
        $customNumberFormats = $this->getCustomNumberFormats();

        // Using isset here because it is way faster than array_key_exists...
        return (isset($customNumberFormats[$numFmtId])) ? $customNumberFormats[$numFmtId] : null;
    }

    /**
     * @param int $numFmtId
     * @return bool Whether the number format ID indicates that the number is a date
     */
    protected function isNumFmtIdBuiltInDateFormat($numFmtId)
    {
        return \in_array($numFmtId, $this->builtinNumFmtIdIndicatingDates);
    }

    /**
     * @param string|null $formatCode
     * @return bool Whether the given format code indicates that the number is a date
     */
    protected function isFormatCodeCustomDateFormat($formatCode)
    {
        // if no associated format code or if using the default "General" format
        if ($formatCode === null || \strcasecmp($formatCode, self::NUMBER_FORMAT_GENERAL) === 0) {
            return false;
        }

        return $this->isFormatCodeMatchingDateFormatPattern($formatCode);
    }

    /**
     * @param string $formatCode
     * @return bool Whether the given format code matches a date format pattern
     */
    protected function isFormatCodeMatchingDateFormatPattern($formatCode)
    {
        // Remove extra formatting (what's between [ ], the brackets should not be preceded by a "\")
        $pattern = '((?<!\\\)\[.+?(?<!\\\)\])';
        $formatCode = \preg_replace($pattern, '', $formatCode);

        // custom date formats contain specific characters to represent the date:
        // e - yy - m - d - h - s
        // and all of their variants (yyyy - mm - dd...)
        $dateFormatCharacters = ['e', 'yy', 'm', 'd', 'h', 's'];

        $hasFoundDateFormatCharacter = false;
        foreach ($dateFormatCharacters as $dateFormatCharacter) {
            // character not preceded by "\" (case insensitive)
            $pattern = '/(?<!\\\)' . $dateFormatCharacter . '/i';

            if (\preg_match($pattern, $formatCode)) {
                $hasFoundDateFormatCharacter = true;
                break;
            }
        }

        return $hasFoundDateFormatCharacter;
    }

    /**
     * Returns the format as defined in "styles.xml" of the given style.
     * NOTE: It is assumed that the style DOES have a number format associated to it.
* * @param int $styleId Zero-based style ID * @return string The number format code associated with the given style */ public function getNumberFormatCode($styleId) { $stylesAttributes = $this->getStylesAttributes(); $styleAttributes = $stylesAttributes[$styleId]; $numFmtId = $styleAttributes[self::XML_ATTRIBUTE_NUM_FMT_ID]; if ($this->isNumFmtIdBuiltInDateFormat($numFmtId)) { $numberFormatCode = self::$builtinNumFmtIdToNumFormatMapping[$numFmtId]; } else { $customNumberFormats = $this->getCustomNumberFormats(); $numberFormatCode = $customNumberFormats[$numFmtId]; } return $numberFormatCode; } } XLSX/Manager/SheetManager.php 0000644 00000023044 15152657741 0012010 0 ustar 00 <?php namespace Box\Spout\Reader\XLSX\Manager; use Box\Spout\Reader\Common\Entity\Options; use Box\Spout\Reader\Common\XMLProcessor; use Box\Spout\Reader\XLSX\Creator\InternalEntityFactory; use Box\Spout\Reader\XLSX\Sheet; /** * Class SheetManager * This class manages XLSX sheets */ class SheetManager { /** Paths of XML files relative to the XLSX file root */ const WORKBOOK_XML_RELS_FILE_PATH = 'xl/_rels/workbook.xml.rels'; const WORKBOOK_XML_FILE_PATH = 'xl/workbook.xml'; /** Definition of XML node names used to parse data */ const XML_NODE_WORKBOOK_PROPERTIES = 'workbookPr'; const XML_NODE_WORKBOOK_VIEW = 'workbookView'; const XML_NODE_SHEET = 'sheet'; const XML_NODE_SHEETS = 'sheets'; const XML_NODE_RELATIONSHIP = 'Relationship'; /** Definition of XML attributes used to parse data */ const XML_ATTRIBUTE_DATE_1904 = 'date1904'; const XML_ATTRIBUTE_ACTIVE_TAB = 'activeTab'; const XML_ATTRIBUTE_R_ID = 'r:id'; const XML_ATTRIBUTE_NAME = 'name'; const XML_ATTRIBUTE_STATE = 'state'; const XML_ATTRIBUTE_ID = 'Id'; const XML_ATTRIBUTE_TARGET = 'Target'; /** State value to represent a hidden sheet */ const SHEET_STATE_HIDDEN = 'hidden'; /** @var string Path of the XLSX file being read */ protected $filePath; /** @var \Box\Spout\Common\Manager\OptionsManagerInterface Reader's options manager */ 
protected $optionsManager;

    /** @var \Box\Spout\Reader\XLSX\Manager\SharedStringsManager Manages shared strings */
    protected $sharedStringsManager;

    /**
     * @var \Box\Spout\Common\Helper\GlobalFunctionsHelper Helper to work with global functions
     * NOTE(review): not assigned by the constructor below - confirm whether it is still used
     */
    protected $globalFunctionsHelper;

    /** @var InternalEntityFactory Factory to create entities */
    protected $entityFactory;

    /** @var \Box\Spout\Common\Helper\Escaper\XLSX Used to unescape XML data */
    protected $escaper;

    /** @var array List of sheets */
    protected $sheets;

    /** @var int Index of the sheet currently read */
    protected $currentSheetIndex;

    /** @var int Index of the active sheet (0 by default) */
    protected $activeSheetIndex;

    /**
     * Stores the collaborators needed to read the sheets metadata; no file is opened here.
     *
     * @param string $filePath Path of the XLSX file being read
     * @param \Box\Spout\Common\Manager\OptionsManagerInterface $optionsManager Reader's options manager
     * @param \Box\Spout\Reader\XLSX\Manager\SharedStringsManager $sharedStringsManager Manages shared strings
     * @param \Box\Spout\Common\Helper\Escaper\XLSX $escaper Used to unescape XML data
     * @param InternalEntityFactory $entityFactory Factory to create entities
     */
    public function __construct($filePath, $optionsManager, $sharedStringsManager, $escaper, $entityFactory)
    {
        $this->filePath = $filePath;
        $this->optionsManager = $optionsManager;
        $this->sharedStringsManager = $sharedStringsManager;
        $this->escaper = $escaper;
        $this->entityFactory = $entityFactory;
    }

    /**
     * Returns the sheets metadata of the file located at the previously given file path.
     * The sheets are listed in "xl/workbook.xml"; the paths to their data come from "xl/_rels/workbook.xml.rels".
*
     * The internal list of sheets, the current sheet index and the active sheet index
     * are reset on every call, so the workbook is parsed again each time.
     *
     * @return Sheet[] Sheets within the XLSX file
     */
    public function getSheets()
    {
        $this->sheets = [];
        $this->currentSheetIndex = 0;
        $this->activeSheetIndex = 0; // By default, the first sheet is active

        $xmlReader = $this->entityFactory->createXMLReader();
        $xmlProcessor = $this->entityFactory->createXMLProcessor($xmlReader);

        $xmlProcessor->registerCallback(self::XML_NODE_WORKBOOK_PROPERTIES, XMLProcessor::NODE_TYPE_START, [$this, 'processWorkbookPropertiesStartingNode']);
        $xmlProcessor->registerCallback(self::XML_NODE_WORKBOOK_VIEW, XMLProcessor::NODE_TYPE_START, [$this, 'processWorkbookViewStartingNode']);
        $xmlProcessor->registerCallback(self::XML_NODE_SHEET, XMLProcessor::NODE_TYPE_START, [$this, 'processSheetStartingNode']);
        $xmlProcessor->registerCallback(self::XML_NODE_SHEETS, XMLProcessor::NODE_TYPE_END, [$this, 'processSheetsEndingNode']);

        if ($xmlReader->openFileInZip($this->filePath, self::WORKBOOK_XML_FILE_PATH)) {
            try {
                $xmlProcessor->readUntilStopped();
            } finally {
                // release the reader even when processing throws, so the handle is not leaked
                $xmlReader->close();
            }
        }

        return $this->sheets;
    }

    /**
     * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<workbookPr>" starting node
     * @return int A return code that indicates what action should the processor take next
     */
    protected function processWorkbookPropertiesStartingNode($xmlReader)
    {
        // Using "filter_var($x, FILTER_VALIDATE_BOOLEAN)" here because the value of the "date1904" attribute
        // may be the string "false", that is not mapped to the boolean "false" by default...
$shouldUse1904Dates = \filter_var($xmlReader->getAttribute(self::XML_ATTRIBUTE_DATE_1904), FILTER_VALIDATE_BOOLEAN);
        $this->optionsManager->setOption(Options::SHOULD_USE_1904_DATES, $shouldUse1904Dates);

        return XMLProcessor::PROCESSING_CONTINUE;
    }

    /**
     * Records which sheet is the active one.
     *
     * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<workbookView>" starting node
     * @return int A return code that indicates what action should the processor take next
     */
    protected function processWorkbookViewStartingNode($xmlReader)
    {
        // The "workbookView" node is located before "sheet" nodes, ensuring that
        // the active sheet is known before parsing sheets data.
        $activeTab = $xmlReader->getAttribute(self::XML_ATTRIBUTE_ACTIVE_TAB);
        $this->activeSheetIndex = (int) $activeTab;

        return XMLProcessor::PROCESSING_CONTINUE;
    }

    /**
     * Builds the sheet described by the current node, appends it to the list and moves to the next index.
     *
     * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<sheet>" starting node
     * @return int A return code that indicates what action should the processor take next
     */
    protected function processSheetStartingNode($xmlReader)
    {
        $sheetIndex = $this->currentSheetIndex;
        $isActive = ($sheetIndex === $this->activeSheetIndex);

        $this->sheets[] = $this->getSheetFromSheetXMLNode($xmlReader, $sheetIndex, $isActive);
        $this->currentSheetIndex++;

        return XMLProcessor::PROCESSING_CONTINUE;
    }

    /**
     * Stops the processing once the closing "sheets" node is reached.
     *
     * @return int A return code that indicates what action should the processor take next
     */
    protected function processSheetsEndingNode()
    {
        return XMLProcessor::PROCESSING_STOP;
    }

    /**
     * Returns an instance of a sheet, given the XML node describing the sheet - from "workbook.xml".
     * We can find the XML file path describing the sheet inside "workbook.xml.rels", by mapping with the sheet ID
     * ("r:id" in "workbook.xml", "Id" in "workbook.xml.rels").
*
     * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReaderOnSheetNode XML Reader instance, pointing on the node describing the sheet, as defined in "workbook.xml"
     * @param int $sheetIndexZeroBased Index of the sheet, based on order of appearance in the workbook (zero-based)
     * @param bool $isSheetActive Whether this sheet was defined as active
     * @return \Box\Spout\Reader\XLSX\Sheet Sheet instance
     */
    protected function getSheetFromSheetXMLNode($xmlReaderOnSheetNode, $sheetIndexZeroBased, $isSheetActive)
    {
        $sheetId = $xmlReaderOnSheetNode->getAttribute(self::XML_ATTRIBUTE_R_ID);

        // a sheet is considered visible unless its state is exactly "hidden"
        $sheetState = $xmlReaderOnSheetNode->getAttribute(self::XML_ATTRIBUTE_STATE);
        $isSheetVisible = ($sheetState !== self::SHEET_STATE_HIDDEN);

        $escapedSheetName = $xmlReaderOnSheetNode->getAttribute(self::XML_ATTRIBUTE_NAME);
        $sheetName = $this->escaper->unescape($escapedSheetName);

        $sheetDataXMLFilePath = $this->getSheetDataXMLFilePathForSheetId($sheetId);

        return $this->entityFactory->createSheet(
            $this->filePath,
            $sheetDataXMLFilePath,
            $sheetIndexZeroBased,
            $sheetName,
            $isSheetActive,
            $isSheetVisible,
            $this->optionsManager,
            $this->sharedStringsManager
        );
    }

    /**
     * Scans "workbook.xml.rels" for the relationship whose "Id" equals the given sheet ID
     * and returns its "Target", prefixed with "/xl/" when it does not already start with it.
     * Returns an empty string if the file cannot be opened or no relationship matches.
     *
     * @param string $sheetId The sheet ID, as defined in "workbook.xml"
     * @return string The XML file path describing the sheet inside "workbook.xml.rels", for the given sheet ID
     */
    protected function getSheetDataXMLFilePathForSheetId($sheetId)
    {
        $sheetDataXMLFilePath = '';

        // find the file path of the sheet, by looking at the "workbook.xml.rels" file
        $xmlReader = $this->entityFactory->createXMLReader();
        if ($xmlReader->openFileInZip($this->filePath, self::WORKBOOK_XML_RELS_FILE_PATH)) {
            try {
                while ($xmlReader->read()) {
                    if (!$xmlReader->isPositionedOnStartingNode(self::XML_NODE_RELATIONSHIP)) {
                        continue;
                    }

                    $relationshipSheetId = $xmlReader->getAttribute(self::XML_ATTRIBUTE_ID);
                    if ($relationshipSheetId !== $sheetId) {
                        continue;
                    }

                    // In workbook.xml.rels, it is only "worksheets/sheet1.xml"
                    // In [Content_Types].xml, the path is "/xl/worksheets/sheet1.xml"
                    $sheetDataXMLFilePath = $xmlReader->getAttribute(self::XML_ATTRIBUTE_TARGET);

                    // sometimes, the sheet data file path already contains "/xl/"...
                    if (\strpos($sheetDataXMLFilePath, '/xl/') !== 0) {
                        $sheetDataXMLFilePath = '/xl/' . $sheetDataXMLFilePath;
                    }

                    // the relationship was found: stop scanning, whether or not the prefix had to be added
                    break;
                }
            } finally {
                // always release the reader, even if reading throws
                $xmlReader->close();
            }
        }

        return $sheetDataXMLFilePath;
    }
}
XLSX/Manager/WorkbookRelationshipsManager.php 0000644 00000013341 15152657741 0015301 0 ustar 00 <?php

namespace Box\Spout\Reader\XLSX\Manager;

use Box\Spout\Common\Exception\IOException;
use Box\Spout\Reader\Wrapper\XMLReader;
use Box\Spout\Reader\XLSX\Creator\InternalEntityFactory;

/**
 * Class WorkbookRelationshipsManager
 * This class manages the workbook relationships defined in the associated XML file
 */
class WorkbookRelationshipsManager
{
    const BASE_PATH = 'xl/';

    /** Path of workbook relationships XML file inside the XLSX file */
    const WORKBOOK_RELS_XML_FILE_PATH = 'xl/_rels/workbook.xml.rels';

    /** Relationships types - For Transitional and Strict OOXML */
    const RELATIONSHIP_TYPE_SHARED_STRINGS = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/sharedStrings';
    const RELATIONSHIP_TYPE_STYLES = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/styles';
    const RELATIONSHIP_TYPE_SHARED_STRINGS_STRICT = 'http://purl.oclc.org/ooxml/officeDocument/relationships/sharedStrings';
    const RELATIONSHIP_TYPE_STYLES_STRICT = 'http://purl.oclc.org/ooxml/officeDocument/relationships/styles';

    /** Nodes and attributes used to find relevant information in the workbook relationships XML file */
    const XML_NODE_RELATIONSHIP = 'Relationship';
    const XML_ATTRIBUTE_TYPE = 'Type';
    const XML_ATTRIBUTE_TARGET = 'Target';

    /** @var string Path of the XLSX file being read */
    private $filePath;

    /** @var InternalEntityFactory Factory to create entities */
    private $entityFactory;

    /** @var array Cache of the already read workbook relationships: [TYPE] => [FILE_NAME] */
    private $cachedWorkbookRelationships;

    /**
     * @param string $filePath Path
of the XLSX file being read
     * @param InternalEntityFactory $entityFactory Factory to create entities
     */
    public function __construct($filePath, $entityFactory)
    {
        $this->filePath = $filePath;
        $this->entityFactory = $entityFactory;
    }

    /**
     * Looks up the Transitional relationship type first, then the Strict one.
     *
     * @return string The path of the shared string XML file
     */
    public function getSharedStringsXMLFilePath()
    {
        $workbookRelationships = $this->getWorkbookRelationships();
        $sharedStringsXMLFilePath = $workbookRelationships[self::RELATIONSHIP_TYPE_SHARED_STRINGS]
            ?? $workbookRelationships[self::RELATIONSHIP_TYPE_SHARED_STRINGS_STRICT];

        // the file path can be relative (e.g. "styles.xml") or absolute (e.g. "/xl/styles.xml")
        $doesContainBasePath = (\strpos($sharedStringsXMLFilePath, self::BASE_PATH) !== false);
        if (!$doesContainBasePath) {
            // make sure we return an absolute file path
            $sharedStringsXMLFilePath = self::BASE_PATH . $sharedStringsXMLFilePath;
        }

        return $sharedStringsXMLFilePath;
    }

    /**
     * @return bool Whether the XLSX file contains a shared string XML file
     */
    public function hasSharedStringsXMLFile()
    {
        $workbookRelationships = $this->getWorkbookRelationships();

        return isset($workbookRelationships[self::RELATIONSHIP_TYPE_SHARED_STRINGS])
            || isset($workbookRelationships[self::RELATIONSHIP_TYPE_SHARED_STRINGS_STRICT]);
    }

    /**
     * @return bool Whether the XLSX file contains a styles XML file
     */
    public function hasStylesXMLFile()
    {
        $workbookRelationships = $this->getWorkbookRelationships();

        return isset($workbookRelationships[self::RELATIONSHIP_TYPE_STYLES])
            || isset($workbookRelationships[self::RELATIONSHIP_TYPE_STYLES_STRICT]);
    }

    /**
     * Looks up the Transitional relationship type first, then the Strict one.
     *
     * @return string The path of the styles XML file
     */
    public function getStylesXMLFilePath()
    {
        $workbookRelationships = $this->getWorkbookRelationships();
        $stylesXMLFilePath = $workbookRelationships[self::RELATIONSHIP_TYPE_STYLES]
            ?? $workbookRelationships[self::RELATIONSHIP_TYPE_STYLES_STRICT];

        // the file path can be relative (e.g. "styles.xml") or absolute (e.g. "/xl/styles.xml")
        $doesContainBasePath = (\strpos($stylesXMLFilePath, self::BASE_PATH) !== false);
        if (!$doesContainBasePath) {
            // make sure we return a full path
            $stylesXMLFilePath = self::BASE_PATH . $stylesXMLFilePath;
        }

        return $stylesXMLFilePath;
    }

    /**
     * Reads the workbook.xml.rels and extracts the filename associated to the different types.
     * It caches the result so that the file is read only once.
     *
     * @throws \Box\Spout\Common\Exception\IOException If workbook.xml.rels can't be read
     * @return array
     */
    private function getWorkbookRelationships()
    {
        if (!isset($this->cachedWorkbookRelationships)) {
            $xmlReader = $this->entityFactory->createXMLReader();

            if ($xmlReader->openFileInZip($this->filePath, self::WORKBOOK_RELS_XML_FILE_PATH) === false) {
                throw new IOException('Could not open "' . self::WORKBOOK_RELS_XML_FILE_PATH . '".');
            }

            $this->cachedWorkbookRelationships = [];

            try {
                while ($xmlReader->readUntilNodeFound(self::XML_NODE_RELATIONSHIP)) {
                    $this->processWorkbookRelationship($xmlReader);
                }
            } finally {
                // the reader was opened above: release it once done, or if reading throws
                $xmlReader->close();
            }
        }

        return $this->cachedWorkbookRelationships;
    }

    /**
     * Extracts and store the data of the current workbook relationship.
     *
     * @param XMLReader $xmlReader
     * @return void
     */
    private function processWorkbookRelationship($xmlReader)
    {
        $type = $xmlReader->getAttribute(self::XML_ATTRIBUTE_TYPE);
        $target = $xmlReader->getAttribute(self::XML_ATTRIBUTE_TARGET);

        // @NOTE: if a type is defined more than once, we overwrite the previous value
        // To be changed if we want to get the file paths of sheet XML files for instance.
        $this->cachedWorkbookRelationships[$type] = $target;
    }
}
XLSX/Manager/SharedStringsCaching/FileBasedStrategy.php 0000644 00000015730 15152657741 0017046 0 ustar 00 <?php

namespace Box\Spout\Reader\XLSX\Manager\SharedStringsCaching;

use Box\Spout\Reader\Exception\SharedStringNotFoundException;
use Box\Spout\Reader\XLSX\Creator\HelperFactory;

/**
 * Class FileBasedStrategy
 *
 * This class implements the file-based caching strategy for shared strings.
* Shared strings are stored in small files (with a max number of strings per file).
 * This strategy is slower than an in-memory strategy but is used to avoid out of memory crashes.
 */
class FileBasedStrategy implements CachingStrategyInterface
{
    /** Value to use to escape the line feed character ("\n") */
    const ESCAPED_LINE_FEED_CHARACTER = '_x000A_';

    /** @var \Box\Spout\Common\Helper\GlobalFunctionsHelper Helper to work with global functions */
    protected $globalFunctionsHelper;

    /** @var \Box\Spout\Common\Helper\FileSystemHelper Helper to perform file system operations */
    protected $fileSystemHelper;

    /** @var string Temporary folder where the temporary files will be created */
    protected $tempFolder;

    /**
     * @var int Maximum number of strings that can be stored in one temp file
     * @see CachingStrategyFactory::MAX_NUM_STRINGS_PER_TEMP_FILE
     */
    protected $maxNumStringsPerTempFile;

    /** @var resource|null Pointer to the last temp file a shared string was written to (null when none is open) */
    protected $tempFilePointer;

    /**
     * @var string Path of the temporary file whose contents is currently stored in memory
     * @see CachingStrategyFactory::MAX_NUM_STRINGS_PER_TEMP_FILE
     */
    protected $inMemoryTempFilePath;

    /**
     * @var array Contents of the temporary file that was last read
     * @see CachingStrategyFactory::MAX_NUM_STRINGS_PER_TEMP_FILE
     */
    protected $inMemoryTempFileContents;

    /**
     * Creates a uniquely named sub-folder of the given temp folder to hold the temp files.
     *
     * @param string $tempFolder Temporary folder where the temporary files to store shared strings will be stored
     * @param int $maxNumStringsPerTempFile Maximum number of strings that can be stored in one temp file
     * @param HelperFactory $helperFactory Factory to create helpers
     */
    public function __construct($tempFolder, $maxNumStringsPerTempFile, $helperFactory)
    {
        $this->fileSystemHelper = $helperFactory->createFileSystemHelper($tempFolder);
        $this->tempFolder = $this->fileSystemHelper->createFolder($tempFolder, \uniqid('sharedstrings'));

        $this->maxNumStringsPerTempFile = $maxNumStringsPerTempFile;

        $this->globalFunctionsHelper = $helperFactory->createGlobalFunctionsHelper();
        $this->tempFilePointer = null;
    }

    /**
     * Adds the given string to the cache.
     * A new temp file is started (and the previous one closed) whenever the file for the given index does not exist yet.
     *
     * @param string $sharedString The string to be added to the cache
     * @param int $sharedStringIndex Index of the shared string in the sharedStrings.xml file
     * @return void
     */
    public function addStringForIndex($sharedString, $sharedStringIndex)
    {
        $tempFilePath = $this->getSharedStringTempFilePath($sharedStringIndex);

        if (!$this->globalFunctionsHelper->file_exists($tempFilePath)) {
            if ($this->tempFilePointer) {
                $this->globalFunctionsHelper->fclose($this->tempFilePointer);
            }
            $this->tempFilePointer = $this->globalFunctionsHelper->fopen($tempFilePath, 'w');
        }

        // The shared string retrieval logic expects each cell data to be on one line only
        // Encoding the line feed character allows to preserve this assumption
        $lineFeedEncodedSharedString = $this->escapeLineFeed($sharedString);

        $this->globalFunctionsHelper->fwrite($this->tempFilePointer, $lineFeedEncodedSharedString . PHP_EOL);
    }

    /**
     * Returns the path for the temp file that should contain the string for the given index
     *
     * @param int $sharedStringIndex Index of the shared string in the sharedStrings.xml file
     * @return string The temp file path for the given index
     */
    protected function getSharedStringTempFilePath($sharedStringIndex)
    {
        // integer division: indexes [0, max) go to file 0, [max, 2*max) to file 1, ...
        $numTempFile = (int) ($sharedStringIndex / $this->maxNumStringsPerTempFile);

        return $this->tempFolder . '/sharedstrings' . $numTempFile;
    }

    /**
     * Closes the cache after the last shared string was added.
     * This prevents any additional string from being added to the cache.
     * Calling it more than once is harmless.
     *
     * @return void
     */
    public function closeCache()
    {
        // close pointer to the last temp file that was written
        if ($this->tempFilePointer) {
            $this->globalFunctionsHelper->fclose($this->tempFilePointer);
            // forget the closed handle so that a second call does not fclose() an invalid resource
            $this->tempFilePointer = null;
        }
    }

    /**
     * Returns the string located at the given index from the cache.
*
     * @param int $sharedStringIndex Index of the shared string in the sharedStrings.xml file
     * @throws \Box\Spout\Reader\Exception\SharedStringNotFoundException If no shared string found for the given index
     * @return string The shared string at the given index
     */
    public function getStringAtIndex($sharedStringIndex)
    {
        $tempFilePath = $this->getSharedStringTempFilePath($sharedStringIndex);
        $indexInFile = $sharedStringIndex % $this->maxNumStringsPerTempFile;

        $tempFileExists = $this->globalFunctionsHelper->file_exists($tempFilePath);
        if (!$tempFileExists) {
            throw new SharedStringNotFoundException("Shared string temp file not found: $tempFilePath ; for index: $sharedStringIndex");
        }

        // only (re)load the temp file when it is not the one already held in memory
        if ($this->inMemoryTempFilePath !== $tempFilePath) {
            // free memory
            unset($this->inMemoryTempFileContents);

            $rawContents = $this->globalFunctionsHelper->file_get_contents($tempFilePath);
            $this->inMemoryTempFileContents = \explode(PHP_EOL, $rawContents);
            $this->inMemoryTempFilePath = $tempFilePath;
        }

        // Using isset here because it is way faster than array_key_exists...
        if (!isset($this->inMemoryTempFileContents[$indexInFile])) {
            throw new SharedStringNotFoundException("Shared string not found for index: $sharedStringIndex");
        }

        $unescapedSharedString = $this->unescapeLineFeed($this->inMemoryTempFileContents[$indexInFile]);

        return \rtrim($unescapedSharedString, PHP_EOL);
    }

    /**
     * Replaces every line feed character (\n) by its escaped form
     *
     * @param string $unescapedString
     * @return string
     */
    private function escapeLineFeed($unescapedString)
    {
        $search = "\n";
        $replacement = self::ESCAPED_LINE_FEED_CHARACTER;

        return \str_replace($search, $replacement, $unescapedString);
    }

    /**
     * Replaces every escaped line feed by an actual line feed character (\n)
     *
     * @param string $escapedString
     * @return string
     */
    private function unescapeLineFeed($escapedString)
    {
        $search = self::ESCAPED_LINE_FEED_CHARACTER;
        $replacement = "\n";

        return \str_replace($search, $replacement, $escapedString);
    }

    /**
     * Destroys the cache, freeing memory and removing any created artifacts
     *
     * @return void
     */
    public function clearCache()
    {
        if (!$this->tempFolder) {
            return;
        }

        $this->fileSystemHelper->deleteFolderRecursively($this->tempFolder);
    }
}
XLSX/Manager/SharedStringsCaching/CachingStrategyInterface.php 0000644 00000002372 15152657741 0020403 0 ustar 00 <?php

namespace Box\Spout\Reader\XLSX\Manager\SharedStringsCaching;

/**
 * Interface CachingStrategyInterface
 */
interface CachingStrategyInterface
{
    /**
     * Adds the given string to the cache.
     *
     * @param string $sharedString The string to be added to the cache
     * @param int $sharedStringIndex Index of the shared string in the sharedStrings.xml file
     * @return void
     */
    public function addStringForIndex($sharedString, $sharedStringIndex);

    /**
     * Closes the cache after the last shared string was added.
     * This prevents any additional string from being added to the cache.
     *
     * @return void
     */
    public function closeCache();

    /**
     * Returns the string located at the given index from the cache.
*
     * @param int $sharedStringIndex Index of the shared string in the sharedStrings.xml file
     * @throws \Box\Spout\Reader\Exception\SharedStringNotFoundException If no shared string found for the given index
     * @return string The shared string at the given index
     */
    public function getStringAtIndex($sharedStringIndex);

    /**
     * Destroys the cache, freeing memory and removing any created artifacts
     *
     * @return void
     */
    public function clearCache();
}
XLSX/Manager/SharedStringsCaching/CachingStrategyFactory.php 0000644 00000012546 15152657741 0020116 0 ustar 00 <?php

namespace Box\Spout\Reader\XLSX\Manager\SharedStringsCaching;

use Box\Spout\Reader\XLSX\Creator\HelperFactory;

/**
 * Class CachingStrategyFactory
 * Picks between the in-memory and the file-based caching strategies for shared strings.
 */
class CachingStrategyFactory
{
    /**
     * The amount of memory needed to store a string was obtained empirically from this data:
     *
     *        ------------------------------------
     *        | Number of chars⁺ | Memory needed |
     *        ------------------------------------
     *        |           3,000  |         1 MB  |
     *        |          15,000  |         2 MB  |
     *        |          30,000  |         5 MB  |
     *        |          75,000  |        11 MB  |
     *        |         150,000  |        21 MB  |
     *        |         300,000  |        43 MB  |
     *        |         750,000  |       105 MB  |
     *        |       1,500,000  |       210 MB  |
     *        |       2,250,000  |       315 MB  |
     *        |       3,000,000  |       420 MB  |
     *        |       4,500,000  |       630 MB  |
     *        ------------------------------------
     *
     *        ⁺ All characters were 1 byte long
     *
     * This gives a linear graph where each 1-byte character requires about 150 bytes to be stored.
     * Given that some characters can take up to 4 bytes, we need 600 bytes per character to be safe.
     * Also, there are on average about 20 characters per cell (this is entirely empirical data...).
     *
     * This means that in order to store one shared string in memory, the amount of memory needed is:
     *   => 20 * 600 ≈ 12 KB
     */
    const AMOUNT_MEMORY_NEEDED_PER_STRING_IN_KB = 12;

    /**
     * To avoid running out of memory when extracting a huge number of shared strings, they can be saved to temporary files
     * instead of being kept in memory.
Then, when accessing a string, the corresponding file contents will be loaded in memory
     * and the string will be quickly retrieved.
     * The performance bottleneck is not when creating these temporary files, but rather when loading their content.
     * Because the contents of the last loaded file stays in memory until another file needs to be loaded, it works
     * best when the indexes of the shared strings are sorted in the sheet data.
     * 10,000 was chosen because it creates small files that are fast to be loaded in memory.
     */
    const MAX_NUM_STRINGS_PER_TEMP_FILE = 10000;

    /**
     * Returns the best caching strategy, given the number of unique shared strings
     * and the amount of memory available: in-memory when deemed safe, file-based otherwise.
     *
     * @param int|null $sharedStringsUniqueCount Number of unique shared strings (NULL if unknown)
     * @param string $tempFolder Temporary folder where the temporary files to store shared strings will be stored
     * @param HelperFactory $helperFactory Factory to create helpers
     * @return CachingStrategyInterface The best caching strategy
     */
    public function createBestCachingStrategy($sharedStringsUniqueCount, $tempFolder, $helperFactory)
    {
        $canCacheInMemory = $this->isInMemoryStrategyUsageSafe($sharedStringsUniqueCount);

        if (!$canCacheInMemory) {
            return new FileBasedStrategy($tempFolder, self::MAX_NUM_STRINGS_PER_TEMP_FILE, $helperFactory);
        }

        return new InMemoryStrategy($sharedStringsUniqueCount);
    }

    /**
     * Returns whether it is safe to use in-memory caching, given the number of unique shared strings
     * and the amount of memory available.
*
     * @param int|null $sharedStringsUniqueCount Number of unique shared strings (NULL if unknown)
     * @return bool
     */
    protected function isInMemoryStrategyUsageSafe($sharedStringsUniqueCount)
    {
        // if the number of shared strings in unknown, do not use "in memory" strategy
        if ($sharedStringsUniqueCount === null) {
            return false;
        }

        $memoryAvailable = $this->getMemoryLimitInKB();

        if ($memoryAvailable === -1) {
            // if cannot get memory limit or if memory limit set as unlimited, don't trust and play safe
            $isInMemoryStrategyUsageSafe = ($sharedStringsUniqueCount < self::MAX_NUM_STRINGS_PER_TEMP_FILE);
        } else {
            $memoryNeeded = $sharedStringsUniqueCount * self::AMOUNT_MEMORY_NEEDED_PER_STRING_IN_KB;
            $isInMemoryStrategyUsageSafe = ($memoryAvailable > $memoryNeeded);
        }

        return $isInMemoryStrategyUsageSafe;
    }

    /**
     * Returns the PHP "memory_limit" in Kilobytes.
     *
     * Accepts a number optionally followed by a unit (b, k, m, g, t - case insensitive) and an optional
     * trailing "b" (e.g. "128M", "128mb"). A number without unit (e.g. "134217728") is read as bytes.
     *
     * @return int|float The limit in KB, or -1 if the limit is unlimited or cannot be parsed
     */
    protected function getMemoryLimitInKB()
    {
        $memoryLimitFormatted = \strtolower(\trim((string) $this->getMemoryLimitFromIni()));

        // No memory limit
        if ($memoryLimitFormatted === '-1') {
            return -1;
        }

        // the unit is optional: a bare integer is a valid "memory_limit" value, expressed in bytes
        if (\preg_match('/^(\d+)([bkmgt]?)b?$/', $memoryLimitFormatted, $matches)) {
            $amount = (int) ($matches[1]);
            $unit = ($matches[2] !== '') ? $matches[2] : 'b';

            switch ($unit) {
                case 'b': return ($amount / 1024);
                case 'k': return $amount;
                case 'm': return ($amount * 1024);
                case 'g': return ($amount * 1024 * 1024);
                case 't': return ($amount * 1024 * 1024 * 1024);
            }
        }

        return -1;
    }

    /**
     * Returns the formatted "memory_limit" value
     *
     * @return string
     */
    protected function getMemoryLimitFromIni()
    {
        return \ini_get('memory_limit');
    }
}
XLSX/Manager/SharedStringsCaching/InMemoryStrategy.php 0000644 00000004736 15152657741 0016773 0 ustar 00 <?php

namespace Box\Spout\Reader\XLSX\Manager\SharedStringsCaching;

use Box\Spout\Reader\Exception\SharedStringNotFoundException;

/**
 * Class InMemoryStrategy
 *
 * This class implements the in-memory caching strategy for shared strings.
* This strategy is used when the number of unique strings is low, compared to the memory available.
 */
class InMemoryStrategy implements CachingStrategyInterface
{
    /** @var \SplFixedArray Array used to cache the shared strings */
    protected $inMemoryCache;

    /** @var bool Whether the cache has been closed */
    protected $isCacheClosed;

    /**
     * Allocates a fixed-size array with one slot per unique shared string; the cache starts open.
     *
     * @param int $sharedStringsUniqueCount Number of unique shared strings
     */
    public function __construct($sharedStringsUniqueCount)
    {
        $this->isCacheClosed = false;
        $this->inMemoryCache = new \SplFixedArray($sharedStringsUniqueCount);
    }

    /**
     * Adds the given string to the cache.
     * Nothing is stored once the cache has been closed.
     *
     * @param string $sharedString The string to be added to the cache
     * @param int $sharedStringIndex Index of the shared string in the sharedStrings.xml file
     * @return void
     */
    public function addStringForIndex($sharedString, $sharedStringIndex)
    {
        if ($this->isCacheClosed) {
            return;
        }

        $this->inMemoryCache->offsetSet($sharedStringIndex, $sharedString);
    }

    /**
     * Closes the cache after the last shared string was added.
     * This prevents any additional string from being added to the cache.
     *
     * @return void
     */
    public function closeCache()
    {
        $this->isCacheClosed = true;
    }

    /**
     * Returns the string located at the given index from the cache.
*
     * @param int $sharedStringIndex Index of the shared string in the sharedStrings.xml file
     * @throws \Box\Spout\Reader\Exception\SharedStringNotFoundException If no shared string found for the given index
     * @return string The shared string at the given index
     */
    public function getStringAtIndex($sharedStringIndex)
    {
        try {
            $sharedString = $this->inMemoryCache->offsetGet($sharedStringIndex);
        } catch (\RuntimeException $e) {
            // index outside of the fixed array bounds
            throw new SharedStringNotFoundException("Shared string not found for index: $sharedStringIndex");
        }

        // slots of the fixed array start as null: a null value means no string was ever added at this index
        if ($sharedString === null) {
            throw new SharedStringNotFoundException("Shared string not found for index: $sharedStringIndex");
        }

        return $sharedString;
    }

    /**
     * Destroys the cache, freeing memory and removing any created artifacts
     *
     * @return void
     */
    public function clearCache()
    {
        unset($this->inMemoryCache);
        $this->isCacheClosed = false;
    }
}
XLSX/Sheet.php 0000644 00000004005 15152657741 0007137 0 ustar 00 <?php

namespace Box\Spout\Reader\XLSX;

use Box\Spout\Reader\SheetInterface;

/**
 * Class Sheet
 * Represents a sheet within a XLSX file
 */
class Sheet implements SheetInterface
{
    /** @var \Box\Spout\Reader\XLSX\RowIterator To iterate over sheet's rows */
    protected $rowIterator;

    /** @var int Index of the sheet, based on order in the workbook (zero-based) */
    protected $index;

    /** @var string Name of the sheet */
    protected $name;

    /** @var bool Whether the sheet was the active one */
    protected $isActive;

    /** @var bool Whether the sheet is visible */
    protected $isVisible;

    /**
     * @param RowIterator $rowIterator The corresponding row iterator
     * @param int $sheetIndex Index of the sheet, based on order in the workbook (zero-based)
     * @param string $sheetName Name of the sheet
     * @param bool $isSheetActive Whether the sheet was defined as active
     * @param bool $isSheetVisible Whether the sheet is visible
     */
    public function __construct($rowIterator, $sheetIndex, $sheetName, $isSheetActive, $isSheetVisible)
    {
        $this->rowIterator = $rowIterator;
        $this->index = $sheetIndex;
        $this->name = $sheetName;
        $this->isActive = $isSheetActive;
        $this->isVisible = $isSheetVisible;
    }

    /**
     * @return \Box\Spout\Reader\XLSX\RowIterator
     */
public function getRowIterator()
    {
        return $this->rowIterator;
    }

    /**
     * @return int Index of the sheet, based on order in the workbook (zero-based)
     */
    public function getIndex()
    {
        return $this->index;
    }

    /**
     * @return string Name of the sheet
     */
    public function getName()
    {
        return $this->name;
    }

    /**
     * @return bool Whether the sheet was defined as active
     */
    public function isActive()
    {
        return $this->isActive;
    }

    /**
     * @return bool Whether the sheet is visible
     */
    public function isVisible()
    {
        return $this->isVisible;
    }
}
Common/Entity/Options.php 0000644 00000001044 15152657741 0011430 0 ustar 00 <?php

namespace Box\Spout\Reader\Common\Entity;

/**
 * Class Options
 * Holds the names of the options supported by the readers
 */
abstract class Options
{
    // Common options
    const SHOULD_FORMAT_DATES = 'shouldFormatDates';
    const SHOULD_PRESERVE_EMPTY_ROWS = 'shouldPreserveEmptyRows';

    // CSV specific options
    const FIELD_DELIMITER = 'fieldDelimiter';
    const FIELD_ENCLOSURE = 'fieldEnclosure';
    const ENCODING = 'encoding';

    // XLSX specific options
    const TEMP_FOLDER = 'tempFolder';
    const SHOULD_USE_1904_DATES = 'shouldUse1904Dates';
}
Common/Creator/InternalEntityFactoryInterface.php 0000644 00000000661 15152657741 0016246 0 ustar 00 <?php

namespace Box\Spout\Reader\Common\Creator;

use Box\Spout\Common\Entity\Cell;
use Box\Spout\Common\Entity\Row;

/**
 * Interface InternalEntityFactoryInterface
 */
interface InternalEntityFactoryInterface
{
    /**
     * @param Cell[] $cells
     * @return Row
     */
    public function createRow(array $cells = []);

    /**
     * @param mixed $cellValue
     * @return Cell
     */
    public function createCell($cellValue);
}
Common/Creator/ReaderFactory.php 0000644 00000007722 15152657741 0012663 0 ustar 00 <?php

namespace Box\Spout\Reader\Common\Creator;

use Box\Spout\Common\Creator\HelperFactory;
use Box\Spout\Common\Exception\UnsupportedTypeException;
use Box\Spout\Common\Type;
use Box\Spout\Reader\CSV\Creator\InternalEntityFactory as CSVInternalEntityFactory;
use Box\Spout\Reader\CSV\Manager\OptionsManager as CSVOptionsManager;
use
Box\Spout\Reader\CSV\Reader as CSVReader;
use Box\Spout\Reader\ODS\Creator\HelperFactory as ODSHelperFactory;
use Box\Spout\Reader\ODS\Creator\InternalEntityFactory as ODSInternalEntityFactory;
use Box\Spout\Reader\ODS\Creator\ManagerFactory as ODSManagerFactory;
use Box\Spout\Reader\ODS\Manager\OptionsManager as ODSOptionsManager;
use Box\Spout\Reader\ODS\Reader as ODSReader;
use Box\Spout\Reader\ReaderInterface;
use Box\Spout\Reader\XLSX\Creator\HelperFactory as XLSXHelperFactory;
use Box\Spout\Reader\XLSX\Creator\InternalEntityFactory as XLSXInternalEntityFactory;
use Box\Spout\Reader\XLSX\Creator\ManagerFactory as XLSXManagerFactory;
use Box\Spout\Reader\XLSX\Manager\OptionsManager as XLSXOptionsManager;
use Box\Spout\Reader\XLSX\Manager\SharedStringsCaching\CachingStrategyFactory;
use Box\Spout\Reader\XLSX\Reader as XLSXReader;

/**
 * Class ReaderFactory
 * This factory is used to create readers, based on the type of the file to be read.
 * It supports CSV, XLSX and ODS formats.
 */
class ReaderFactory
{
    /**
     * Creates a reader matching the (lower-cased) extension of the given path.
     *
     * @param string $path The path to the spreadsheet file. Supported extensions are .csv, .ods and .xlsx
     * @throws \Box\Spout\Common\Exception\UnsupportedTypeException
     * @return ReaderInterface
     */
    public static function createFromFile(string $path)
    {
        $fileExtension = \pathinfo($path, PATHINFO_EXTENSION);

        return self::createFromType(\strtolower($fileExtension));
    }

    /**
     * Creates an instance of the appropriate reader, given the type of the file to be read.
     *
     * @param string $readerType Type of the reader to instantiate
     * @throws \Box\Spout\Common\Exception\UnsupportedTypeException If no reader exists for the given type
     * @return ReaderInterface
     */
    public static function createFromType($readerType)
    {
        switch ($readerType) {
            case Type::CSV:
                return self::createCSVReader();
            case Type::XLSX:
                return self::createXLSXReader();
            case Type::ODS:
                return self::createODSReader();
        }

        // None of the known types matched
        throw new UnsupportedTypeException('No readers supporting the given type: ' . $readerType);
    }

    /**
     * Wires together the collaborators of a CSV reader.
     *
     * @return CSVReader
     */
    private static function createCSVReader()
    {
        $csvHelperFactory = new HelperFactory();

        return new CSVReader(
            new CSVOptionsManager(),
            $csvHelperFactory->createGlobalFunctionsHelper(),
            new CSVInternalEntityFactory($csvHelperFactory)
        );
    }

    /**
     * Wires together the collaborators of a XLSX reader.
     *
     * @return XLSXReader
     */
    private static function createXLSXReader()
    {
        $xlsxHelperFactory = new XLSXHelperFactory();
        $xlsxManagerFactory = new XLSXManagerFactory($xlsxHelperFactory, new CachingStrategyFactory());

        return new XLSXReader(
            new XLSXOptionsManager(),
            $xlsxHelperFactory->createGlobalFunctionsHelper(),
            new XLSXInternalEntityFactory($xlsxManagerFactory, $xlsxHelperFactory),
            $xlsxManagerFactory
        );
    }

    /**
     * Wires together the collaborators of an ODS reader.
     *
     * @return ODSReader
     */
    private static function createODSReader()
    {
        $odsHelperFactory = new ODSHelperFactory();

        return new ODSReader(
            new ODSOptionsManager(),
            $odsHelperFactory->createGlobalFunctionsHelper(),
            new ODSInternalEntityFactory($odsHelperFactory, new ODSManagerFactory())
        );
    }
}
Common/Creator/ReaderEntityFactory.php 0000644 00000003251 15152657741 0014051 0 ustar 00 <?php

namespace Box\Spout\Reader\Common\Creator;

use Box\Spout\Common\Exception\UnsupportedTypeException;
use Box\Spout\Common\Type;
use Box\Spout\Reader\ReaderInterface;

/**
 * Class ReaderEntityFactory
 * Factory to create external entities
 */
class ReaderEntityFactory
{
    /**
     * Creates a reader by file extension
     *
     * @param string $path The path to the spreadsheet file.
Supported extensions are .csv, .ods and .xlsx
     * @throws \Box\Spout\Common\Exception\UnsupportedTypeException
     * @return ReaderInterface
     */
    public static function createReaderFromFile(string $path)
    {
        return ReaderFactory::createFromFile($path);
    }

    /**
     * Creates an instance of a CSV reader.
     *
     * @return \Box\Spout\Reader\CSV\Reader
     */
    public static function createCSVReader()
    {
        try {
            return ReaderFactory::createFromType(Type::CSV);
        } catch (UnsupportedTypeException $unsupportedTypeException) {
            // should never happen
        }
    }

    /**
     * Creates an instance of a XLSX reader.
     *
     * @return \Box\Spout\Reader\XLSX\Reader
     */
    public static function createXLSXReader()
    {
        try {
            return ReaderFactory::createFromType(Type::XLSX);
        } catch (UnsupportedTypeException $unsupportedTypeException) {
            // should never happen
        }
    }

    /**
     * Creates an instance of an ODS reader.
     *
     * @return \Box\Spout\Reader\ODS\Reader
     */
    public static function createODSReader()
    {
        try {
            return ReaderFactory::createFromType(Type::ODS);
        } catch (UnsupportedTypeException $unsupportedTypeException) {
            // should never happen
        }
    }
}
Common/XMLProcessor.php 0000644 00000013507 15152657741 0011070 0 ustar 00 <?php

namespace Box\Spout\Reader\Common;

use Box\Spout\Reader\Wrapper\XMLReader;

/**
 * Class XMLProcessor
 * Helps process XML files
 */
class XMLProcessor
{
    /* Node types */
    const NODE_TYPE_START = XMLReader::ELEMENT;
    const NODE_TYPE_END = XMLReader::END_ELEMENT;

    /* Keys associated to reflection attributes to invoke a callback */
    const CALLBACK_REFLECTION_METHOD = 'reflectionMethod';
    const CALLBACK_REFLECTION_OBJECT = 'reflectionObject';

    /* Values returned by the callbacks to indicate what the processor should do next */
    const PROCESSING_CONTINUE = 1;
    const PROCESSING_STOP = 2;

    /** @var \Box\Spout\Reader\Wrapper\XMLReader The XMLReader object that will help read sheet's XML data */
    protected $xmlReader;

    /** @var array Registered callbacks, indexed by the key built from the node name and node type */
    private $callbacks = [];

    /**
     * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object
     */
    public function __construct($xmlReader)
    {
        $this->xmlReader = $xmlReader;
    }

    /**
     * Registers the callback to run when a node with the given name and type is read.
     * Registering again for the same name and type replaces the previous callback.
     *
     * @param string $nodeName A callback may be triggered when a node with this name is read
     * @param int $nodeType Type of the node [NODE_TYPE_START || NODE_TYPE_END]
     * @param callable $callback Callback to execute when the read node has the given name and type
     * @return XMLProcessor The processor itself, to allow chaining
     */
    public function registerCallback($nodeName, $nodeType, $callback)
    {
        $this->callbacks[$this->getCallbackKey($nodeName, $nodeType)] = $this->getInvokableCallbackData($callback);

        return $this;
    }

    /**
     * @param string $nodeName Name of the node
     * @param int $nodeType Type of the node [NODE_TYPE_START || NODE_TYPE_END]
     * @return string Key used to store the associated callback (node name followed by node type)
     */
    private function getCallbackKey($nodeName, $nodeType)
    {
        return $nodeName . $nodeType;
    }

    /**
     * Because the callback can be a "protected" function, we don't want to use call_user_func() directly
     * but instead invoke the callback using Reflection. This allows the invocation of "protected" functions.
     * Since some functions can be called a lot, we pre-process the callback to only return the elements that
     * will be needed to invoke the callback later.
     *
     * @param callable $callback Array reference to a callback: [OBJECT, METHOD_NAME]
     * @return array Associative array containing the elements needed to invoke the callback using Reflection
     */
    private function getInvokableCallbackData($callback)
    {
        $targetObject = $callback[0];

        $method = new \ReflectionMethod(\get_class($targetObject), $callback[1]);
        $method->setAccessible(true);

        return [
            self::CALLBACK_REFLECTION_METHOD => $method,
            self::CALLBACK_REFLECTION_OBJECT => $targetObject,
        ];
    }

    /**
     * Resumes the reading of the XML file where it was left off.
     * Stops whenever a callback indicates that reading should stop or at the end of the file.
*
     * @throws \Box\Spout\Reader\Exception\XMLProcessingException
     * @return void
     */
    public function readUntilStopped()
    {
        while ($this->xmlReader->read()) {
            $callbackData = $this->getRegisteredCallbackData(
                $this->xmlReader->name,
                $this->xmlReader->localName,
                $this->xmlReader->nodeType
            );

            if ($callbackData === null) {
                // no callback registered for this node
                continue;
            }

            if ($this->invokeCallback($callbackData, [$this->xmlReader]) === self::PROCESSING_STOP) {
                // stop reading
                break;
            }
        }
    }

    /**
     * @param string $nodeNamePossiblyWithPrefix Name of the node, possibly prefixed
     * @param string $nodeNameWithoutPrefix Name of the same node, un-prefixed
     * @param int $nodeType Type of the node [NODE_TYPE_START || NODE_TYPE_END]
     * @return array|null Callback data to be used for execution when a node of the given name/type is read or NULL if none found
     */
    private function getRegisteredCallbackData($nodeNamePossiblyWithPrefix, $nodeNameWithoutPrefix, $nodeType)
    {
        // With prefixed nodes, we should match if (by order of preference):
        //  1. the callback was registered with the prefixed node name (e.g. "x:worksheet")
        //  2. the callback was registered with the un-prefixed node name (e.g. "worksheet")
        $lookupKey = $this->getCallbackKey($nodeNameWithoutPrefix, $nodeType);

        if ($nodeNamePossiblyWithPrefix !== $nodeNameWithoutPrefix) {
            $prefixedKey = $this->getCallbackKey($nodeNamePossiblyWithPrefix, $nodeType);

            if (isset($this->callbacks[$prefixedKey])) {
                $lookupKey = $prefixedKey;
            }
        }

        // The null coalescing operator behaves like isset(), which is way faster than array_key_exists...
        return $this->callbacks[$lookupKey] ?? null;
    }

    /**
     * @param array $callbackData Associative array containing data to invoke the callback using Reflection
     * @param array $args Arguments to pass to the callback
     * @return int Callback response
     */
    private function invokeCallback($callbackData, $args)
    {
        return $callbackData[self::CALLBACK_REFLECTION_METHOD]->invokeArgs(
            $callbackData[self::CALLBACK_REFLECTION_OBJECT],
            $args
        );
    }
}
Common/Manager/RowManager.php 0000644 00000004042 15152657741 0012136 0 ustar 00 <?php

namespace Box\Spout\Reader\Common\Manager;

use Box\Spout\Common\Entity\Row;
use Box\Spout\Reader\Common\Creator\InternalEntityFactoryInterface;

/**
 * Class RowManager
 */
class RowManager
{
    /** @var InternalEntityFactoryInterface Factory to create entities */
    private $entityFactory;

    /**
     * @param InternalEntityFactoryInterface $entityFactory Factory to create entities
     */
    public function __construct(InternalEntityFactoryInterface $entityFactory)
    {
        $this->entityFactory = $entityFactory;
    }

    /**
     * Detect whether a row is considered empty.
     * An empty row has all of its cells empty (a row without any cell is therefore empty too).
     *
     * @param Row $row
     * @return bool
     */
    public function isEmpty(Row $row)
    {
        foreach ($row->getCells() as $currentCell) {
            if (!$currentCell->isEmpty()) {
                return false;
            }
        }

        return true;
    }

    /**
     * Fills the missing indexes of a row with empty cells.
     *
     * @param Row $row
     * @return Row The same row, with an empty-string cell at every previously missing index
     */
    public function fillMissingIndexesWithEmptyCells(Row $row)
    {
        $cellCount = $row->getNumCells();

        if ($cellCount === 0) {
            return $row;
        }

        $existingCells = $row->getCells();

        // If the row has empty cells, calling "setCellAtIndex" will add the cell
        // but in the wrong place (the new cell is added at the end of the array).
        // Therefore, we need to sort the array using keys to have proper order.
        // @see https://github.com/box/spout/issues/740
        $hasFilledGaps = false;

        for ($index = 0; $index < $cellCount; $index++) {
            if (isset($existingCells[$index])) {
                continue;
            }

            $row->setCellAtIndex($this->entityFactory->createCell(''), $index);
            $hasFilledGaps = true;
        }

        if ($hasFilledGaps) {
            $sortedCells = $row->getCells();
            ksort($sortedCells);
            $row->setCells($sortedCells);
        }

        return $row;
    }
}
ReaderInterface.php 0000644 00000001455 15152657741 0010322 0 ustar 00 <?php

namespace Box\Spout\Reader;

/**
 * Interface ReaderInterface
 */
interface ReaderInterface
{
    /**
     * Prepares the reader to read the given file. It also makes sure
     * that the file exists and is readable.
     *
     * @param string $filePath Path of the file to be read
     * @throws \Box\Spout\Common\Exception\IOException
     * @return void
     */
    public function open($filePath);

    /**
     * Returns an iterator to iterate over sheets.
     *
     * @throws \Box\Spout\Reader\Exception\ReaderNotOpenedException If called before opening the reader
     * @return \Iterator To iterate over sheets
     */
    public function getSheetIterator();

    /**
     * Closes the reader, preventing any additional reading
     *
     * @return void
     */
    public function close();
}
SheetInterface.php 0000644 00000001146 15152657741 0010165 0 ustar 00 <?php

namespace Box\Spout\Reader;

/**
 * Interface SheetInterface
 */
interface SheetInterface
{
    /**
     * @return IteratorInterface Iterator to iterate over the sheet's rows.
*/
    public function getRowIterator();

    /**
     * @return int Index of the sheet
     */
    public function getIndex();

    /**
     * @return string Name of the sheet
     */
    public function getName();

    /**
     * @return bool Whether the sheet was defined as active
     */
    public function isActive();

    /**
     * @return bool Whether the sheet is visible
     */
    public function isVisible();
}
ReaderAbstract.php 0000644 00000017250 15152657741 0010165 0 ustar 00 <?php

namespace Box\Spout\Reader;

use Box\Spout\Common\Exception\IOException;
use Box\Spout\Common\Helper\GlobalFunctionsHelper;
use Box\Spout\Common\Manager\OptionsManagerInterface;
use Box\Spout\Reader\Common\Creator\InternalEntityFactoryInterface;
use Box\Spout\Reader\Common\Entity\Options;
use Box\Spout\Reader\Exception\ReaderNotOpenedException;

/**
 * Class ReaderAbstract
 * Base class of the readers: holds the shared collaborators and declares
 * the operations each concrete reader has to provide.
 *
 * @abstract
 */
abstract class ReaderAbstract implements ReaderInterface
{
    /** @var bool Indicates whether the stream is currently open */
    protected $isStreamOpened = false;

    /** @var InternalEntityFactoryInterface Factory to create entities */
    protected $entityFactory;

    /** @var \Box\Spout\Common\Helper\GlobalFunctionsHelper Helper to work with global functions */
    protected $globalFunctionsHelper;

    /** @var OptionsManagerInterface Reader's options manager */
    protected $optionsManager;

    /**
     * Returns whether stream wrappers are supported
     *
     * @return bool
     */
    abstract protected function doesSupportStreamWrapper();

    /**
     * Opens the file at the given file path to make it ready to be read
     *
     * @param string $filePath Path of the file to be read
     * @return void
     */
    abstract protected function openReader($filePath);

    /**
     * Returns an iterator to iterate over sheets.
     *
     * @return IteratorInterface To iterate over sheets
     */
    abstract protected function getConcreteSheetIterator();

    /**
     * Closes the reader. To be used after reading the file.
*
     * @return void
     */
    abstract protected function closeReader();

    /**
     * @param OptionsManagerInterface $optionsManager
     * @param GlobalFunctionsHelper $globalFunctionsHelper
     * @param InternalEntityFactoryInterface $entityFactory
     */
    public function __construct(
        OptionsManagerInterface $optionsManager,
        GlobalFunctionsHelper $globalFunctionsHelper,
        InternalEntityFactoryInterface $entityFactory
    ) {
        $this->optionsManager = $optionsManager;
        $this->globalFunctionsHelper = $globalFunctionsHelper;
        $this->entityFactory = $entityFactory;
    }

    /**
     * Sets whether date/time values should be returned as PHP objects or be formatted as strings.
     *
     * @param bool $shouldFormatDates
     * @return ReaderAbstract The reader itself, to allow chaining
     */
    public function setShouldFormatDates($shouldFormatDates)
    {
        $this->optionsManager->setOption(Options::SHOULD_FORMAT_DATES, $shouldFormatDates);

        return $this;
    }

    /**
     * Sets whether empty rows should be returned or skipped.
     *
     * @param bool $shouldPreserveEmptyRows
     * @return ReaderAbstract The reader itself, to allow chaining
     */
    public function setShouldPreserveEmptyRows($shouldPreserveEmptyRows)
    {
        $this->optionsManager->setOption(Options::SHOULD_PRESERVE_EMPTY_ROWS, $shouldPreserveEmptyRows);

        return $this;
    }

    /**
     * Prepares the reader to read the given file. It also makes sure
     * that the file exists and is readable.
     *
     * @param string $filePath Path of the file to be read
     * @throws \Box\Spout\Common\Exception\IOException If the file at the given path does not exist, is not readable or is corrupted
     * @return void
     */
    public function open($filePath)
    {
        if ($this->isStreamWrapper($filePath) && (!$this->doesSupportStreamWrapper() || !$this->isSupportedStreamWrapper($filePath))) {
            throw new IOException("Could not open $filePath for reading! Stream wrapper used is not supported for this type of file.");
        }

        if (!$this->isPhpStream($filePath)) {
            // we skip the checks if the provided file path points to a PHP stream
            if (!$this->globalFunctionsHelper->file_exists($filePath)) {
                throw new IOException("Could not open $filePath for reading! File does not exist.");
            }
            if (!$this->globalFunctionsHelper->is_readable($filePath)) {
                throw new IOException("Could not open $filePath for reading! File is not readable.");
            }
        }

        try {
            $fileRealPath = $this->getFileRealPath($filePath);
            $this->openReader($fileRealPath);
            $this->isStreamOpened = true;
        } catch (\Exception $exception) {
            // Keep the causing exception attached as "previous", so its type and trace stay available
            throw new IOException("Could not open $filePath for reading! ({$exception->getMessage()})", 0, $exception);
        }
    }

    /**
     * Returns the real path of the given path.
     * If the given path is a valid stream wrapper, returns the path unchanged.
     *
     * @param string $filePath
     * @return string
     */
    protected function getFileRealPath($filePath)
    {
        if ($this->isSupportedStreamWrapper($filePath)) {
            return $filePath;
        }

        // Need to use realpath to fix "Can't open file" on some Windows setup
        return \realpath($filePath);
    }

    /**
     * Returns the scheme of the custom stream wrapper, if the path indicates a stream wrapper is used.
     * For example, php://temp => php, s3://path/to/file => s3...
     *
     * @param string $filePath Path of the file to be read
     * @return string|null The stream wrapper scheme or NULL if not a stream wrapper
     */
    protected function getStreamWrapperScheme($filePath)
    {
        $streamScheme = null;
        if (\preg_match('/^(\w+):\/\//', $filePath, $matches)) {
            $streamScheme = $matches[1];
        }

        return $streamScheme;
    }

    /**
     * Checks if the given path uses a stream wrapper, i.e. starts with a "scheme://" prefix
     * (like php://temp, mystream://foo/bar...). Local paths do not.
     *
     * @param string $filePath Path of the file to be read
     * @return bool Whether the given path uses a stream wrapper (supported or not)
     */
    protected function isStreamWrapper($filePath)
    {
        return ($this->getStreamWrapperScheme($filePath) !== null);
    }

    /**
     * Checks if the given path is a supported stream wrapper
     * (like php://temp, mystream://foo/bar...).
     * If the given path is a local path, returns true.
     *
     * @param string $filePath Path of the file to be read
     * @return bool Whether the given path is a supported stream wrapper
     */
    protected function isSupportedStreamWrapper($filePath)
    {
        $streamScheme = $this->getStreamWrapperScheme($filePath);

        if ($streamScheme === null) {
            // local path
            return true;
        }

        // Strict comparison: the scheme is a string extracted from the path
        return \in_array($streamScheme, $this->globalFunctionsHelper->stream_get_wrappers(), true);
    }

    /**
     * Checks if a path is a PHP stream (like php://output, php://memory, ...)
     *
     * @param string $filePath Path of the file to be read
     * @return bool Whether the given path maps to a PHP stream
     */
    protected function isPhpStream($filePath)
    {
        $streamScheme = $this->getStreamWrapperScheme($filePath);

        return ($streamScheme === 'php');
    }

    /**
     * Returns an iterator to iterate over sheets.
*
     * @throws \Box\Spout\Reader\Exception\ReaderNotOpenedException If called before opening the reader
     * @return \Iterator To iterate over sheets
     */
    public function getSheetIterator()
    {
        if ($this->isStreamOpened) {
            return $this->getConcreteSheetIterator();
        }

        throw new ReaderNotOpenedException('Reader should be opened first.');
    }

    /**
     * Closes the reader, preventing any additional reading.
     * Does nothing when the reader is not currently opened.
     *
     * @return void
     */
    public function close()
    {
        if (!$this->isStreamOpened) {
            return;
        }

        $this->closeReader();

        $concreteSheetIterator = $this->getConcreteSheetIterator();
        if ($concreteSheetIterator) {
            $concreteSheetIterator->end();
        }

        $this->isStreamOpened = false;
    }
}
IteratorInterface.php 0000644 00000000401 15152657741 0010677 0 ustar 00 <?php

namespace Box\Spout\Reader;

/**
 * Interface IteratorInterface
 */
interface IteratorInterface extends \Iterator
{
    /**
     * Cleans up what was created to iterate over the object.
     *
     * @return void
     */
    public function end();
}
ODS/Reader.php 0000644 00000004014 15152657741 0007120 0 ustar 00 <?php

namespace Box\Spout\Reader\ODS;

use Box\Spout\Common\Exception\IOException;
use Box\Spout\Reader\ODS\Creator\InternalEntityFactory;
use Box\Spout\Reader\ReaderAbstract;

/**
 * Class Reader
 * This class provides support to read data from a ODS file
 */
class Reader extends ReaderAbstract
{
    /** @var \ZipArchive */
    protected $zip;

    /** @var SheetIterator To iterate over the ODS sheets */
    protected $sheetIterator;

    /**
     * Returns whether stream wrappers are supported
     *
     * @return bool Always FALSE for this reader
     */
    protected function doesSupportStreamWrapper()
    {
        return false;
    }

    /**
     * Opens the file at the given file path to make it ready to be read.
*
     * @param string $filePath Path of the file to be read
     * @throws \Box\Spout\Common\Exception\IOException If the file at the given path or its content cannot be read
     * @throws \Box\Spout\Reader\Exception\NoSheetsFoundException If there are no sheets in the file
     * @return void
     */
    protected function openReader($filePath)
    {
        /** @var InternalEntityFactory $entityFactory */
        $entityFactory = $this->entityFactory;

        $this->zip = $entityFactory->createZipArchive();

        // Only a strict TRUE means the archive was opened
        if ($this->zip->open($filePath) !== true) {
            throw new IOException("Could not open $filePath for reading.");
        }

        $this->sheetIterator = $entityFactory->createSheetIterator($filePath, $this->optionsManager);
    }

    /**
     * Returns an iterator to iterate over sheets.
     *
     * @return SheetIterator To iterate over sheets
     */
    protected function getConcreteSheetIterator()
    {
        return $this->sheetIterator;
    }

    /**
     * Closes the reader. To be used after reading the file.
*
     * @return void
     */
    protected function closeReader()
    {
        if (!$this->zip) {
            return;
        }

        $this->zip->close();
    }
}
ODS/Creator/HelperFactory.php 0000644 00000002144 15152657741 0012066 0 ustar 00 <?php

namespace Box\Spout\Reader\ODS\Creator;

use Box\Spout\Reader\ODS\Helper\CellValueFormatter;
use Box\Spout\Reader\ODS\Helper\SettingsHelper;

/**
 * Class HelperFactory
 * Factory to create helpers
 */
class HelperFactory extends \Box\Spout\Common\Creator\HelperFactory
{
    /**
     * @param bool $shouldFormatDates Whether date/time values should be returned as PHP objects or be formatted as strings
     * @return CellValueFormatter
     */
    public function createCellValueFormatter($shouldFormatDates)
    {
        return new CellValueFormatter($shouldFormatDates, $this->createStringsEscaper());
    }

    /**
     * @param InternalEntityFactory $entityFactory
     * @return SettingsHelper
     */
    public function createSettingsHelper($entityFactory)
    {
        return new SettingsHelper($entityFactory);
    }

    /**
     * @return \Box\Spout\Common\Helper\Escaper\ODS
     */
    public function createStringsEscaper()
    {
        /* @noinspection PhpUnnecessaryFullyQualifiedNameInspection */
        return new \Box\Spout\Common\Helper\Escaper\ODS();
    }
}
ODS/Creator/InternalEntityFactory.php 0000644 00000007253 15152657741 0013626 0 ustar 00 <?php

namespace Box\Spout\Reader\ODS\Creator;

use Box\Spout\Common\Entity\Cell;
use Box\Spout\Common\Entity\Row;
use Box\Spout\Reader\Common\Creator\InternalEntityFactoryInterface;
use Box\Spout\Reader\Common\Entity\Options;
use Box\Spout\Reader\Common\XMLProcessor;
use Box\Spout\Reader\ODS\RowIterator;
use Box\Spout\Reader\ODS\Sheet;
use Box\Spout\Reader\ODS\SheetIterator;
use Box\Spout\Reader\Wrapper\XMLReader;

/**
 * Class InternalEntityFactory
 * Factory to create entities
 */
class InternalEntityFactory implements InternalEntityFactoryInterface
{
    /** @var HelperFactory */
    private $helperFactory;

    /** @var ManagerFactory */
    private $managerFactory;

    /**
     * @param HelperFactory $helperFactory
     * @param ManagerFactory $managerFactory
     */
    public function __construct(HelperFactory $helperFactory, ManagerFactory $managerFactory)
    {
        $this->helperFactory = $helperFactory;
        $this->managerFactory = $managerFactory;
    }

    /**
     * @param string $filePath Path of the file to be read
     * @param \Box\Spout\Common\Manager\OptionsManagerInterface $optionsManager Reader's options manager
     * @return SheetIterator
     */
    public function createSheetIterator($filePath, $optionsManager)
    {
        $stringsEscaper = $this->helperFactory->createStringsEscaper();

        return new SheetIterator(
            $filePath,
            $optionsManager,
            $stringsEscaper,
            $this->helperFactory->createSettingsHelper($this),
            $this
        );
    }

    /**
     * @param XMLReader $xmlReader XML Reader
     * @param int $sheetIndex Index of the sheet, based on order in the workbook (zero-based)
     * @param string $sheetName Name of the sheet
     * @param bool $isSheetActive Whether the sheet was defined as active
     * @param bool $isSheetVisible Whether the sheet is visible
     * @param \Box\Spout\Common\Manager\OptionsManagerInterface $optionsManager Reader's options manager
     * @return Sheet
     */
    public function createSheet($xmlReader, $sheetIndex, $sheetName, $isSheetActive, $isSheetVisible, $optionsManager)
    {
        return new Sheet(
            $this->createRowIterator($xmlReader, $optionsManager),
            $sheetIndex,
            $sheetName,
            $isSheetActive,
            $isSheetVisible
        );
    }

    /**
     * @param XMLReader $xmlReader XML Reader
     * @param \Box\Spout\Common\Manager\OptionsManagerInterface $optionsManager Reader's options manager
     * @return RowIterator
     */
    private function createRowIterator($xmlReader, $optionsManager)
    {
        $cellValueFormatter = $this->helperFactory->createCellValueFormatter(
            $optionsManager->getOption(Options::SHOULD_FORMAT_DATES)
        );
        $xmlProcessor = $this->createXMLProcessor($xmlReader);
        $rowManager = $this->managerFactory->createRowManager($this);

        return new RowIterator($xmlReader, $optionsManager, $cellValueFormatter, $xmlProcessor, $rowManager, $this);
    }

    /**
     * @param Cell[] $cells
* @return Row */ public function createRow(array $cells = []) { return new Row($cells, null); } /** * @param mixed $cellValue * @return Cell */ public function createCell($cellValue) { return new Cell($cellValue); } /** * @return XMLReader */ public function createXMLReader() { return new XMLReader(); } /** * @param $xmlReader * @return XMLProcessor */ private function createXMLProcessor($xmlReader) { return new XMLProcessor($xmlReader); } /** * @return \ZipArchive */ public function createZipArchive() { return new \ZipArchive(); } } ODS/Creator/ManagerFactory.php 0000644 00000000640 15152657741 0012220 0 ustar 00 <?php namespace Box\Spout\Reader\ODS\Creator; use Box\Spout\Reader\Common\Manager\RowManager; /** * Class ManagerFactory * Factory to create managers */ class ManagerFactory { /** * @param InternalEntityFactory $entityFactory Factory to create entities * @return RowManager */ public function createRowManager($entityFactory) { return new RowManager($entityFactory); } } ODS/SheetIterator.php 0000644 00000017546 15152657741 0010516 0 ustar 00 <?php namespace Box\Spout\Reader\ODS; use Box\Spout\Common\Exception\IOException; use Box\Spout\Reader\Exception\XMLProcessingException; use Box\Spout\Reader\IteratorInterface; use Box\Spout\Reader\ODS\Creator\InternalEntityFactory; use Box\Spout\Reader\ODS\Helper\SettingsHelper; use Box\Spout\Reader\Wrapper\XMLReader; /** * Class SheetIterator * Iterate over ODS sheet. 
*/ class SheetIterator implements IteratorInterface { const CONTENT_XML_FILE_PATH = 'content.xml'; const XML_STYLE_NAMESPACE = 'urn:oasis:names:tc:opendocument:xmlns:style:1.0'; /** Definition of XML nodes name and attribute used to parse sheet data */ const XML_NODE_AUTOMATIC_STYLES = 'office:automatic-styles'; const XML_NODE_STYLE_TABLE_PROPERTIES = 'table-properties'; const XML_NODE_TABLE = 'table:table'; const XML_ATTRIBUTE_STYLE_NAME = 'style:name'; const XML_ATTRIBUTE_TABLE_NAME = 'table:name'; const XML_ATTRIBUTE_TABLE_STYLE_NAME = 'table:style-name'; const XML_ATTRIBUTE_TABLE_DISPLAY = 'table:display'; /** @var string Path of the file to be read */ protected $filePath; /** @var \Box\Spout\Common\Manager\OptionsManagerInterface Reader's options manager */ protected $optionsManager; /** @var InternalEntityFactory Factory to create entities */ protected $entityFactory; /** @var XMLReader The XMLReader object that will help read sheet's XML data */ protected $xmlReader; /** @var \Box\Spout\Common\Helper\Escaper\ODS Used to unescape XML data */ protected $escaper; /** @var bool Whether there are still at least a sheet to be read */ protected $hasFoundSheet; /** @var int The index of the sheet being read (zero-based) */ protected $currentSheetIndex; /** @var string The name of the sheet that was defined as active */ protected $activeSheetName; /** @var array Associative array [STYLE_NAME] => [IS_SHEET_VISIBLE] */ protected $sheetsVisibility; /** * @param string $filePath Path of the file to be read * @param \Box\Spout\Common\Manager\OptionsManagerInterface $optionsManager * @param \Box\Spout\Common\Helper\Escaper\ODS $escaper Used to unescape XML data * @param SettingsHelper $settingsHelper Helper to get data from "settings.xml" * @param InternalEntityFactory $entityFactory Factory to create entities */ public function __construct($filePath, $optionsManager, $escaper, $settingsHelper, $entityFactory) { $this->filePath = $filePath; $this->optionsManager = 
$optionsManager; $this->entityFactory = $entityFactory; $this->xmlReader = $entityFactory->createXMLReader(); $this->escaper = $escaper; $this->activeSheetName = $settingsHelper->getActiveSheetName($filePath); } /** * Rewind the Iterator to the first element * @see http://php.net/manual/en/iterator.rewind.php * * @throws \Box\Spout\Common\Exception\IOException If unable to open the XML file containing sheets' data * @return void */ #[\ReturnTypeWillChange] public function rewind() { $this->xmlReader->close(); if ($this->xmlReader->openFileInZip($this->filePath, self::CONTENT_XML_FILE_PATH) === false) { $contentXmlFilePath = $this->filePath . '#' . self::CONTENT_XML_FILE_PATH; throw new IOException("Could not open \"{$contentXmlFilePath}\"."); } try { $this->sheetsVisibility = $this->readSheetsVisibility(); $this->hasFoundSheet = $this->xmlReader->readUntilNodeFound(self::XML_NODE_TABLE); } catch (XMLProcessingException $exception) { throw new IOException("The content.xml file is invalid and cannot be read. 
[{$exception->getMessage()}]"); } $this->currentSheetIndex = 0; } /** * Extracts the visibility of the sheets * * @return array Associative array [STYLE_NAME] => [IS_SHEET_VISIBLE] */ private function readSheetsVisibility() { $sheetsVisibility = []; $this->xmlReader->readUntilNodeFound(self::XML_NODE_AUTOMATIC_STYLES); $automaticStylesNode = $this->xmlReader->expand(); $tableStyleNodes = $automaticStylesNode->getElementsByTagNameNS(self::XML_STYLE_NAMESPACE, self::XML_NODE_STYLE_TABLE_PROPERTIES); /** @var \DOMElement $tableStyleNode */ foreach ($tableStyleNodes as $tableStyleNode) { $isSheetVisible = ($tableStyleNode->getAttribute(self::XML_ATTRIBUTE_TABLE_DISPLAY) !== 'false'); $parentStyleNode = $tableStyleNode->parentNode; $styleName = $parentStyleNode->getAttribute(self::XML_ATTRIBUTE_STYLE_NAME); $sheetsVisibility[$styleName] = $isSheetVisible; } return $sheetsVisibility; } /** * Checks if current position is valid * @see http://php.net/manual/en/iterator.valid.php * * @return bool */ #[\ReturnTypeWillChange] public function valid() { return $this->hasFoundSheet; } /** * Move forward to next element * @see http://php.net/manual/en/iterator.next.php * * @return void */ #[\ReturnTypeWillChange] public function next() { $this->hasFoundSheet = $this->xmlReader->readUntilNodeFound(self::XML_NODE_TABLE); if ($this->hasFoundSheet) { $this->currentSheetIndex++; } } /** * Return the current element * @see http://php.net/manual/en/iterator.current.php * * @return \Box\Spout\Reader\ODS\Sheet */ #[\ReturnTypeWillChange] public function current() { $escapedSheetName = $this->xmlReader->getAttribute(self::XML_ATTRIBUTE_TABLE_NAME); $sheetName = $this->escaper->unescape($escapedSheetName); $isSheetActive = $this->isSheetActive($sheetName, $this->currentSheetIndex, $this->activeSheetName); $sheetStyleName = $this->xmlReader->getAttribute(self::XML_ATTRIBUTE_TABLE_STYLE_NAME); $isSheetVisible = $this->isSheetVisible($sheetStyleName); return $this->entityFactory->createSheet( 
$this->xmlReader, $this->currentSheetIndex, $sheetName, $isSheetActive, $isSheetVisible, $this->optionsManager ); } /** * Returns whether the current sheet was defined as the active one * * @param string $sheetName Name of the current sheet * @param int $sheetIndex Index of the current sheet * @param string|null $activeSheetName Name of the sheet that was defined as active or NULL if none defined * @return bool Whether the current sheet was defined as the active one */ private function isSheetActive($sheetName, $sheetIndex, $activeSheetName) { // The given sheet is active if its name matches the defined active sheet's name // or if no information about the active sheet was found, it defaults to the first sheet. return ( ($activeSheetName === null && $sheetIndex === 0) || ($activeSheetName === $sheetName) ); } /** * Returns whether the current sheet is visible * * @param string $sheetStyleName Name of the sheet style * @return bool Whether the current sheet is visible */ private function isSheetVisible($sheetStyleName) { return isset($this->sheetsVisibility[$sheetStyleName]) ? $this->sheetsVisibility[$sheetStyleName] : true; } /** * Return the key of the current element * @see http://php.net/manual/en/iterator.key.php * * @return int */ #[\ReturnTypeWillChange] public function key() { return $this->currentSheetIndex + 1; } /** * Cleans up what was created to iterate over the object. 
* * @return void */ #[\ReturnTypeWillChange] public function end() { $this->xmlReader->close(); } } ODS/RowIterator.php 0000644 00000035227 15152657741 0010211 0 ustar 00 <?php namespace Box\Spout\Reader\ODS; use Box\Spout\Common\Entity\Cell; use Box\Spout\Common\Entity\Row; use Box\Spout\Common\Exception\IOException; use Box\Spout\Common\Manager\OptionsManagerInterface; use Box\Spout\Reader\Common\Entity\Options; use Box\Spout\Reader\Common\Manager\RowManager; use Box\Spout\Reader\Common\XMLProcessor; use Box\Spout\Reader\Exception\InvalidValueException; use Box\Spout\Reader\Exception\IteratorNotRewindableException; use Box\Spout\Reader\Exception\XMLProcessingException; use Box\Spout\Reader\IteratorInterface; use Box\Spout\Reader\ODS\Creator\InternalEntityFactory; use Box\Spout\Reader\ODS\Helper\CellValueFormatter; use Box\Spout\Reader\Wrapper\XMLReader; /** * Class RowIterator */ class RowIterator implements IteratorInterface { /** Definition of XML nodes names used to parse data */ const XML_NODE_TABLE = 'table:table'; const XML_NODE_ROW = 'table:table-row'; const XML_NODE_CELL = 'table:table-cell'; const MAX_COLUMNS_EXCEL = 16384; /** Definition of XML attribute used to parse data */ const XML_ATTRIBUTE_NUM_ROWS_REPEATED = 'table:number-rows-repeated'; const XML_ATTRIBUTE_NUM_COLUMNS_REPEATED = 'table:number-columns-repeated'; /** @var \Box\Spout\Reader\Wrapper\XMLReader The XMLReader object that will help read sheet's XML data */ protected $xmlReader; /** @var \Box\Spout\Reader\Common\XMLProcessor Helper Object to process XML nodes */ protected $xmlProcessor; /** @var bool Whether empty rows should be returned or skipped */ protected $shouldPreserveEmptyRows; /** @var Helper\CellValueFormatter Helper to format cell values */ protected $cellValueFormatter; /** @var RowManager Manages rows */ protected $rowManager; /** @var InternalEntityFactory Factory to create entities */ protected $entityFactory; /** @var bool Whether the iterator has already been rewound 
once */ protected $hasAlreadyBeenRewound = false; /** @var Row The currently processed row */ protected $currentlyProcessedRow; /** @var Row Buffer used to store the current row, while checking if there are more rows to read */ protected $rowBuffer; /** @var bool Indicates whether all rows have been read */ protected $hasReachedEndOfFile = false; /** @var int Last row index processed (one-based) */ protected $lastRowIndexProcessed = 0; /** @var int Row index to be processed next (one-based) */ protected $nextRowIndexToBeProcessed = 1; /** @var Cell Last processed cell (because when reading cell at column N+1, cell N is processed) */ protected $lastProcessedCell; /** @var int Number of times the last processed row should be repeated */ protected $numRowsRepeated = 1; /** @var int Number of times the last cell value should be copied to the cells on its right */ protected $numColumnsRepeated = 1; /** @var bool Whether at least one cell has been read for the row currently being processed */ protected $hasAlreadyReadOneCellInCurrentRow = false; /** * @param XMLReader $xmlReader XML Reader, positioned on the "<table:table>" element * @param OptionsManagerInterface $optionsManager Reader's options manager * @param CellValueFormatter $cellValueFormatter Helper to format cell values * @param XMLProcessor $xmlProcessor Helper to process XML files * @param RowManager $rowManager Manages rows * @param InternalEntityFactory $entityFactory Factory to create entities */ public function __construct( XMLReader $xmlReader, OptionsManagerInterface $optionsManager, CellValueFormatter $cellValueFormatter, XMLProcessor $xmlProcessor, RowManager $rowManager, InternalEntityFactory $entityFactory ) { $this->xmlReader = $xmlReader; $this->shouldPreserveEmptyRows = $optionsManager->getOption(Options::SHOULD_PRESERVE_EMPTY_ROWS); $this->cellValueFormatter = $cellValueFormatter; $this->entityFactory = $entityFactory; $this->rowManager = $rowManager; // Register all callbacks to process 
different nodes when reading the XML file $this->xmlProcessor = $xmlProcessor; $this->xmlProcessor->registerCallback(self::XML_NODE_ROW, XMLProcessor::NODE_TYPE_START, [$this, 'processRowStartingNode']); $this->xmlProcessor->registerCallback(self::XML_NODE_CELL, XMLProcessor::NODE_TYPE_START, [$this, 'processCellStartingNode']); $this->xmlProcessor->registerCallback(self::XML_NODE_ROW, XMLProcessor::NODE_TYPE_END, [$this, 'processRowEndingNode']); $this->xmlProcessor->registerCallback(self::XML_NODE_TABLE, XMLProcessor::NODE_TYPE_END, [$this, 'processTableEndingNode']); } /** * Rewind the Iterator to the first element. * NOTE: It can only be done once, as it is not possible to read an XML file backwards. * @see http://php.net/manual/en/iterator.rewind.php * * @throws \Box\Spout\Reader\Exception\IteratorNotRewindableException If the iterator is rewound more than once * @return void */ #[\ReturnTypeWillChange] public function rewind() { // Because sheet and row data is located in the file, we can't rewind both the // sheet iterator and the row iterator, as XML file cannot be read backwards. // Therefore, rewinding the row iterator has been disabled. if ($this->hasAlreadyBeenRewound) { throw new IteratorNotRewindableException(); } $this->hasAlreadyBeenRewound = true; $this->lastRowIndexProcessed = 0; $this->nextRowIndexToBeProcessed = 1; $this->rowBuffer = null; $this->hasReachedEndOfFile = false; $this->next(); } /** * Checks if current position is valid * @see http://php.net/manual/en/iterator.valid.php * * @return bool */ #[\ReturnTypeWillChange] public function valid() { return (!$this->hasReachedEndOfFile); } /** * Move forward to next element. Empty rows will be skipped. 
* @see http://php.net/manual/en/iterator.next.php * * @throws \Box\Spout\Reader\Exception\SharedStringNotFoundException If a shared string was not found * @throws \Box\Spout\Common\Exception\IOException If unable to read the sheet data XML * @return void */ #[\ReturnTypeWillChange] public function next() { if ($this->doesNeedDataForNextRowToBeProcessed()) { $this->readDataForNextRow(); } $this->lastRowIndexProcessed++; } /** * Returns whether we need data for the next row to be processed. * We DO need to read data if: * - we have not read any rows yet * OR * - the next row to be processed immediately follows the last read row * * @return bool Whether we need data for the next row to be processed. */ protected function doesNeedDataForNextRowToBeProcessed() { $hasReadAtLeastOneRow = ($this->lastRowIndexProcessed !== 0); return ( !$hasReadAtLeastOneRow || $this->lastRowIndexProcessed === $this->nextRowIndexToBeProcessed - 1 ); } /** * @throws \Box\Spout\Reader\Exception\SharedStringNotFoundException If a shared string was not found * @throws \Box\Spout\Common\Exception\IOException If unable to read the sheet data XML * @return void */ protected function readDataForNextRow() { $this->currentlyProcessedRow = $this->entityFactory->createRow(); try { $this->xmlProcessor->readUntilStopped(); } catch (XMLProcessingException $exception) { throw new IOException("The sheet's data cannot be read. 
[{$exception->getMessage()}]"); } $this->rowBuffer = $this->currentlyProcessedRow; } /** * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<table:table-row>" starting node * @return int A return code that indicates what action should the processor take next */ protected function processRowStartingNode($xmlReader) { // Reset data from current row $this->hasAlreadyReadOneCellInCurrentRow = false; $this->lastProcessedCell = null; $this->numColumnsRepeated = 1; $this->numRowsRepeated = $this->getNumRowsRepeatedForCurrentNode($xmlReader); return XMLProcessor::PROCESSING_CONTINUE; } /** * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<table:table-cell>" starting node * @return int A return code that indicates what action should the processor take next */ protected function processCellStartingNode($xmlReader) { $currentNumColumnsRepeated = $this->getNumColumnsRepeatedForCurrentNode($xmlReader); // NOTE: expand() will automatically decode all XML entities of the child nodes $node = $xmlReader->expand(); $currentCell = $this->getCell($node); // process cell N only after having read cell N+1 (see below why) if ($this->hasAlreadyReadOneCellInCurrentRow) { for ($i = 0; $i < $this->numColumnsRepeated; $i++) { $this->currentlyProcessedRow->addCell($this->lastProcessedCell); } } $this->hasAlreadyReadOneCellInCurrentRow = true; $this->lastProcessedCell = $currentCell; $this->numColumnsRepeated = $currentNumColumnsRepeated; return XMLProcessor::PROCESSING_CONTINUE; } /** * @return int A return code that indicates what action should the processor take next */ protected function processRowEndingNode() { $isEmptyRow = $this->isEmptyRow($this->currentlyProcessedRow, $this->lastProcessedCell); // if the fetched row is empty and we don't want to preserve it... if (!$this->shouldPreserveEmptyRows && $isEmptyRow) { // ... 
skip it return XMLProcessor::PROCESSING_CONTINUE; } // if the row is empty, we don't want to return more than one cell $actualNumColumnsRepeated = (!$isEmptyRow) ? $this->numColumnsRepeated : 1; $numCellsInCurrentlyProcessedRow = $this->currentlyProcessedRow->getNumCells(); // Only add the value if the last read cell is not a trailing empty cell repeater in Excel. // The current count of read columns is determined by counting the values in "$this->currentlyProcessedRowData". // This is to avoid creating a lot of empty cells, as Excel adds a last empty "<table:table-cell>" // with a number-columns-repeated value equals to the number of (supported columns - used columns). // In Excel, the number of supported columns is 16384, but we don't want to returns rows with // always 16384 cells. if (($numCellsInCurrentlyProcessedRow + $actualNumColumnsRepeated) !== self::MAX_COLUMNS_EXCEL) { for ($i = 0; $i < $actualNumColumnsRepeated; $i++) { $this->currentlyProcessedRow->addCell($this->lastProcessedCell); } } // If we are processing row N and the row is repeated M times, // then the next row to be processed will be row (N+M). 
$this->nextRowIndexToBeProcessed += $this->numRowsRepeated; // at this point, we have all the data we need for the row // so that we can populate the buffer return XMLProcessor::PROCESSING_STOP; } /** * @return int A return code that indicates what action should the processor take next */ protected function processTableEndingNode() { // The closing "</table:table>" marks the end of the file $this->hasReachedEndOfFile = true; return XMLProcessor::PROCESSING_STOP; } /** * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<table:table-row>" starting node * @return int The value of "table:number-rows-repeated" attribute of the current node, or 1 if attribute missing */ protected function getNumRowsRepeatedForCurrentNode($xmlReader) { $numRowsRepeated = $xmlReader->getAttribute(self::XML_ATTRIBUTE_NUM_ROWS_REPEATED); return ($numRowsRepeated !== null) ? (int) $numRowsRepeated : 1; } /** * @param \Box\Spout\Reader\Wrapper\XMLReader $xmlReader XMLReader object, positioned on a "<table:table-cell>" starting node * @return int The value of "table:number-columns-repeated" attribute of the current node, or 1 if attribute missing */ protected function getNumColumnsRepeatedForCurrentNode($xmlReader) { $numColumnsRepeated = $xmlReader->getAttribute(self::XML_ATTRIBUTE_NUM_COLUMNS_REPEATED); return ($numColumnsRepeated !== null) ? (int) $numColumnsRepeated : 1; } /** * Returns the cell with (unescaped) correctly marshalled, cell value associated to the given XML node. 
* * @param \DOMNode $node * @return Cell The cell set with the associated with the cell */ protected function getCell($node) { try { $cellValue = $this->cellValueFormatter->extractAndFormatNodeValue($node); $cell = $this->entityFactory->createCell($cellValue); } catch (InvalidValueException $exception) { $cell = $this->entityFactory->createCell($exception->getInvalidValue()); $cell->setType(Cell::TYPE_ERROR); } return $cell; } /** * After finishing processing each cell, a row is considered empty if it contains * no cells or if the last read cell is empty. * After finishing processing each cell, the last read cell is not part of the * row data yet (as we still need to apply the "num-columns-repeated" attribute). * * @param Row $currentRow * @param Cell $lastReadCell The last read cell * @return bool Whether the row is empty */ protected function isEmptyRow($currentRow, $lastReadCell) { return ( $this->rowManager->isEmpty($currentRow) && (!isset($lastReadCell) || $lastReadCell->isEmpty()) ); } /** * Return the current element, from the buffer. * @see http://php.net/manual/en/iterator.current.php * * @return Row */ #[\ReturnTypeWillChange] public function current() { return $this->rowBuffer; } /** * Return the key of the current element * @see http://php.net/manual/en/iterator.key.php * * @return int */ #[\ReturnTypeWillChange] public function key() { return $this->lastRowIndexProcessed; } /** * Cleans up what was created to iterate over the object. 
* * @return void */ #[\ReturnTypeWillChange] public function end() { $this->xmlReader->close(); } } ODS/Helper/CellValueFormatter.php 0000644 00000024050 15152657741 0012677 0 ustar 00 <?php namespace Box\Spout\Reader\ODS\Helper; use Box\Spout\Reader\Exception\InvalidValueException; /** * Class CellValueFormatter * This class provides helper functions to format cell values */ class CellValueFormatter { /** Definition of all possible cell types */ const CELL_TYPE_STRING = 'string'; const CELL_TYPE_FLOAT = 'float'; const CELL_TYPE_BOOLEAN = 'boolean'; const CELL_TYPE_DATE = 'date'; const CELL_TYPE_TIME = 'time'; const CELL_TYPE_CURRENCY = 'currency'; const CELL_TYPE_PERCENTAGE = 'percentage'; const CELL_TYPE_VOID = 'void'; /** Definition of XML nodes names used to parse data */ const XML_NODE_P = 'p'; const XML_NODE_TEXT_A = 'text:a'; const XML_NODE_TEXT_SPAN = 'text:span'; const XML_NODE_TEXT_S = 'text:s'; const XML_NODE_TEXT_TAB = 'text:tab'; const XML_NODE_TEXT_LINE_BREAK = 'text:line-break'; /** Definition of XML attributes used to parse data */ const XML_ATTRIBUTE_TYPE = 'office:value-type'; const XML_ATTRIBUTE_VALUE = 'office:value'; const XML_ATTRIBUTE_BOOLEAN_VALUE = 'office:boolean-value'; const XML_ATTRIBUTE_DATE_VALUE = 'office:date-value'; const XML_ATTRIBUTE_TIME_VALUE = 'office:time-value'; const XML_ATTRIBUTE_CURRENCY = 'office:currency'; const XML_ATTRIBUTE_C = 'text:c'; /** @var bool Whether date/time values should be returned as PHP objects or be formatted as strings */ protected $shouldFormatDates; /** @var \Box\Spout\Common\Helper\Escaper\ODS Used to unescape XML data */ protected $escaper; /** @var array List of XML nodes representing whitespaces and their corresponding value */ private static $WHITESPACE_XML_NODES = [ self::XML_NODE_TEXT_S => ' ', self::XML_NODE_TEXT_TAB => "\t", self::XML_NODE_TEXT_LINE_BREAK => "\n", ]; /** * @param bool $shouldFormatDates Whether date/time values should be returned as PHP objects or be formatted as strings * 
@param \Box\Spout\Common\Helper\Escaper\ODS $escaper Used to unescape XML data
     */
    public function __construct($shouldFormatDates, $escaper)
    {
        $this->escaper = $escaper;
        $this->shouldFormatDates = $shouldFormatDates;
    }

    /**
     * Reads the "office:value-type" attribute of the given cell node and delegates to the
     * formatter matching that type. Unknown, missing or "void" types yield an empty string.
     * @see http://docs.oasis-open.org/office/v1.2/os/OpenDocument-v1.2-os-part1.html#refTable13
     *
     * @param \DOMNode $node Cell node; must expose getAttribute()
     * @throws InvalidValueException If the node value is not valid
     * @return string|int|float|bool|\DateTime|\DateInterval The value associated with the cell, empty string if cell's type is void/undefined
     */
    public function extractAndFormatNodeValue($node)
    {
        $valueType = $node->getAttribute(self::XML_ATTRIBUTE_TYPE);

        switch ($valueType) {
            case self::CELL_TYPE_STRING:
                $formattedValue = $this->formatStringCellValue($node);
                break;
            case self::CELL_TYPE_FLOAT:
                $formattedValue = $this->formatFloatCellValue($node);
                break;
            case self::CELL_TYPE_BOOLEAN:
                $formattedValue = $this->formatBooleanCellValue($node);
                break;
            case self::CELL_TYPE_DATE:
                $formattedValue = $this->formatDateCellValue($node);
                break;
            case self::CELL_TYPE_TIME:
                $formattedValue = $this->formatTimeCellValue($node);
                break;
            case self::CELL_TYPE_CURRENCY:
                $formattedValue = $this->formatCurrencyCellValue($node);
                break;
            case self::CELL_TYPE_PERCENTAGE:
                $formattedValue = $this->formatPercentageCellValue($node);
                break;
            case self::CELL_TYPE_VOID:
            default:
                $formattedValue = '';
                break;
        }

        return $formattedValue;
    }

    /**
     * Returns the cell String value.
*
     * Every "p" element found under the cell contributes one line; the lines are joined
     * with "\n" and the result is passed through the escaper's unescape().
     *
     * @param \DOMNode $node
     * @return string The value associated with the cell
     */
    protected function formatStringCellValue($node)
    {
        $pNodeValues = [];
        $pNodes = $node->getElementsByTagName(self::XML_NODE_P);

        foreach ($pNodes as $pNode) {
            $pNodeValues[] = $this->extractTextValueFromNode($pNode);
        }

        $escapedCellValue = \implode("\n", $pNodeValues);
        $cellValue = $this->escaper->unescape($escapedCellValue);

        return $cellValue;
    }

    /**
     * Concatenates the text carried by the direct children of the given node:
     * - text nodes contribute their value as is
     * - whitespace nodes (<text:s />, <text:tab />, <text:line-break />) are expanded
     * - <text:a> and <text:span> are processed recursively
     * Any other child is ignored.
     *
     * @param \DOMNode $pNode
     * @return string
     */
    private function extractTextValueFromNode($pNode)
    {
        $textValue = '';

        foreach ($pNode->childNodes as $childNode) {
            if ($childNode instanceof \DOMText) {
                $textValue .= $childNode->nodeValue;
            } elseif ($this->isWhitespaceNode($childNode->nodeName)) {
                $textValue .= $this->transformWhitespaceNode($childNode);
            } elseif ($childNode->nodeName === self::XML_NODE_TEXT_A || $childNode->nodeName === self::XML_NODE_TEXT_SPAN) {
                $textValue .= $this->extractTextValueFromNode($childNode);
            }
        }

        return $textValue;
    }

    /**
     * Returns whether the given node is a whitespace node. It must be one of these:
     *  - <text:s />
     *  - <text:tab />
     *  - <text:line-break />
     *
     * @param string $nodeName
     * @return bool
     */
    private function isWhitespaceNode($nodeName)
    {
        return isset(self::$WHITESPACE_XML_NODES[$nodeName]);
    }

    /**
     * Converts a whitespace XML node back into the whitespace characters it stands for:
     *  - <text:s />          => space
     *  - <text:tab />        => tab
     *  - <text:line-break /> => line break
     * The character is repeated "text:c" times when that attribute is set, once otherwise.
     *
     * @see https://docs.oasis-open.org/office/v1.2/os/OpenDocument-v1.2-os-part1.html#__RefHeading__1415200_253892949
     *
     * @param \DOMNode $node The XML node representing a whitespace
     * @return string The corresponding whitespace value
     */
    private function transformWhitespaceNode($node)
    {
        $countAttribute = $node->getAttribute(self::XML_ATTRIBUTE_C); // only defined for "<text:s>"
        $numWhitespaces = (!empty($countAttribute)) ? (int) $countAttribute : 1;

        return \str_repeat(self::$WHITESPACE_XML_NODES[$node->nodeName], $numWhitespaces);
    }

    /**
     * Returns the cell Numeric value from the given node.
     * The value is returned as an int when the int and float casts of the attribute
     * represent the same number, as a float otherwise.
     *
     * @param \DOMNode $node
     * @return int|float The value associated with the cell
     */
    protected function formatFloatCellValue($node)
    {
        $nodeValue = $node->getAttribute(self::XML_ATTRIBUTE_VALUE);

        $nodeIntValue = (int) $nodeValue;
        $nodeFloatValue = (float) $nodeValue;
        $cellValue = ((float) $nodeIntValue === $nodeFloatValue) ? $nodeIntValue : $nodeFloatValue;

        return $cellValue;
    }

    /**
     * Returns the cell Boolean value from the given node.
     *
     * The attribute holds the lexical form of a boolean ("true" / "false"). A plain
     * (bool) cast would turn the non-empty string "false" into TRUE, so that literal
     * is handled explicitly. Every other value keeps the regular PHP cast semantics
     * ("" and "0" are FALSE, anything else is TRUE).
     *
     * @param \DOMNode $node
     * @return bool The value associated with the cell
     */
    protected function formatBooleanCellValue($node)
    {
        $nodeValue = $node->getAttribute(self::XML_ATTRIBUTE_BOOLEAN_VALUE);

        if (\strtolower(\trim($nodeValue)) === 'false') {
            return false;
        }

        return (bool) $nodeValue;
    }

    /**
     * Returns the cell Date value from the given node.
     *
     * When dates should be formatted, the text of the first "p" child is returned
     * (empty string if the cell has no such child). Otherwise a \DateTime is built
     * from the "office:date-value" attribute.
     *
     * @param \DOMNode $node
     * @throws InvalidValueException If the value is not a valid date
     * @return \DateTime|string The value associated with the cell
     */
    protected function formatDateCellValue($node)
    {
        // The XML node looks like this:
        // <table:table-cell calcext:value-type="date" office:date-value="2016-05-19T16:39:00" office:value-type="date">
        //   <text:p>05/19/16 04:39 PM</text:p>
        // </table:table-cell>

        if ($this->shouldFormatDates) {
            // The date is already formatted in the "p" tag
            $nodeWithValueAlreadyFormatted = $node->getElementsByTagName(self::XML_NODE_P)->item(0);
            // item(0) is null when the cell carries no "p" child: avoid reading a property on null
            $cellValue = ($nodeWithValueAlreadyFormatted !== null) ? $nodeWithValueAlreadyFormatted->nodeValue : '';
        } else {
            // otherwise, get it from the "date-value" attribute
            $nodeValue = $node->getAttribute(self::XML_ATTRIBUTE_DATE_VALUE);
            try {
                $cellValue = new \DateTime($nodeValue);
            } catch (\Exception $e) {
                throw new InvalidValueException($nodeValue);
            }
        }

        return $cellValue;
    }

    /**
     * Returns the cell Time value from the given node.
*
     * When dates should be formatted, the text of the first "p" child is returned
     * (empty string if the cell has no such child). Otherwise a \DateInterval is built
     * from the "office:time-value" attribute.
     *
     * @param \DOMNode $node
     * @throws InvalidValueException If the value is not a valid time
     * @return \DateInterval|string The value associated with the cell
     */
    protected function formatTimeCellValue($node)
    {
        // The XML node looks like this:
        // <table:table-cell calcext:value-type="time" office:time-value="PT13H24M00S" office:value-type="time">
        //   <text:p>01:24:00 PM</text:p>
        // </table:table-cell>

        if ($this->shouldFormatDates) {
            // The time is already formatted in the "p" tag
            $nodeWithValueAlreadyFormatted = $node->getElementsByTagName(self::XML_NODE_P)->item(0);
            // item(0) is null when the cell carries no "p" child: avoid reading a property on null
            $cellValue = ($nodeWithValueAlreadyFormatted !== null) ? $nodeWithValueAlreadyFormatted->nodeValue : '';
        } else {
            // otherwise, get it from the "time-value" attribute
            $nodeValue = $node->getAttribute(self::XML_ATTRIBUTE_TIME_VALUE);
            try {
                $cellValue = new \DateInterval($nodeValue);
            } catch (\Exception $e) {
                throw new InvalidValueException($nodeValue);
            }
        }

        return $cellValue;
    }

    /**
     * Returns the cell Currency value from the given node.
     * The result is the "office:value" attribute, a space, then the "office:currency" attribute.
     *
     * @param \DOMNode $node
     * @return string The value associated with the cell (e.g. "100 USD" or "9.99 EUR")
     */
    protected function formatCurrencyCellValue($node)
    {
        $value = $node->getAttribute(self::XML_ATTRIBUTE_VALUE);
        $currency = $node->getAttribute(self::XML_ATTRIBUTE_CURRENCY);

        return "$value $currency";
    }

    /**
     * Returns the cell Percentage value from the given node.
     *
     * @param \DOMNode $node
     * @return int|float The value associated with the cell
     */
    protected function formatPercentageCellValue($node)
    {
        // percentages are formatted like floats
        return $this->formatFloatCellValue($node);
    }
}
ODS/Helper/SettingsHelper.php 0000644 00000003475 15152657741 0012107 0 ustar 00 <?php

namespace Box\Spout\Reader\ODS\Helper;

use Box\Spout\Reader\Exception\XMLProcessingException;
use Box\Spout\Reader\ODS\Creator\InternalEntityFactory;

/**
 * Class SettingsHelper
 * This class provides helper functions to extract data from the "settings.xml" file.
*/
class SettingsHelper
{
    const SETTINGS_XML_FILE_PATH = 'settings.xml';

    /** Definition of XML nodes name and attribute used to parse settings data */
    const XML_NODE_CONFIG_ITEM = 'config:config-item';
    const XML_ATTRIBUTE_CONFIG_NAME = 'config:name';
    const XML_ATTRIBUTE_VALUE_ACTIVE_TABLE = 'ActiveTable';

    /** @var InternalEntityFactory Factory to create entities */
    private $entityFactory;

    /**
     * @param InternalEntityFactory $entityFactory Factory to create entities
     */
    public function __construct($entityFactory)
    {
        $this->entityFactory = $entityFactory;
    }

    /**
     * Looks for the "config:config-item" node named "ActiveTable" inside "settings.xml"
     * and returns its string content.
     *
     * XML processing errors are deliberately ignored (best effort): in that case NULL is returned.
     * The reader is closed on every exit path once it has been opened, including when an
     * unexpected exception propagates to the caller.
     *
     * @param string $filePath Path of the file to be read
     * @return string|null Name of the sheet that was defined as active or NULL if none found
     */
    public function getActiveSheetName($filePath)
    {
        $xmlReader = $this->entityFactory->createXMLReader();
        if ($xmlReader->openFileInZip($filePath, self::SETTINGS_XML_FILE_PATH) === false) {
            return null;
        }

        $activeSheetName = null;

        try {
            while ($xmlReader->readUntilNodeFound(self::XML_NODE_CONFIG_ITEM)) {
                if ($xmlReader->getAttribute(self::XML_ATTRIBUTE_CONFIG_NAME) === self::XML_ATTRIBUTE_VALUE_ACTIVE_TABLE) {
                    $activeSheetName = $xmlReader->readString();
                    break;
                }
            }
        } catch (XMLProcessingException $exception) {
            // do nothing: the active sheet name is optional information
        } finally {
            // always release the reader, even if an exception other than XMLProcessingException bubbles up
            $xmlReader->close();
        }

        return $activeSheetName;
    }
}
ODS/Manager/OptionsManager.php 0000644 00000001277 15152657741 0012226 0 ustar 00 <?php

namespace Box\Spout\Reader\ODS\Manager;

use Box\Spout\Common\Manager\OptionsManagerAbstract;
use Box\Spout\Reader\Common\Entity\Options;

/**
 * Class OptionsManager
 * ODS Reader options manager
 */
class OptionsManager extends OptionsManagerAbstract
{
    /**
     * {@inheritdoc}
     */
    protected function getSupportedOptions()
    {
        return [
            Options::SHOULD_FORMAT_DATES,
            Options::SHOULD_PRESERVE_EMPTY_ROWS,
        ];
    }

    /**
     * {@inheritdoc}
     */
    protected function setDefaultOptions()
    {
        $this->setOption(Options::SHOULD_FORMAT_DATES, false);
        $this->setOption(Options::SHOULD_PRESERVE_EMPTY_ROWS, false);
    }
}
ODS/Sheet.php 0000644 00000004071
15152657741 0006771 0 ustar 00 <?php

namespace Box\Spout\Reader\ODS;

use Box\Spout\Reader\SheetInterface;

/**
 * Class Sheet
 * Value holder describing one sheet of an ODS file: its row iterator, position,
 * name, and active/visible flags. All values are supplied at construction time.
 */
class Sheet implements SheetInterface
{
    /** @var \Box\Spout\Reader\ODS\RowIterator Iterator over the rows of this sheet */
    protected $rowIterator;

    /** @var int ID of the sheet (declared here; not assigned by this class) */
    protected $id;

    /** @var int Zero-based position of the sheet in the workbook */
    protected $index;

    /** @var string Name of the sheet */
    protected $name;

    /** @var bool Whether the sheet was the active one */
    protected $isActive;

    /** @var bool Whether the sheet is visible */
    protected $isVisible;

    /**
     * Stores the given sheet properties as is.
     *
     * @param RowIterator $rowIterator The corresponding row iterator
     * @param int $sheetIndex Index of the sheet, based on order in the workbook (zero-based)
     * @param string $sheetName Name of the sheet
     * @param bool $isSheetActive Whether the sheet was defined as active
     * @param bool $isSheetVisible Whether the sheet is visible
     */
    public function __construct($rowIterator, $sheetIndex, $sheetName, $isSheetActive, $isSheetVisible)
    {
        $this->isVisible = $isSheetVisible;
        $this->isActive = $isSheetActive;
        $this->name = $sheetName;
        $this->index = $sheetIndex;
        $this->rowIterator = $rowIterator;
    }

    /**
     * @return \Box\Spout\Reader\ODS\RowIterator The iterator given at construction
     */
    public function getRowIterator()
    {
        return $this->rowIterator;
    }

    /**
     * @return int Zero-based position of the sheet in the workbook
     */
    public function getIndex()
    {
        return $this->index;
    }

    /**
     * @return string The sheet's name
     */
    public function getName()
    {
        return $this->name;
    }

    /**
     * @return bool TRUE if the sheet was defined as the active one
     */
    public function isActive()
    {
        return $this->isActive;
    }

    /**
     * @return bool TRUE if the sheet is visible
     */
    public function isVisible()
    {
        return $this->isVisible;
    }
}
| ver. 1.4 |
Github
|
.
| PHP 7.4.33 | Генерация страницы: 0 |
proxy
|
phpinfo
|
Настройки