���ѧۧݧ�ӧ�� �ާ֧ߧ֧էا֧� - ���֧էѧܧ�ڧ��ӧѧ�� - /home3/cpr76684/public_html/HTMLPurifier.tar
���ѧ٧ѧ�
CSSDefinition.php 0000644 00000046211 15152171665 0007733 0 ustar 00 <?php /** * Defines allowed CSS attributes and what their values are. * @see HTMLPurifier_HTMLDefinition */ class HTMLPurifier_CSSDefinition extends HTMLPurifier_Definition { public $type = 'CSS'; /** * Assoc array of attribute name to definition object. * @type HTMLPurifier_AttrDef[] */ public $info = array(); /** * Constructs the info array. The meat of this class. * @param HTMLPurifier_Config $config */ protected function doSetup($config) { $this->info['text-align'] = new HTMLPurifier_AttrDef_Enum( array('left', 'right', 'center', 'justify'), false ); $border_style = $this->info['border-bottom-style'] = $this->info['border-right-style'] = $this->info['border-left-style'] = $this->info['border-top-style'] = new HTMLPurifier_AttrDef_Enum( array( 'none', 'hidden', 'dotted', 'dashed', 'solid', 'double', 'groove', 'ridge', 'inset', 'outset' ), false ); $this->info['border-style'] = new HTMLPurifier_AttrDef_CSS_Multiple($border_style); $this->info['clear'] = new HTMLPurifier_AttrDef_Enum( array('none', 'left', 'right', 'both'), false ); $this->info['float'] = new HTMLPurifier_AttrDef_Enum( array('none', 'left', 'right'), false ); $this->info['font-style'] = new HTMLPurifier_AttrDef_Enum( array('normal', 'italic', 'oblique'), false ); $this->info['font-variant'] = new HTMLPurifier_AttrDef_Enum( array('normal', 'small-caps'), false ); $uri_or_none = new HTMLPurifier_AttrDef_CSS_Composite( array( new HTMLPurifier_AttrDef_Enum(array('none')), new HTMLPurifier_AttrDef_CSS_URI() ) ); $this->info['list-style-position'] = new HTMLPurifier_AttrDef_Enum( array('inside', 'outside'), false ); $this->info['list-style-type'] = new HTMLPurifier_AttrDef_Enum( array( 'disc', 'circle', 'square', 'decimal', 'lower-roman', 'upper-roman', 'lower-alpha', 'upper-alpha', 'none' ), false ); $this->info['list-style-image'] = $uri_or_none; $this->info['list-style'] = new HTMLPurifier_AttrDef_CSS_ListStyle($config); $this->info['text-transform'] = new HTMLPurifier_AttrDef_Enum( array('capitalize', 'uppercase', 'lowercase', 'none'), false ); $this->info['color'] = new HTMLPurifier_AttrDef_CSS_Color(); $this->info['background-image'] = $uri_or_none; $this->info['background-repeat'] = new HTMLPurifier_AttrDef_Enum( array('repeat', 'repeat-x', 'repeat-y', 'no-repeat') ); $this->info['background-attachment'] = new HTMLPurifier_AttrDef_Enum( array('scroll', 'fixed') ); $this->info['background-position'] = new HTMLPurifier_AttrDef_CSS_BackgroundPosition(); $this->info['background-size'] = new HTMLPurifier_AttrDef_CSS_Composite( array( new HTMLPurifier_AttrDef_Enum( array( 'auto', 'cover', 'contain', 'initial', 'inherit', ) ), new HTMLPurifier_AttrDef_CSS_Percentage(), new HTMLPurifier_AttrDef_CSS_Length() ) ); $border_color = $this->info['border-top-color'] = $this->info['border-bottom-color'] = $this->info['border-left-color'] = $this->info['border-right-color'] = $this->info['background-color'] = new HTMLPurifier_AttrDef_CSS_Composite( array( new HTMLPurifier_AttrDef_Enum(array('transparent')), new HTMLPurifier_AttrDef_CSS_Color() ) ); $this->info['background'] = new HTMLPurifier_AttrDef_CSS_Background($config); $this->info['border-color'] = new HTMLPurifier_AttrDef_CSS_Multiple($border_color); $border_width = $this->info['border-top-width'] = $this->info['border-bottom-width'] = $this->info['border-left-width'] = $this->info['border-right-width'] = new HTMLPurifier_AttrDef_CSS_Composite( array( new HTMLPurifier_AttrDef_Enum(array('thin', 'medium', 'thick')), new HTMLPurifier_AttrDef_CSS_Length('0') //disallow negative ) ); $this->info['border-width'] = new HTMLPurifier_AttrDef_CSS_Multiple($border_width); $this->info['letter-spacing'] = new HTMLPurifier_AttrDef_CSS_Composite( array( new HTMLPurifier_AttrDef_Enum(array('normal')), new HTMLPurifier_AttrDef_CSS_Length() ) ); $this->info['word-spacing'] = new HTMLPurifier_AttrDef_CSS_Composite( array( new HTMLPurifier_AttrDef_Enum(array('normal')), new HTMLPurifier_AttrDef_CSS_Length() ) ); $this->info['font-size'] = new HTMLPurifier_AttrDef_CSS_Composite( array( new HTMLPurifier_AttrDef_Enum( array( 'xx-small', 'x-small', 'small', 'medium', 'large', 'x-large', 'xx-large', 'larger', 'smaller' ) ), new HTMLPurifier_AttrDef_CSS_Percentage(), new HTMLPurifier_AttrDef_CSS_Length() ) ); $this->info['line-height'] = new HTMLPurifier_AttrDef_CSS_Composite( array( new HTMLPurifier_AttrDef_Enum(array('normal')), new HTMLPurifier_AttrDef_CSS_Number(true), // no negatives new HTMLPurifier_AttrDef_CSS_Length('0'), new HTMLPurifier_AttrDef_CSS_Percentage(true) ) ); $margin = $this->info['margin-top'] = $this->info['margin-bottom'] = $this->info['margin-left'] = $this->info['margin-right'] = new HTMLPurifier_AttrDef_CSS_Composite( array( new HTMLPurifier_AttrDef_CSS_Length(), new HTMLPurifier_AttrDef_CSS_Percentage(), new HTMLPurifier_AttrDef_Enum(array('auto')) ) ); $this->info['margin'] = new HTMLPurifier_AttrDef_CSS_Multiple($margin); // non-negative $padding = $this->info['padding-top'] = $this->info['padding-bottom'] = $this->info['padding-left'] = $this->info['padding-right'] = new HTMLPurifier_AttrDef_CSS_Composite( array( new HTMLPurifier_AttrDef_CSS_Length('0'), new HTMLPurifier_AttrDef_CSS_Percentage(true) ) ); $this->info['padding'] = new HTMLPurifier_AttrDef_CSS_Multiple($padding); $this->info['text-indent'] = new HTMLPurifier_AttrDef_CSS_Composite( array( new HTMLPurifier_AttrDef_CSS_Length(), new HTMLPurifier_AttrDef_CSS_Percentage() ) ); $trusted_wh = new HTMLPurifier_AttrDef_CSS_Composite( array( new HTMLPurifier_AttrDef_CSS_Length('0'), new HTMLPurifier_AttrDef_CSS_Percentage(true), new HTMLPurifier_AttrDef_Enum(array('auto', 'initial', 'inherit')) ) ); $trusted_min_wh = new HTMLPurifier_AttrDef_CSS_Composite( array( new HTMLPurifier_AttrDef_CSS_Length('0'), new HTMLPurifier_AttrDef_CSS_Percentage(true), new HTMLPurifier_AttrDef_Enum(array('initial', 'inherit')) ) ); $trusted_max_wh = new HTMLPurifier_AttrDef_CSS_Composite( array( new HTMLPurifier_AttrDef_CSS_Length('0'), new HTMLPurifier_AttrDef_CSS_Percentage(true), new HTMLPurifier_AttrDef_Enum(array('none', 'initial', 'inherit')) ) ); $max = $config->get('CSS.MaxImgLength'); $this->info['width'] = $this->info['height'] = $max === null ? $trusted_wh : new HTMLPurifier_AttrDef_Switch( 'img', // For img tags: new HTMLPurifier_AttrDef_CSS_Composite( array( new HTMLPurifier_AttrDef_CSS_Length('0', $max), new HTMLPurifier_AttrDef_Enum(array('auto')) ) ), // For everyone else: $trusted_wh ); $this->info['min-width'] = $this->info['min-height'] = $max === null ? $trusted_min_wh : new HTMLPurifier_AttrDef_Switch( 'img', // For img tags: new HTMLPurifier_AttrDef_CSS_Composite( array( new HTMLPurifier_AttrDef_CSS_Length('0', $max), new HTMLPurifier_AttrDef_Enum(array('initial', 'inherit')) ) ), // For everyone else: $trusted_min_wh ); $this->info['max-width'] = $this->info['max-height'] = $max === null ? $trusted_max_wh : new HTMLPurifier_AttrDef_Switch( 'img', // For img tags: new HTMLPurifier_AttrDef_CSS_Composite( array( new HTMLPurifier_AttrDef_CSS_Length('0', $max), new HTMLPurifier_AttrDef_Enum(array('none', 'initial', 'inherit')) ) ), // For everyone else: $trusted_max_wh ); $this->info['text-decoration'] = new HTMLPurifier_AttrDef_CSS_TextDecoration(); $this->info['font-family'] = new HTMLPurifier_AttrDef_CSS_FontFamily(); // this could use specialized code $this->info['font-weight'] = new HTMLPurifier_AttrDef_Enum( array( 'normal', 'bold', 'bolder', 'lighter', '100', '200', '300', '400', '500', '600', '700', '800', '900' ), false ); // MUST be called after other font properties, as it references // a CSSDefinition object $this->info['font'] = new HTMLPurifier_AttrDef_CSS_Font($config); // same here $this->info['border'] = $this->info['border-bottom'] = $this->info['border-top'] = $this->info['border-left'] = $this->info['border-right'] = new HTMLPurifier_AttrDef_CSS_Border($config); $this->info['border-collapse'] = new HTMLPurifier_AttrDef_Enum( array('collapse', 'separate') ); $this->info['caption-side'] = new HTMLPurifier_AttrDef_Enum( array('top', 'bottom') ); $this->info['table-layout'] = new HTMLPurifier_AttrDef_Enum( array('auto', 'fixed') ); $this->info['vertical-align'] = new HTMLPurifier_AttrDef_CSS_Composite( array( new HTMLPurifier_AttrDef_Enum( array( 'baseline', 'sub', 'super', 'top', 'text-top', 'middle', 'bottom', 'text-bottom' ) ), new HTMLPurifier_AttrDef_CSS_Length(), new HTMLPurifier_AttrDef_CSS_Percentage() ) ); $this->info['border-spacing'] = new HTMLPurifier_AttrDef_CSS_Multiple(new HTMLPurifier_AttrDef_CSS_Length(), 2); // These CSS properties don't work on many browsers, but we live // in THE FUTURE! $this->info['white-space'] = new HTMLPurifier_AttrDef_Enum( array('nowrap', 'normal', 'pre', 'pre-wrap', 'pre-line') ); if ($config->get('CSS.Proprietary')) { $this->doSetupProprietary($config); } if ($config->get('CSS.AllowTricky')) { $this->doSetupTricky($config); } if ($config->get('CSS.Trusted')) { $this->doSetupTrusted($config); } $allow_important = $config->get('CSS.AllowImportant'); // wrap all attr-defs with decorator that handles !important foreach ($this->info as $k => $v) { $this->info[$k] = new HTMLPurifier_AttrDef_CSS_ImportantDecorator($v, $allow_important); } $this->setupConfigStuff($config); } /** * @param HTMLPurifier_Config $config */ protected function doSetupProprietary($config) { // Internet Explorer only scrollbar colors $this->info['scrollbar-arrow-color'] = new HTMLPurifier_AttrDef_CSS_Color(); $this->info['scrollbar-base-color'] = new HTMLPurifier_AttrDef_CSS_Color(); $this->info['scrollbar-darkshadow-color'] = new HTMLPurifier_AttrDef_CSS_Color(); $this->info['scrollbar-face-color'] = new HTMLPurifier_AttrDef_CSS_Color(); $this->info['scrollbar-highlight-color'] = new HTMLPurifier_AttrDef_CSS_Color(); $this->info['scrollbar-shadow-color'] = new HTMLPurifier_AttrDef_CSS_Color(); // vendor specific prefixes of opacity $this->info['-moz-opacity'] = new HTMLPurifier_AttrDef_CSS_AlphaValue(); $this->info['-khtml-opacity'] = new HTMLPurifier_AttrDef_CSS_AlphaValue(); // only opacity, for now $this->info['filter'] = new HTMLPurifier_AttrDef_CSS_Filter(); // more CSS3 $this->info['page-break-after'] = $this->info['page-break-before'] = new HTMLPurifier_AttrDef_Enum( array( 'auto', 'always', 'avoid', 'left', 'right' ) ); $this->info['page-break-inside'] = new HTMLPurifier_AttrDef_Enum(array('auto', 'avoid')); $border_radius = new HTMLPurifier_AttrDef_CSS_Composite( array( new HTMLPurifier_AttrDef_CSS_Percentage(true), // disallow negative new HTMLPurifier_AttrDef_CSS_Length('0') // disallow negative )); $this->info['border-top-left-radius'] = $this->info['border-top-right-radius'] = $this->info['border-bottom-right-radius'] = $this->info['border-bottom-left-radius'] = new HTMLPurifier_AttrDef_CSS_Multiple($border_radius, 2); // TODO: support SLASH syntax $this->info['border-radius'] = new HTMLPurifier_AttrDef_CSS_Multiple($border_radius, 4); } /** * @param HTMLPurifier_Config $config */ protected function doSetupTricky($config) { $this->info['display'] = new HTMLPurifier_AttrDef_Enum( array( 'inline', 'block', 'list-item', 'run-in', 'compact', 'marker', 'table', 'inline-block', 'inline-table', 'table-row-group', 'table-header-group', 'table-footer-group', 'table-row', 'table-column-group', 'table-column', 'table-cell', 'table-caption', 'none' ) ); $this->info['visibility'] = new HTMLPurifier_AttrDef_Enum( array('visible', 'hidden', 'collapse') ); $this->info['overflow'] = new HTMLPurifier_AttrDef_Enum(array('visible', 'hidden', 'auto', 'scroll')); $this->info['opacity'] = new HTMLPurifier_AttrDef_CSS_AlphaValue(); } /** * @param HTMLPurifier_Config $config */ protected function doSetupTrusted($config) { $this->info['position'] = new HTMLPurifier_AttrDef_Enum( array('static', 'relative', 'absolute', 'fixed') ); $this->info['top'] = $this->info['left'] = $this->info['right'] = $this->info['bottom'] = new HTMLPurifier_AttrDef_CSS_Composite( array( new HTMLPurifier_AttrDef_CSS_Length(), new HTMLPurifier_AttrDef_CSS_Percentage(), new HTMLPurifier_AttrDef_Enum(array('auto')), ) ); $this->info['z-index'] = new HTMLPurifier_AttrDef_CSS_Composite( array( new HTMLPurifier_AttrDef_Integer(), new HTMLPurifier_AttrDef_Enum(array('auto')), ) ); } /** * Performs extra config-based processing. Based off of * HTMLPurifier_HTMLDefinition. * @param HTMLPurifier_Config $config * @todo Refactor duplicate elements into common class (probably using * composition, not inheritance). */ protected function setupConfigStuff($config) { // setup allowed elements $support = "(for information on implementing this, see the " . "support forums) "; $allowed_properties = $config->get('CSS.AllowedProperties'); if ($allowed_properties !== null) { foreach ($this->info as $name => $d) { if (!isset($allowed_properties[$name])) { unset($this->info[$name]); } unset($allowed_properties[$name]); } // emit errors foreach ($allowed_properties as $name => $d) { // :TODO: Is this htmlspecialchars() call really necessary? $name = htmlspecialchars($name); trigger_error("Style attribute '$name' is not supported $support", E_USER_WARNING); } } $forbidden_properties = $config->get('CSS.ForbiddenProperties'); if ($forbidden_properties !== null) { foreach ($this->info as $name => $d) { if (isset($forbidden_properties[$name])) { unset($this->info[$name]); } } } } } // vim: et sw=4 sts=4 Language.php 0000644 00000013656 15152171665 0007024 0 ustar 00 <?php /** * Represents a language and defines localizable string formatting and * other functions, as well as the localized messages for HTML Purifier. */ class HTMLPurifier_Language { /** * ISO 639 language code of language. Prefers shortest possible version. * @type string */ public $code = 'en'; /** * Fallback language code. * @type bool|string */ public $fallback = false; /** * Array of localizable messages. * @type array */ public $messages = array(); /** * Array of localizable error codes. * @type array */ public $errorNames = array(); /** * True if no message file was found for this language, so English * is being used instead. Check this if you'd like to notify the * user that they've used a non-supported language. * @type bool */ public $error = false; /** * Has the language object been loaded yet? * @type bool * @todo Make it private, fix usage in HTMLPurifier_LanguageTest */ public $_loaded = false; /** * @type HTMLPurifier_Config */ protected $config; /** * @type HTMLPurifier_Context */ protected $context; /** * @param HTMLPurifier_Config $config * @param HTMLPurifier_Context $context */ public function __construct($config, $context) { $this->config = $config; $this->context = $context; } /** * Loads language object with necessary info from factory cache * @note This is a lazy loader */ public function load() { if ($this->_loaded) { return; } $factory = HTMLPurifier_LanguageFactory::instance(); $factory->loadLanguage($this->code); foreach ($factory->keys as $key) { $this->$key = $factory->cache[$this->code][$key]; } $this->_loaded = true; } /** * Retrieves a localised message. * @param string $key string identifier of message * @return string localised message */ public function getMessage($key) { if (!$this->_loaded) { $this->load(); } if (!isset($this->messages[$key])) { return "[$key]"; } return $this->messages[$key]; } /** * Retrieves a localised error name. * @param int $int error number, corresponding to PHP's error reporting * @return string localised message */ public function getErrorName($int) { if (!$this->_loaded) { $this->load(); } if (!isset($this->errorNames[$int])) { return "[Error: $int]"; } return $this->errorNames[$int]; } /** * Converts an array list into a string readable representation * @param array $array * @return string */ public function listify($array) { $sep = $this->getMessage('Item separator'); $sep_last = $this->getMessage('Item separator last'); $ret = ''; for ($i = 0, $c = count($array); $i < $c; $i++) { if ($i == 0) { } elseif ($i + 1 < $c) { $ret .= $sep; } else { $ret .= $sep_last; } $ret .= $array[$i]; } return $ret; } /** * Formats a localised message with passed parameters * @param string $key string identifier of message * @param array $args Parameters to substitute in * @return string localised message * @todo Implement conditionals? Right now, some messages make * reference to line numbers, but those aren't always available */ public function formatMessage($key, $args = array()) { if (!$this->_loaded) { $this->load(); } if (!isset($this->messages[$key])) { return "[$key]"; } $raw = $this->messages[$key]; $subst = array(); $generator = false; foreach ($args as $i => $value) { if (is_object($value)) { if ($value instanceof HTMLPurifier_Token) { // factor this out some time if (!$generator) { $generator = $this->context->get('Generator'); } if (isset($value->name)) { $subst['$'.$i.'.Name'] = $value->name; } if (isset($value->data)) { $subst['$'.$i.'.Data'] = $value->data; } $subst['$'.$i.'.Compact'] = $subst['$'.$i.'.Serialized'] = $generator->generateFromToken($value); // a more complex algorithm for compact representation // could be introduced for all types of tokens. This // may need to be factored out into a dedicated class if (!empty($value->attr)) { $stripped_token = clone $value; $stripped_token->attr = array(); $subst['$'.$i.'.Compact'] = $generator->generateFromToken($stripped_token); } $subst['$'.$i.'.Line'] = $value->line ? $value->line : 'unknown'; } continue; } elseif (is_array($value)) { $keys = array_keys($value); if (array_keys($keys) === $keys) { // list $subst['$'.$i] = $this->listify($value); } else { // associative array // no $i implementation yet, sorry $subst['$'.$i.'.Keys'] = $this->listify($keys); $subst['$'.$i.'.Values'] = $this->listify(array_values($value)); } continue; } $subst['$' . $i] = $value; } return strtr($raw, $subst); } } // vim: et sw=4 sts=4 DefinitionCache.php 0000644 00000007511 15152171665 0010306 0 ustar 00 <?php /** * Abstract class representing Definition cache managers that implements * useful common methods and is a factory. * @todo Create a separate maintenance file advanced users can use to * cache their custom HTMLDefinition, which can be loaded * via a configuration directive * @todo Implement memcached */ abstract class HTMLPurifier_DefinitionCache { /** * @type string */ public $type; /** * @param string $type Type of definition objects this instance of the * cache will handle. */ public function __construct($type) { $this->type = $type; } /** * Generates a unique identifier for a particular configuration * @param HTMLPurifier_Config $config Instance of HTMLPurifier_Config * @return string */ public function generateKey($config) { return $config->version . ',' . // possibly replace with function calls $config->getBatchSerial($this->type) . ',' . $config->get($this->type . '.DefinitionRev'); } /** * Tests whether or not a key is old with respect to the configuration's * version and revision number. * @param string $key Key to test * @param HTMLPurifier_Config $config Instance of HTMLPurifier_Config to test against * @return bool */ public function isOld($key, $config) { if (substr_count($key, ',') < 2) { return true; } list($version, $hash, $revision) = explode(',', $key, 3); $compare = version_compare($version, $config->version); // version mismatch, is always old if ($compare != 0) { return true; } // versions match, ids match, check revision number if ($hash == $config->getBatchSerial($this->type) && $revision < $config->get($this->type . '.DefinitionRev')) { return true; } return false; } /** * Checks if a definition's type jives with the cache's type * @note Throws an error on failure * @param HTMLPurifier_Definition $def Definition object to check * @return bool true if good, false if not */ public function checkDefType($def) { if ($def->type !== $this->type) { trigger_error("Cannot use definition of type {$def->type} in cache for {$this->type}"); return false; } return true; } /** * Adds a definition object to the cache * @param HTMLPurifier_Definition $def * @param HTMLPurifier_Config $config */ abstract public function add($def, $config); /** * Unconditionally saves a definition object to the cache * @param HTMLPurifier_Definition $def * @param HTMLPurifier_Config $config */ abstract public function set($def, $config); /** * Replace an object in the cache * @param HTMLPurifier_Definition $def * @param HTMLPurifier_Config $config */ abstract public function replace($def, $config); /** * Retrieves a definition object from the cache * @param HTMLPurifier_Config $config */ abstract public function get($config); /** * Removes a definition object to the cache * @param HTMLPurifier_Config $config */ abstract public function remove($config); /** * Clears all objects from cache * @param HTMLPurifier_Config $config */ abstract public function flush($config); /** * Clears all expired (older version or revision) objects from cache * @note Be careful implementing this method as flush. Flush must * not interfere with other Definition types, and cleanup() * should not be repeatedly called by userland code. * @param HTMLPurifier_Config $config */ abstract public function cleanup($config); } // vim: et sw=4 sts=4 Generator.php 0000644 00000024013 15152171665 0007214 0 ustar 00 <?php /** * Generates HTML from tokens. * @todo Refactor interface so that configuration/context is determined * upon instantiation, no need for messy generateFromTokens() calls * @todo Make some of the more internal functions protected, and have * unit tests work around that */ class HTMLPurifier_Generator { /** * Whether or not generator should produce XML output. * @type bool */ private $_xhtml = true; /** * :HACK: Whether or not generator should comment the insides of <script> tags. * @type bool */ private $_scriptFix = false; /** * Cache of HTMLDefinition during HTML output to determine whether or * not attributes should be minimized. * @type HTMLPurifier_HTMLDefinition */ private $_def; /** * Cache of %Output.SortAttr. * @type bool */ private $_sortAttr; /** * Cache of %Output.FlashCompat. * @type bool */ private $_flashCompat; /** * Cache of %Output.FixInnerHTML. * @type bool */ private $_innerHTMLFix; /** * Stack for keeping track of object information when outputting IE * compatibility code. * @type array */ private $_flashStack = array(); /** * Configuration for the generator * @type HTMLPurifier_Config */ protected $config; /** * @param HTMLPurifier_Config $config * @param HTMLPurifier_Context $context */ public function __construct($config, $context) { $this->config = $config; $this->_scriptFix = $config->get('Output.CommentScriptContents'); $this->_innerHTMLFix = $config->get('Output.FixInnerHTML'); $this->_sortAttr = $config->get('Output.SortAttr'); $this->_flashCompat = $config->get('Output.FlashCompat'); $this->_def = $config->getHTMLDefinition(); $this->_xhtml = $this->_def->doctype->xml; } /** * Generates HTML from an array of tokens. * @param HTMLPurifier_Token[] $tokens Array of HTMLPurifier_Token * @return string Generated HTML */ public function generateFromTokens($tokens) { if (!$tokens) { return ''; } // Basic algorithm $html = ''; for ($i = 0, $size = count($tokens); $i < $size; $i++) { if ($this->_scriptFix && $tokens[$i]->name === 'script' && $i + 2 < $size && $tokens[$i+2] instanceof HTMLPurifier_Token_End) { // script special case // the contents of the script block must be ONE token // for this to work. $html .= $this->generateFromToken($tokens[$i++]); $html .= $this->generateScriptFromToken($tokens[$i++]); } $html .= $this->generateFromToken($tokens[$i]); } // Tidy cleanup if (extension_loaded('tidy') && $this->config->get('Output.TidyFormat')) { $tidy = new Tidy; $tidy->parseString( $html, array( 'indent'=> true, 'output-xhtml' => $this->_xhtml, 'show-body-only' => true, 'indent-spaces' => 2, 'wrap' => 68, ), 'utf8' ); $tidy->cleanRepair(); $html = (string) $tidy; // explicit cast necessary } // Normalize newlines to system defined value if ($this->config->get('Core.NormalizeNewlines')) { $nl = $this->config->get('Output.Newline'); if ($nl === null) { $nl = PHP_EOL; } if ($nl !== "\n") { $html = str_replace("\n", $nl, $html); } } return $html; } /** * Generates HTML from a single token. * @param HTMLPurifier_Token $token HTMLPurifier_Token object. * @return string Generated HTML */ public function generateFromToken($token) { if (!$token instanceof HTMLPurifier_Token) { trigger_error('Cannot generate HTML from non-HTMLPurifier_Token object', E_USER_WARNING); return ''; } elseif ($token instanceof HTMLPurifier_Token_Start) { $attr = $this->generateAttributes($token->attr, $token->name); if ($this->_flashCompat) { if ($token->name == "object") { $flash = new stdClass(); $flash->attr = $token->attr; $flash->param = array(); $this->_flashStack[] = $flash; } } return '<' . $token->name . ($attr ? ' ' : '') . $attr . '>'; } elseif ($token instanceof HTMLPurifier_Token_End) { $_extra = ''; if ($this->_flashCompat) { if ($token->name == "object" && !empty($this->_flashStack)) { // doesn't do anything for now } } return $_extra . '</' . $token->name . '>'; } elseif ($token instanceof HTMLPurifier_Token_Empty) { if ($this->_flashCompat && $token->name == "param" && !empty($this->_flashStack)) { $this->_flashStack[count($this->_flashStack)-1]->param[$token->attr['name']] = $token->attr['value']; } $attr = $this->generateAttributes($token->attr, $token->name); return '<' . $token->name . ($attr ? ' ' : '') . $attr . ( $this->_xhtml ? ' /': '' ) // <br /> v. <br> . '>'; } elseif ($token instanceof HTMLPurifier_Token_Text) { return $this->escape($token->data, ENT_NOQUOTES); } elseif ($token instanceof HTMLPurifier_Token_Comment) { return '<!--' . $token->data . '-->'; } else { return ''; } } /** * Special case processor for the contents of script tags * @param HTMLPurifier_Token $token HTMLPurifier_Token object. * @return string * @warning This runs into problems if there's already a literal * --> somewhere inside the script contents. */ public function generateScriptFromToken($token) { if (!$token instanceof HTMLPurifier_Token_Text) { return $this->generateFromToken($token); } // Thanks <http://lachy.id.au/log/2005/05/script-comments> $data = preg_replace('#//\s*$#', '', $token->data); return '<!--//--><![CDATA[//><!--' . "\n" . trim($data) . "\n" . '//--><!]]>'; } /** * Generates attribute declarations from attribute array. * @note This does not include the leading or trailing space. * @param array $assoc_array_of_attributes Attribute array * @param string $element Name of element attributes are for, used to check * attribute minimization. * @return string Generated HTML fragment for insertion. */ public function generateAttributes($assoc_array_of_attributes, $element = '') { $html = ''; if ($this->_sortAttr) { ksort($assoc_array_of_attributes); } foreach ($assoc_array_of_attributes as $key => $value) { if (!$this->_xhtml) { // Remove namespaced attributes if (strpos($key, ':') !== false) { continue; } // Check if we should minimize the attribute: val="val" -> val if ($element && !empty($this->_def->info[$element]->attr[$key]->minimized)) { $html .= $key . ' '; continue; } } // Workaround for Internet Explorer innerHTML bug. // Essentially, Internet Explorer, when calculating // innerHTML, omits quotes if there are no instances of // angled brackets, quotes or spaces. However, when parsing // HTML (for example, when you assign to innerHTML), it // treats backticks as quotes. Thus, // <img alt="``" /> // becomes // <img alt=`` /> // becomes // <img alt='' /> // Fortunately, all we need to do is trigger an appropriate // quoting style, which we do by adding an extra space. // This also is consistent with the W3C spec, which states // that user agents may ignore leading or trailing // whitespace (in fact, most don't, at least for attributes // like alt, but an extra space at the end is barely // noticeable). Still, we have a configuration knob for // this, since this transformation is not necesary if you // don't process user input with innerHTML or you don't plan // on supporting Internet Explorer. if ($this->_innerHTMLFix) { if (strpos($value, '`') !== false) { // check if correct quoting style would not already be // triggered if (strcspn($value, '"\' <>') === strlen($value)) { // protect! $value .= ' '; } } } $html .= $key.'="'.$this->escape($value).'" '; } return rtrim($html); } /** * Escapes raw text data. * @todo This really ought to be protected, but until we have a facility * for properly generating HTML here w/o using tokens, it stays * public. * @param string $string String data to escape for HTML. * @param int $quote Quoting style, like htmlspecialchars. ENT_NOQUOTES is * permissible for non-attribute output. * @return string escaped data. */ public function escape($string, $quote = null) { // Workaround for APC bug on Mac Leopard reported by sidepodcast // http://htmlpurifier.org/phorum/read.php?3,4823,4846 if ($quote === null) { $quote = ENT_COMPAT; } return htmlspecialchars($string, $quote, 'UTF-8'); } } // vim: et sw=4 sts=4 Length.php 0000644 00000007464 15152171665 0006522 0 ustar 00 <?php /** * Represents a measurable length, with a string numeric magnitude * and a unit. This object is immutable. */ class HTMLPurifier_Length { /** * String numeric magnitude. * @type string */ protected $n; /** * String unit. False is permitted if $n = 0. * @type string|bool */ protected $unit; /** * Whether or not this length is valid. Null if not calculated yet. * @type bool */ protected $isValid; /** * Array Lookup array of units recognized by CSS 3 * @type array */ protected static $allowedUnits = array( 'em' => true, 'ex' => true, 'px' => true, 'in' => true, 'cm' => true, 'mm' => true, 'pt' => true, 'pc' => true, 'ch' => true, 'rem' => true, 'vw' => true, 'vh' => true, 'vmin' => true, 'vmax' => true ); /** * @param string $n Magnitude * @param bool|string $u Unit */ public function __construct($n = '0', $u = false) { $this->n = (string) $n; $this->unit = $u !== false ? (string) $u : false; } /** * @param string $s Unit string, like '2em' or '3.4in' * @return HTMLPurifier_Length * @warning Does not perform validation. */ public static function make($s) { if ($s instanceof HTMLPurifier_Length) { return $s; } $n_length = strspn($s, '1234567890.+-'); $n = substr($s, 0, $n_length); $unit = substr($s, $n_length); if ($unit === '') { $unit = false; } return new HTMLPurifier_Length($n, $unit); } /** * Validates the number and unit. * @return bool */ protected function validate() { // Special case: if ($this->n === '+0' || $this->n === '-0') { $this->n = '0'; } if ($this->n === '0' && $this->unit === false) { return true; } if ($this->unit === false || !ctype_lower($this->unit)) { $this->unit = strtolower($this->unit); } if (!isset(HTMLPurifier_Length::$allowedUnits[$this->unit])) { return false; } // Hack: $def = new HTMLPurifier_AttrDef_CSS_Number(); $result = $def->validate($this->n, false, false); if ($result === false) { return false; } $this->n = $result; return true; } /** * Returns string representation of number. * @return string */ public function toString() { if (!$this->isValid()) { return false; } return $this->n . $this->unit; } /** * Retrieves string numeric magnitude. * @return string */ public function getN() { return $this->n; } /** * Retrieves string unit. * @return string */ public function getUnit() { return $this->unit; } /** * Returns true if this length unit is valid. * @return bool */ public function isValid() { if ($this->isValid === null) { $this->isValid = $this->validate(); } return $this->isValid; } /** * Compares two lengths, and returns 1 if greater, -1 if less and 0 if equal. * @param HTMLPurifier_Length $l * @return int * @warning If both values are too large or small, this calculation will * not work properly */ public function compareTo($l) { if ($l === false) { return false; } if ($l->unit !== $this->unit) { $converter = new HTMLPurifier_UnitConverter(); $l = $converter->convert($l, $this->unit); if ($l === false) { return false; } } return $this->n - $l->n; } } // vim: et sw=4 sts=4 URIParser.php 0000644 00000004366 15152171665 0007113 0 ustar 00 <?php /** * Parses a URI into the components and fragment identifier as specified * by RFC 3986. */ class HTMLPurifier_URIParser { /** * Instance of HTMLPurifier_PercentEncoder to do normalization with. */ protected $percentEncoder; public function __construct() { $this->percentEncoder = new HTMLPurifier_PercentEncoder(); } /** * Parses a URI. * @param $uri string URI to parse * @return HTMLPurifier_URI representation of URI. This representation has * not been validated yet and may not conform to RFC. */ public function parse($uri) { $uri = $this->percentEncoder->normalize($uri); // Regexp is as per Appendix B. // Note that ["<>] are an addition to the RFC's recommended // characters, because they represent external delimeters. $r_URI = '!'. '(([a-zA-Z0-9\.\+\-]+):)?'. // 2. Scheme '(//([^/?#"<>]*))?'. // 4. Authority '([^?#"<>]*)'. // 5. Path '(\?([^#"<>]*))?'. // 7. Query '(#([^"<>]*))?'. // 8. Fragment '!'; $matches = array(); $result = preg_match($r_URI, $uri, $matches); if (!$result) return false; // *really* invalid URI // seperate out parts $scheme = !empty($matches[1]) ? $matches[2] : null; $authority = !empty($matches[3]) ? $matches[4] : null; $path = $matches[5]; // always present, can be empty $query = !empty($matches[6]) ? $matches[7] : null; $fragment = !empty($matches[8]) ? $matches[9] : null; // further parse authority if ($authority !== null) { $r_authority = "/^((.+?)@)?(\[[^\]]+\]|[^:]*)(:(\d*))?/"; $matches = array(); preg_match($r_authority, $authority, $matches); $userinfo = !empty($matches[1]) ? $matches[2] : null; $host = !empty($matches[3]) ? $matches[3] : ''; $port = !empty($matches[4]) ? (int) $matches[5] : null; } else { $port = $host = $userinfo = null; } return new HTMLPurifier_URI( $scheme, $userinfo, $host, $port, $path, $query, $fragment); } } // vim: et sw=4 sts=4 EntityParser.php 0000644 00000023374 15152171665 0007730 0 ustar 00 <?php // if want to implement error collecting here, we'll need to use some sort // of global data (probably trigger_error) because it's impossible to pass // $config or $context to the callback functions. /** * Handles referencing and derefencing character entities */ class HTMLPurifier_EntityParser { /** * Reference to entity lookup table. * @type HTMLPurifier_EntityLookup */ protected $_entity_lookup; /** * Callback regex string for entities in text. * @type string */ protected $_textEntitiesRegex; /** * Callback regex string for entities in attributes. * @type string */ protected $_attrEntitiesRegex; /** * Tests if the beginning of a string is a semi-optional regex */ protected $_semiOptionalPrefixRegex; public function __construct() { // From // http://stackoverflow.com/questions/15532252/why-is-reg-being-rendered-as-without-the-bounding-semicolon $semi_optional = "quot|QUOT|lt|LT|gt|GT|amp|AMP|AElig|Aacute|Acirc|Agrave|Aring|Atilde|Auml|COPY|Ccedil|ETH|Eacute|Ecirc|Egrave|Euml|Iacute|Icirc|Igrave|Iuml|Ntilde|Oacute|Ocirc|Ograve|Oslash|Otilde|Ouml|REG|THORN|Uacute|Ucirc|Ugrave|Uuml|Yacute|aacute|acirc|acute|aelig|agrave|aring|atilde|auml|brvbar|ccedil|cedil|cent|copy|curren|deg|divide|eacute|ecirc|egrave|eth|euml|frac12|frac14|frac34|iacute|icirc|iexcl|igrave|iquest|iuml|laquo|macr|micro|middot|nbsp|not|ntilde|oacute|ocirc|ograve|ordf|ordm|oslash|otilde|ouml|para|plusmn|pound|raquo|reg|sect|shy|sup1|sup2|sup3|szlig|thorn|times|uacute|ucirc|ugrave|uml|uuml|yacute|yen|yuml"; // NB: three empty captures to put the fourth match in the right // place $this->_semiOptionalPrefixRegex = "/&()()()($semi_optional)/"; $this->_textEntitiesRegex = '/&(?:'. // hex '[#]x([a-fA-F0-9]+);?|'. // dec '[#]0*(\d+);?|'. // string (mandatory semicolon) // NB: order matters: match semicolon preferentially '([A-Za-z_:][A-Za-z0-9.\-_:]*);|'. // string (optional semicolon) "($semi_optional)". ')/'; $this->_attrEntitiesRegex = '/&(?:'. // hex '[#]x([a-fA-F0-9]+);?|'. // dec '[#]0*(\d+);?|'. // string (mandatory semicolon) // NB: order matters: match semicolon preferentially '([A-Za-z_:][A-Za-z0-9.\-_:]*);|'. // string (optional semicolon) // don't match if trailing is equals or alphanumeric (URL // like) "($semi_optional)(?![=;A-Za-z0-9])". ')/'; } /** * Substitute entities with the parsed equivalents. Use this on * textual data in an HTML document (as opposed to attributes.) * * @param string $string String to have entities parsed. * @return string Parsed string. */ public function substituteTextEntities($string) { return preg_replace_callback( $this->_textEntitiesRegex, array($this, 'entityCallback'), $string ); } /** * Substitute entities with the parsed equivalents. Use this on * attribute contents in documents. * * @param string $string String to have entities parsed. * @return string Parsed string. */ public function substituteAttrEntities($string) { return preg_replace_callback( $this->_attrEntitiesRegex, array($this, 'entityCallback'), $string ); } /** * Callback function for substituteNonSpecialEntities() that does the work. * * @param array $matches PCRE matches array, with 0 the entire match, and * either index 1, 2 or 3 set with a hex value, dec value, * or string (respectively). * @return string Replacement string. */ protected function entityCallback($matches) { $entity = $matches[0]; $hex_part = @$matches[1]; $dec_part = @$matches[2]; $named_part = empty($matches[3]) ? (empty($matches[4]) ? "" : $matches[4]) : $matches[3]; if ($hex_part !== NULL && $hex_part !== "") { return HTMLPurifier_Encoder::unichr(hexdec($hex_part)); } elseif ($dec_part !== NULL && $dec_part !== "") { return HTMLPurifier_Encoder::unichr((int) $dec_part); } else { if (!$this->_entity_lookup) { $this->_entity_lookup = HTMLPurifier_EntityLookup::instance(); } if (isset($this->_entity_lookup->table[$named_part])) { return $this->_entity_lookup->table[$named_part]; } else { // exact match didn't match anything, so test if // any of the semicolon optional match the prefix. // Test that this is an EXACT match is important to // prevent infinite loop if (!empty($matches[3])) { return preg_replace_callback( $this->_semiOptionalPrefixRegex, array($this, 'entityCallback'), $entity ); } return $entity; } } } // LEGACY CODE BELOW /** * Callback regex string for parsing entities. * @type string */ protected $_substituteEntitiesRegex = '/&(?:[#]x([a-fA-F0-9]+)|[#]0*(\d+)|([A-Za-z_:][A-Za-z0-9.\-_:]*));?/'; // 1. hex 2. dec 3. string (XML style) /** * Decimal to parsed string conversion table for special entities. * @type array */ protected $_special_dec2str = array( 34 => '"', 38 => '&', 39 => "'", 60 => '<', 62 => '>' ); /** * Stripped entity names to decimal conversion table for special entities. * @type array */ protected $_special_ent2dec = array( 'quot' => 34, 'amp' => 38, 'lt' => 60, 'gt' => 62 ); /** * Substitutes non-special entities with their parsed equivalents. Since * running this whenever you have parsed character is t3h 5uck, we run * it before everything else. * * @param string $string String to have non-special entities parsed. * @return string Parsed string. */ public function substituteNonSpecialEntities($string) { // it will try to detect missing semicolons, but don't rely on it return preg_replace_callback( $this->_substituteEntitiesRegex, array($this, 'nonSpecialEntityCallback'), $string ); } /** * Callback function for substituteNonSpecialEntities() that does the work. * * @param array $matches PCRE matches array, with 0 the entire match, and * either index 1, 2 or 3 set with a hex value, dec value, * or string (respectively). * @return string Replacement string. */ protected function nonSpecialEntityCallback($matches) { // replaces all but big five $entity = $matches[0]; $is_num = (@$matches[0][1] === '#'); if ($is_num) { $is_hex = (@$entity[2] === 'x'); $code = $is_hex ? hexdec($matches[1]) : (int) $matches[2]; // abort for special characters if (isset($this->_special_dec2str[$code])) { return $entity; } return HTMLPurifier_Encoder::unichr($code); } else { if (isset($this->_special_ent2dec[$matches[3]])) { return $entity; } if (!$this->_entity_lookup) { $this->_entity_lookup = HTMLPurifier_EntityLookup::instance(); } if (isset($this->_entity_lookup->table[$matches[3]])) { return $this->_entity_lookup->table[$matches[3]]; } else { return $entity; } } } /** * Substitutes only special entities with their parsed equivalents. * * @notice We try to avoid calling this function because otherwise, it * would have to be called a lot (for every parsed section). * * @param string $string String to have non-special entities parsed. * @return string Parsed string. */ public function substituteSpecialEntities($string) { return preg_replace_callback( $this->_substituteEntitiesRegex, array($this, 'specialEntityCallback'), $string ); } /** * Callback function for substituteSpecialEntities() that does the work. * * This callback has same syntax as nonSpecialEntityCallback(). * * @param array $matches PCRE-style matches array, with 0 the entire match, and * either index 1, 2 or 3 set with a hex value, dec value, * or string (respectively). * @return string Replacement string. */ protected function specialEntityCallback($matches) { $entity = $matches[0]; $is_num = (@$matches[0][1] === '#'); if ($is_num) { $is_hex = (@$entity[2] === 'x'); $int = $is_hex ? hexdec($matches[1]) : (int) $matches[2]; return isset($this->_special_dec2str[$int]) ? $this->_special_dec2str[$int] : $entity; } else { return isset($this->_special_ent2dec[$matches[3]]) ? $this->_special_dec2str[$this->_special_ent2dec[$matches[3]]] : $entity; } } } // vim: et sw=4 sts=4 ElementDef.php 0000644 00000016553 15152171665 0007310 0 ustar 00 <?php /** * Structure that stores an HTML element definition. Used by * HTMLPurifier_HTMLDefinition and HTMLPurifier_HTMLModule. * @note This class is inspected by HTMLPurifier_Printer_HTMLDefinition. * Please update that class too. * @warning If you add new properties to this class, you MUST update * the mergeIn() method. */ class HTMLPurifier_ElementDef { /** * Does the definition work by itself, or is it created solely * for the purpose of merging into another definition? * @type bool */ public $standalone = true; /** * Associative array of attribute name to HTMLPurifier_AttrDef. * @type array * @note Before being processed by HTMLPurifier_AttrCollections * when modules are finalized during * HTMLPurifier_HTMLDefinition->setup(), this array may also * contain an array at index 0 that indicates which attribute * collections to load into the full array. It may also * contain string indentifiers in lieu of HTMLPurifier_AttrDef, * see HTMLPurifier_AttrTypes on how they are expanded during * HTMLPurifier_HTMLDefinition->setup() processing. */ public $attr = array(); // XXX: Design note: currently, it's not possible to override // previously defined AttrTransforms without messing around with // the final generated config. This is by design; a previous version // used an associated list of attr_transform, but it was extremely // easy to accidentally override other attribute transforms by // forgetting to specify an index (and just using 0.) While we // could check this by checking the index number and complaining, // there is a second problem which is that it is not at all easy to // tell when something is getting overridden. Combine this with a // codebase where this isn't really being used, and it's perfect for // nuking. /** * List of tags HTMLPurifier_AttrTransform to be done before validation. * @type array */ public $attr_transform_pre = array(); /** * List of tags HTMLPurifier_AttrTransform to be done after validation. * @type array */ public $attr_transform_post = array(); /** * HTMLPurifier_ChildDef of this tag. * @type HTMLPurifier_ChildDef */ public $child; /** * Abstract string representation of internal ChildDef rules. * @see HTMLPurifier_ContentSets for how this is parsed and then transformed * into an HTMLPurifier_ChildDef. * @warning This is a temporary variable that is not available after * being processed by HTMLDefinition * @type string */ public $content_model; /** * Value of $child->type, used to determine which ChildDef to use, * used in combination with $content_model. * @warning This must be lowercase * @warning This is a temporary variable that is not available after * being processed by HTMLDefinition * @type string */ public $content_model_type; /** * Does the element have a content model (#PCDATA | Inline)*? This * is important for chameleon ins and del processing in * HTMLPurifier_ChildDef_Chameleon. Dynamically set: modules don't * have to worry about this one. * @type bool */ public $descendants_are_inline = false; /** * List of the names of required attributes this element has. * Dynamically populated by HTMLPurifier_HTMLDefinition::getElement() * @type array */ public $required_attr = array(); /** * Lookup table of tags excluded from all descendants of this tag. * @type array * @note SGML permits exclusions for all descendants, but this is * not possible with DTDs or XML Schemas. W3C has elected to * use complicated compositions of content_models to simulate * exclusion for children, but we go the simpler, SGML-style * route of flat-out exclusions, which correctly apply to * all descendants and not just children. Note that the XHTML * Modularization Abstract Modules are blithely unaware of such * distinctions. */ public $excludes = array(); /** * This tag is explicitly auto-closed by the following tags. * @type array */ public $autoclose = array(); /** * If a foreign element is found in this element, test if it is * allowed by this sub-element; if it is, instead of closing the * current element, place it inside this element. * @type string */ public $wrap; /** * Whether or not this is a formatting element affected by the * "Active Formatting Elements" algorithm. * @type bool */ public $formatting; /** * Low-level factory constructor for creating new standalone element defs */ public static function create($content_model, $content_model_type, $attr) { $def = new HTMLPurifier_ElementDef(); $def->content_model = $content_model; $def->content_model_type = $content_model_type; $def->attr = $attr; return $def; } /** * Merges the values of another element definition into this one. * Values from the new element def take precedence if a value is * not mergeable. * @param HTMLPurifier_ElementDef $def */ public function mergeIn($def) { // later keys takes precedence foreach ($def->attr as $k => $v) { if ($k === 0) { // merge in the includes // sorry, no way to override an include foreach ($v as $v2) { $this->attr[0][] = $v2; } continue; } if ($v === false) { if (isset($this->attr[$k])) { unset($this->attr[$k]); } continue; } $this->attr[$k] = $v; } $this->_mergeAssocArray($this->excludes, $def->excludes); $this->attr_transform_pre = array_merge($this->attr_transform_pre, $def->attr_transform_pre); $this->attr_transform_post = array_merge($this->attr_transform_post, $def->attr_transform_post); if (!empty($def->content_model)) { $this->content_model = str_replace("#SUPER", (string)$this->content_model, $def->content_model); $this->child = false; } if (!empty($def->content_model_type)) { $this->content_model_type = $def->content_model_type; $this->child = false; } if (!is_null($def->child)) { $this->child = $def->child; } if (!is_null($def->formatting)) { $this->formatting = $def->formatting; } if ($def->descendants_are_inline) { $this->descendants_are_inline = $def->descendants_are_inline; } } /** * Merges one array into another, removes values which equal false * @param $a1 Array by reference that is merged into * @param $a2 Array that merges into $a1 */ private function _mergeAssocArray(&$a1, $a2) { foreach ($a2 as $k => $v) { if ($v === false) { if (isset($a1[$k])) { unset($a1[$k]); } continue; } $a1[$k] = $v; } } } // vim: et sw=4 sts=4 IDAccumulator.php 0000644 00000003157 15152171665 0007770 0 ustar 00 <?php /** * Component of HTMLPurifier_AttrContext that accumulates IDs to prevent dupes * @note In Slashdot-speak, dupe means duplicate. * @note The default constructor does not accept $config or $context objects: * use must use the static build() factory method to perform initialization. */ class HTMLPurifier_IDAccumulator { /** * Lookup table of IDs we've accumulated. * @public */ public $ids = array(); /** * Builds an IDAccumulator, also initializing the default blacklist * @param HTMLPurifier_Config $config Instance of HTMLPurifier_Config * @param HTMLPurifier_Context $context Instance of HTMLPurifier_Context * @return HTMLPurifier_IDAccumulator Fully initialized HTMLPurifier_IDAccumulator */ public static function build($config, $context) { $id_accumulator = new HTMLPurifier_IDAccumulator(); $id_accumulator->load($config->get('Attr.IDBlacklist')); return $id_accumulator; } /** * Add an ID to the lookup table. * @param string $id ID to be added. * @return bool status, true if success, false if there's a dupe */ public function add($id) { if (isset($this->ids[$id])) { return false; } return $this->ids[$id] = true; } /** * Load a list of IDs into the lookup table * @param $array_of_ids Array of IDs to load * @note This function doesn't care about duplicates */ public function load($array_of_ids) { foreach ($array_of_ids as $id) { $this->ids[$id] = true; } } } // vim: et sw=4 sts=4 AttrValidator.php 0000644 00000014654 15152171665 0010060 0 ustar 00 <?php /** * Validates the attributes of a token. Doesn't manage required attributes * very well. The only reason we factored this out was because RemoveForeignElements * also needed it besides ValidateAttributes. */ class HTMLPurifier_AttrValidator { /** * Validates the attributes of a token, mutating it as necessary. * that has valid tokens * @param HTMLPurifier_Token $token Token to validate. * @param HTMLPurifier_Config $config Instance of HTMLPurifier_Config * @param HTMLPurifier_Context $context Instance of HTMLPurifier_Context */ public function validateToken($token, $config, $context) { $definition = $config->getHTMLDefinition(); $e =& $context->get('ErrorCollector', true); // initialize IDAccumulator if necessary $ok =& $context->get('IDAccumulator', true); if (!$ok) { $id_accumulator = HTMLPurifier_IDAccumulator::build($config, $context); $context->register('IDAccumulator', $id_accumulator); } // initialize CurrentToken if necessary $current_token =& $context->get('CurrentToken', true); if (!$current_token) { $context->register('CurrentToken', $token); } if (!$token instanceof HTMLPurifier_Token_Start && !$token instanceof HTMLPurifier_Token_Empty ) { return; } // create alias to global definition array, see also $defs // DEFINITION CALL $d_defs = $definition->info_global_attr; // don't update token until the very end, to ensure an atomic update $attr = $token->attr; // do global transformations (pre) // nothing currently utilizes this foreach ($definition->info_attr_transform_pre as $transform) { $attr = $transform->transform($o = $attr, $config, $context); if ($e) { if ($attr != $o) { $e->send(E_NOTICE, 'AttrValidator: Attributes transformed', $o, $attr); } } } // do local transformations only applicable to this element (pre) // ex. <p align="right"> to <p style="text-align:right;"> foreach ($definition->info[$token->name]->attr_transform_pre as $transform) { $attr = $transform->transform($o = $attr, $config, $context); if ($e) { if ($attr != $o) { $e->send(E_NOTICE, 'AttrValidator: Attributes transformed', $o, $attr); } } } // create alias to this element's attribute definition array, see // also $d_defs (global attribute definition array) // DEFINITION CALL $defs = $definition->info[$token->name]->attr; $attr_key = false; $context->register('CurrentAttr', $attr_key); // iterate through all the attribute keypairs // Watch out for name collisions: $key has previously been used foreach ($attr as $attr_key => $value) { // call the definition if (isset($defs[$attr_key])) { // there is a local definition defined if ($defs[$attr_key] === false) { // We've explicitly been told not to allow this element. // This is usually when there's a global definition // that must be overridden. // Theoretically speaking, we could have a // AttrDef_DenyAll, but this is faster! $result = false; } else { // validate according to the element's definition $result = $defs[$attr_key]->validate( $value, $config, $context ); } } elseif (isset($d_defs[$attr_key])) { // there is a global definition defined, validate according // to the global definition $result = $d_defs[$attr_key]->validate( $value, $config, $context ); } else { // system never heard of the attribute? DELETE! $result = false; } // put the results into effect if ($result === false || $result === null) { // this is a generic error message that should replaced // with more specific ones when possible if ($e) { $e->send(E_ERROR, 'AttrValidator: Attribute removed'); } // remove the attribute unset($attr[$attr_key]); } elseif (is_string($result)) { // generally, if a substitution is happening, there // was some sort of implicit correction going on. We'll // delegate it to the attribute classes to say exactly what. // simple substitution $attr[$attr_key] = $result; } else { // nothing happens } // we'd also want slightly more complicated substitution // involving an array as the return value, // although we're not sure how colliding attributes would // resolve (certain ones would be completely overriden, // others would prepend themselves). } $context->destroy('CurrentAttr'); // post transforms // global (error reporting untested) foreach ($definition->info_attr_transform_post as $transform) { $attr = $transform->transform($o = $attr, $config, $context); if ($e) { if ($attr != $o) { $e->send(E_NOTICE, 'AttrValidator: Attributes transformed', $o, $attr); } } } // local (error reporting untested) foreach ($definition->info[$token->name]->attr_transform_post as $transform) { $attr = $transform->transform($o = $attr, $config, $context); if ($e) { if ($attr != $o) { $e->send(E_NOTICE, 'AttrValidator: Attributes transformed', $o, $attr); } } } $token->attr = $attr; // destroy CurrentToken if we made it ourselves if (!$current_token) { $context->destroy('CurrentToken'); } } } // vim: et sw=4 sts=4 Token.php 0000644 00000004261 15152171665 0006351 0 ustar 00 <?php /** * Abstract base token class that all others inherit from. */ abstract class HTMLPurifier_Token { /** * Line number node was on in source document. Null if unknown. * @type int */ public $line; /** * Column of line node was on in source document. Null if unknown. * @type int */ public $col; /** * Lookup array of processing that this token is exempt from. * Currently, valid values are "ValidateAttributes" and * "MakeWellFormed_TagClosedError" * @type array */ public $armor = array(); /** * Used during MakeWellFormed. See Note [Injector skips] * @type */ public $skip; /** * @type */ public $rewind; /** * @type */ public $carryover; /** * @param string $n * @return null|string */ public function __get($n) { if ($n === 'type') { trigger_error('Deprecated type property called; use instanceof', E_USER_NOTICE); switch (get_class($this)) { case 'HTMLPurifier_Token_Start': return 'start'; case 'HTMLPurifier_Token_Empty': return 'empty'; case 'HTMLPurifier_Token_End': return 'end'; case 'HTMLPurifier_Token_Text': return 'text'; case 'HTMLPurifier_Token_Comment': return 'comment'; default: return null; } } } /** * Sets the position of the token in the source document. * @param int $l * @param int $c */ public function position($l = null, $c = null) { $this->line = $l; $this->col = $c; } /** * Convenience function for DirectLex settings line/col position. * @param int $l * @param int $c */ public function rawPosition($l, $c) { if ($c === -1) { $l++; } $this->line = $l; $this->col = $c; } /** * Converts a token into its corresponding node. */ abstract public function toNode(); } // vim: et sw=4 sts=4 ChildDef/List.php 0000644 00000005736 15152171665 0007656 0 ustar 00 <?php /** * Definition for list containers ul and ol. * * What does this do? The big thing is to handle ol/ul at the top * level of list nodes, which should be handled specially by /folding/ * them into the previous list node. We generally shouldn't ever * see other disallowed elements, because the autoclose behavior * in MakeWellFormed handles it. */ class HTMLPurifier_ChildDef_List extends HTMLPurifier_ChildDef { /** * @type string */ public $type = 'list'; /** * @type array */ // lying a little bit, so that we can handle ul and ol ourselves // XXX: This whole business with 'wrap' is all a bit unsatisfactory public $elements = array('li' => true, 'ul' => true, 'ol' => true); public $whitespace; /** * @param array $children * @param HTMLPurifier_Config $config * @param HTMLPurifier_Context $context * @return array */ public function validateChildren($children, $config, $context) { // Flag for subclasses $this->whitespace = false; // if there are no tokens, delete parent node if (empty($children)) { return false; } // if li is not allowed, delete parent node if (!isset($config->getHTMLDefinition()->info['li'])) { trigger_error("Cannot allow ul/ol without allowing li", E_USER_WARNING); return false; } // the new set of children $result = array(); // a little sanity check to make sure it's not ALL whitespace $all_whitespace = true; $current_li = null; foreach ($children as $node) { if (!empty($node->is_whitespace)) { $result[] = $node; continue; } $all_whitespace = false; // phew, we're not talking about whitespace if ($node->name === 'li') { // good $current_li = $node; $result[] = $node; } else { // we want to tuck this into the previous li // Invariant: we expect the node to be ol/ul // ToDo: Make this more robust in the case of not ol/ul // by distinguishing between existing li and li created // to handle non-list elements; non-list elements should // not be appended to an existing li; only li created // for non-list. This distinction is not currently made. if ($current_li === null) { $current_li = new HTMLPurifier_Node_Element('li'); $result[] = $current_li; } $current_li->children[] = $node; $current_li->empty = false; // XXX fascinating! Check for this error elsewhere ToDo } } if (empty($result)) { return false; } if ($all_whitespace) { return false; } return $result; } } // vim: et sw=4 sts=4 ChildDef/Table.php 0000644 00000016000 15152171665 0007754 0 ustar 00 <?php /** * Definition for tables. The general idea is to extract out all of the * essential bits, and then reconstruct it later. * * This is a bit confusing, because the DTDs and the W3C * validators seem to disagree on the appropriate definition. The * DTD claims: * * (CAPTION?, (COL*|COLGROUP*), THEAD?, TFOOT?, TBODY+) * * But actually, the HTML4 spec then has this to say: * * The TBODY start tag is always required except when the table * contains only one table body and no table head or foot sections. * The TBODY end tag may always be safely omitted. * * So the DTD is kind of wrong. The validator is, unfortunately, kind * of on crack. * * The definition changed again in XHTML1.1; and in my opinion, this * formulation makes the most sense. * * caption?, ( col* | colgroup* ), (( thead?, tfoot?, tbody+ ) | ( tr+ )) * * Essentially, we have two modes: thead/tfoot/tbody mode, and tr mode. * If we encounter a thead, tfoot or tbody, we are placed in the former * mode, and we *must* wrap any stray tr segments with a tbody. But if * we don't run into any of them, just have tr tags is OK. */ class HTMLPurifier_ChildDef_Table extends HTMLPurifier_ChildDef { /** * @type bool */ public $allow_empty = false; /** * @type string */ public $type = 'table'; /** * @type array */ public $elements = array( 'tr' => true, 'tbody' => true, 'thead' => true, 'tfoot' => true, 'caption' => true, 'colgroup' => true, 'col' => true ); public function __construct() { } /** * @param array $children * @param HTMLPurifier_Config $config * @param HTMLPurifier_Context $context * @return array */ public function validateChildren($children, $config, $context) { if (empty($children)) { return false; } // only one of these elements is allowed in a table $caption = false; $thead = false; $tfoot = false; // whitespace $initial_ws = array(); $after_caption_ws = array(); $after_thead_ws = array(); $after_tfoot_ws = array(); // as many of these as you want $cols = array(); $content = array(); $tbody_mode = false; // if true, then we need to wrap any stray // <tr>s with a <tbody>. $ws_accum =& $initial_ws; foreach ($children as $node) { if ($node instanceof HTMLPurifier_Node_Comment) { $ws_accum[] = $node; continue; } switch ($node->name) { case 'tbody': $tbody_mode = true; // fall through case 'tr': $content[] = $node; $ws_accum =& $content; break; case 'caption': // there can only be one caption! if ($caption !== false) break; $caption = $node; $ws_accum =& $after_caption_ws; break; case 'thead': $tbody_mode = true; // XXX This breaks rendering properties with // Firefox, which never floats a <thead> to // the top. Ever. (Our scheme will float the // first <thead> to the top.) So maybe // <thead>s that are not first should be // turned into <tbody>? Very tricky, indeed. if ($thead === false) { $thead = $node; $ws_accum =& $after_thead_ws; } else { // Oops, there's a second one! What // should we do? Current behavior is to // transmutate the first and last entries into // tbody tags, and then put into content. // Maybe a better idea is to *attach // it* to the existing thead or tfoot? // We don't do this, because Firefox // doesn't float an extra tfoot to the // bottom like it does for the first one. $node->name = 'tbody'; $content[] = $node; $ws_accum =& $content; } break; case 'tfoot': // see above for some aveats $tbody_mode = true; if ($tfoot === false) { $tfoot = $node; $ws_accum =& $after_tfoot_ws; } else { $node->name = 'tbody'; $content[] = $node; $ws_accum =& $content; } break; case 'colgroup': case 'col': $cols[] = $node; $ws_accum =& $cols; break; case '#PCDATA': // How is whitespace handled? We treat is as sticky to // the *end* of the previous element. So all of the // nonsense we have worked on is to keep things // together. if (!empty($node->is_whitespace)) { $ws_accum[] = $node; } break; } } if (empty($content) && $thead === false && $tfoot === false) { return false; } $ret = $initial_ws; if ($caption !== false) { $ret[] = $caption; $ret = array_merge($ret, $after_caption_ws); } if ($cols !== false) { $ret = array_merge($ret, $cols); } if ($thead !== false) { $ret[] = $thead; $ret = array_merge($ret, $after_thead_ws); } if ($tfoot !== false) { $ret[] = $tfoot; $ret = array_merge($ret, $after_tfoot_ws); } if ($tbody_mode) { // we have to shuffle tr into tbody $current_tr_tbody = null; foreach($content as $node) { switch ($node->name) { case 'tbody': $current_tr_tbody = null; $ret[] = $node; break; case 'tr': if ($current_tr_tbody === null) { $current_tr_tbody = new HTMLPurifier_Node_Element('tbody'); $ret[] = $current_tr_tbody; } $current_tr_tbody->children[] = $node; break; case '#PCDATA': //assert($node->is_whitespace); if ($current_tr_tbody === null) { $ret[] = $node; } else { $current_tr_tbody->children[] = $node; } break; } } } else { $ret = array_merge($ret, $content); } return $ret; } } // vim: et sw=4 sts=4 ChildDef/Custom.php 0000644 00000005255 15152171665 0010211 0 ustar 00 <?php /** * Custom validation class, accepts DTD child definitions * * @warning Currently this class is an all or nothing proposition, that is, * it will only give a bool return value. */ class HTMLPurifier_ChildDef_Custom extends HTMLPurifier_ChildDef { /** * @type string */ public $type = 'custom'; /** * @type bool */ public $allow_empty = false; /** * Allowed child pattern as defined by the DTD. * @type string */ public $dtd_regex; /** * PCRE regex derived from $dtd_regex. * @type string */ private $_pcre_regex; /** * @param $dtd_regex Allowed child pattern from the DTD */ public function __construct($dtd_regex) { $this->dtd_regex = $dtd_regex; $this->_compileRegex(); } /** * Compiles the PCRE regex from a DTD regex ($dtd_regex to $_pcre_regex) */ protected function _compileRegex() { $raw = str_replace(' ', '', $this->dtd_regex); if ($raw[0] != '(') { $raw = "($raw)"; } $el = '[#a-zA-Z0-9_.-]+'; $reg = $raw; // COMPLICATED! AND MIGHT BE BUGGY! I HAVE NO CLUE WHAT I'M // DOING! Seriously: if there's problems, please report them. // collect all elements into the $elements array preg_match_all("/$el/", $reg, $matches); foreach ($matches[0] as $match) { $this->elements[$match] = true; } // setup all elements as parentheticals with leading commas $reg = preg_replace("/$el/", '(,\\0)', $reg); // remove commas when they were not solicited $reg = preg_replace("/([^,(|]\(+),/", '\\1', $reg); // remove all non-paranthetical commas: they are handled by first regex $reg = preg_replace("/,\(/", '(', $reg); $this->_pcre_regex = $reg; } /** * @param HTMLPurifier_Node[] $children * @param HTMLPurifier_Config $config * @param HTMLPurifier_Context $context * @return bool */ public function validateChildren($children, $config, $context) { $list_of_children = ''; $nesting = 0; // depth into the nest foreach ($children as $node) { if (!empty($node->is_whitespace)) { continue; } $list_of_children .= $node->name . ','; } // add leading comma to deal with stray comma declarations $list_of_children = ',' . rtrim($list_of_children, ','); $okay = preg_match( '/^,?' . $this->_pcre_regex . '$/', $list_of_children ); return (bool)$okay; } } // vim: et sw=4 sts=4 ChildDef/Chameleon.php 0000644 00000003552 15152171665 0010630 0 ustar 00 <?php /** * Definition that uses different definitions depending on context. * * The del and ins tags are notable because they allow different types of * elements depending on whether or not they're in a block or inline context. * Chameleon allows this behavior to happen by using two different * definitions depending on context. While this somewhat generalized, * it is specifically intended for those two tags. */ class HTMLPurifier_ChildDef_Chameleon extends HTMLPurifier_ChildDef { /** * Instance of the definition object to use when inline. Usually stricter. * @type HTMLPurifier_ChildDef_Optional */ public $inline; /** * Instance of the definition object to use when block. * @type HTMLPurifier_ChildDef_Optional */ public $block; /** * @type string */ public $type = 'chameleon'; /** * @param array $inline List of elements to allow when inline. * @param array $block List of elements to allow when block. */ public function __construct($inline, $block) { $this->inline = new HTMLPurifier_ChildDef_Optional($inline); $this->block = new HTMLPurifier_ChildDef_Optional($block); $this->elements = $this->block->elements; } /** * @param HTMLPurifier_Node[] $children * @param HTMLPurifier_Config $config * @param HTMLPurifier_Context $context * @return bool */ public function validateChildren($children, $config, $context) { if ($context->get('IsInline') === false) { return $this->block->validateChildren( $children, $config, $context ); } else { return $this->inline->validateChildren( $children, $config, $context ); } } } // vim: et sw=4 sts=4 ChildDef/StrictBlockquote.php 0000644 00000005542 15152171665 0012237 0 ustar 00 <?php /** * Takes the contents of blockquote when in strict and reformats for validation. */ class HTMLPurifier_ChildDef_StrictBlockquote extends HTMLPurifier_ChildDef_Required { /** * @type array */ protected $real_elements; /** * @type array */ protected $fake_elements; /** * @type bool */ public $allow_empty = true; /** * @type string */ public $type = 'strictblockquote'; /** * @type bool */ protected $init = false; /** * @param HTMLPurifier_Config $config * @return array * @note We don't want MakeWellFormed to auto-close inline elements since * they might be allowed. */ public function getAllowedElements($config) { $this->init($config); return $this->fake_elements; } /** * @param array $children * @param HTMLPurifier_Config $config * @param HTMLPurifier_Context $context * @return array */ public function validateChildren($children, $config, $context) { $this->init($config); // trick the parent class into thinking it allows more $this->elements = $this->fake_elements; $result = parent::validateChildren($children, $config, $context); $this->elements = $this->real_elements; if ($result === false) { return array(); } if ($result === true) { $result = $children; } $def = $config->getHTMLDefinition(); $block_wrap_name = $def->info_block_wrapper; $block_wrap = false; $ret = array(); foreach ($result as $node) { if ($block_wrap === false) { if (($node instanceof HTMLPurifier_Node_Text && !$node->is_whitespace) || ($node instanceof HTMLPurifier_Node_Element && !isset($this->elements[$node->name]))) { $block_wrap = new HTMLPurifier_Node_Element($def->info_block_wrapper); $ret[] = $block_wrap; } } else { if ($node instanceof HTMLPurifier_Node_Element && isset($this->elements[$node->name])) { $block_wrap = false; } } if ($block_wrap) { $block_wrap->children[] = $node; } else { $ret[] = $node; } } return $ret; } /** * @param HTMLPurifier_Config $config */ private function init($config) { if (!$this->init) { $def = $config->getHTMLDefinition(); // allow all inline elements $this->real_elements = $this->elements; $this->fake_elements = $def->info_content_sets['Flow']; $this->fake_elements['#PCDATA'] = true; $this->init = true; } } } // vim: et sw=4 sts=4 ChildDef/Empty.php 0000644 00000001542 15152171665 0010030 0 ustar 00 <?php /** * Definition that disallows all elements. * @warning validateChildren() in this class is actually never called, because * empty elements are corrected in HTMLPurifier_Strategy_MakeWellFormed * before child definitions are parsed in earnest by * HTMLPurifier_Strategy_FixNesting. */ class HTMLPurifier_ChildDef_Empty extends HTMLPurifier_ChildDef { /** * @type bool */ public $allow_empty = true; /** * @type string */ public $type = 'empty'; public function __construct() { } /** * @param HTMLPurifier_Node[] $children * @param HTMLPurifier_Config $config * @param HTMLPurifier_Context $context * @return array */ public function validateChildren($children, $config, $context) { return array(); } } // vim: et sw=4 sts=4 ChildDef/Required.php 0000644 00000006426 15152171665 0010520 0 ustar 00 <?php /** * Definition that allows a set of elements, but disallows empty children. */ class HTMLPurifier_ChildDef_Required extends HTMLPurifier_ChildDef { /** * Lookup table of allowed elements. * @type array */ public $elements = array(); /** * Whether or not the last passed node was all whitespace. * @type bool */ protected $whitespace = false; /** * @param array|string $elements List of allowed element names (lowercase). */ public function __construct($elements) { if (is_string($elements)) { $elements = str_replace(' ', '', $elements); $elements = explode('|', $elements); } $keys = array_keys($elements); if ($keys == array_keys($keys)) { $elements = array_flip($elements); foreach ($elements as $i => $x) { $elements[$i] = true; if (empty($i)) { unset($elements[$i]); } // remove blank } } $this->elements = $elements; } /** * @type bool */ public $allow_empty = false; /** * @type string */ public $type = 'required'; /** * @param array $children * @param HTMLPurifier_Config $config * @param HTMLPurifier_Context $context * @return array */ public function validateChildren($children, $config, $context) { // Flag for subclasses $this->whitespace = false; // if there are no tokens, delete parent node if (empty($children)) { return false; } // the new set of children $result = array(); // whether or not parsed character data is allowed // this controls whether or not we silently drop a tag // or generate escaped HTML from it $pcdata_allowed = isset($this->elements['#PCDATA']); // a little sanity check to make sure it's not ALL whitespace $all_whitespace = true; $stack = array_reverse($children); while (!empty($stack)) { $node = array_pop($stack); if (!empty($node->is_whitespace)) { $result[] = $node; continue; } $all_whitespace = false; // phew, we're not talking about whitespace if (!isset($this->elements[$node->name])) { // special case text // XXX One of these ought to be redundant or something if ($pcdata_allowed && $node instanceof HTMLPurifier_Node_Text) { $result[] = $node; continue; } // spill the child contents in // ToDo: Make configurable if ($node instanceof HTMLPurifier_Node_Element) { for ($i = count($node->children) - 1; $i >= 0; $i--) { $stack[] = $node->children[$i]; } continue; } continue; } $result[] = $node; } if (empty($result)) { return false; } if ($all_whitespace) { $this->whitespace = true; return false; } return $result; } } // vim: et sw=4 sts=4 ChildDef/Optional.php 0000644 00000002271 15152171665 0010517 0 ustar 00 <?php /** * Definition that allows a set of elements, and allows no children. * @note This is a hack to reuse code from HTMLPurifier_ChildDef_Required, * really, one shouldn't inherit from the other. Only altered behavior * is to overload a returned false with an array. Thus, it will never * return false. */ class HTMLPurifier_ChildDef_Optional extends HTMLPurifier_ChildDef_Required { /** * @type bool */ public $allow_empty = true; /** * @type string */ public $type = 'optional'; /** * @param array $children * @param HTMLPurifier_Config $config * @param HTMLPurifier_Context $context * @return array */ public function validateChildren($children, $config, $context) { $result = parent::validateChildren($children, $config, $context); // we assume that $children is not modified if ($result === false) { if (empty($children)) { return true; } elseif ($this->whitespace) { return $children; } else { return array(); } } return $result; } } // vim: et sw=4 sts=4 Token/Tag.php 0000644 00000003740 15152171665 0007065 0 ustar 00 <?php /** * Abstract class of a tag token (start, end or empty), and its behavior. */ abstract class HTMLPurifier_Token_Tag extends HTMLPurifier_Token { /** * Static bool marker that indicates the class is a tag. * * This allows us to check objects with <tt>!empty($obj->is_tag)</tt> * without having to use a function call <tt>is_a()</tt>. * @type bool */ public $is_tag = true; /** * The lower-case name of the tag, like 'a', 'b' or 'blockquote'. * * @note Strictly speaking, XML tags are case sensitive, so we shouldn't * be lower-casing them, but these tokens cater to HTML tags, which are * insensitive. * @type string */ public $name; /** * Associative array of the tag's attributes. * @type array */ public $attr = array(); /** * Non-overloaded constructor, which lower-cases passed tag name. * * @param string $name String name. * @param array $attr Associative array of attributes. * @param int $line * @param int $col * @param array $armor */ public function __construct($name, $attr = array(), $line = null, $col = null, $armor = array()) { $this->name = ctype_lower($name) ? $name : strtolower($name); foreach ($attr as $key => $value) { // normalization only necessary when key is not lowercase if (!ctype_lower($key)) { $new_key = strtolower($key); if (!isset($attr[$new_key])) { $attr[$new_key] = $attr[$key]; } if ($new_key !== $key) { unset($attr[$key]); } } } $this->attr = $attr; $this->line = $line; $this->col = $col; $this->armor = $armor; } public function toNode() { return new HTMLPurifier_Node_Element($this->name, $this->attr, $this->line, $this->col, $this->armor); } } // vim: et sw=4 sts=4 Token/Text.php 0000644 00000002455 15152171665 0007300 0 ustar 00 <?php /** * Concrete text token class. * * Text tokens comprise of regular parsed character data (PCDATA) and raw * character data (from the CDATA sections). Internally, their * data is parsed with all entities expanded. Surprisingly, the text token * does have a "tag name" called #PCDATA, which is how the DTD represents it * in permissible child nodes. */ class HTMLPurifier_Token_Text extends HTMLPurifier_Token { /** * @type string */ public $name = '#PCDATA'; /**< PCDATA tag name compatible with DTD. */ /** * @type string */ public $data; /**< Parsed character data of text. */ /** * @type bool */ public $is_whitespace; /**< Bool indicating if node is whitespace. */ /** * Constructor, accepts data and determines if it is whitespace. * @param string $data String parsed character data. * @param int $line * @param int $col */ public function __construct($data, $line = null, $col = null) { $this->data = $data; $this->is_whitespace = ctype_space($data); $this->line = $line; $this->col = $col; } public function toNode() { return new HTMLPurifier_Node_Text($this->data, $this->is_whitespace, $this->line, $this->col); } } // vim: et sw=4 sts=4 Token/Comment.php 0000644 00000001367 15152171665 0007757 0 ustar 00 <?php /** * Concrete comment token class. Generally will be ignored. */ class HTMLPurifier_Token_Comment extends HTMLPurifier_Token { /** * Character data within comment. * @type string */ public $data; /** * @type bool */ public $is_whitespace = true; /** * Transparent constructor. * * @param string $data String comment data. * @param int $line * @param int $col */ public function __construct($data, $line = null, $col = null) { $this->data = $data; $this->line = $line; $this->col = $col; } public function toNode() { return new HTMLPurifier_Node_Comment($this->data, $this->line, $this->col); } } // vim: et sw=4 sts=4 Token/Start.php 0000644 00000000207 15152171665 0007442 0 ustar 00 <?php /** * Concrete start token class. */ class HTMLPurifier_Token_Start extends HTMLPurifier_Token_Tag { } // vim: et sw=4 sts=4 Token/End.php 0000644 00000001114 15152171665 0007051 0 ustar 00 <?php /** * Concrete end token class. * * @warning This class accepts attributes even though end tags cannot. This * is for optimization reasons, as under normal circumstances, the Lexers * do not pass attributes. */ class HTMLPurifier_Token_End extends HTMLPurifier_Token_Tag { /** * Token that started this node. * Added by MakeWellFormed. Please do not edit this! * @type HTMLPurifier_Token */ public $start; public function toNode() { throw new Exception("HTMLPurifier_Token_End->toNode not supported!"); } } // vim: et sw=4 sts=4 Token/Empty.php 0000644 00000000370 15152171665 0007444 0 ustar 00 <?php /** * Concrete empty token class. */ class HTMLPurifier_Token_Empty extends HTMLPurifier_Token_Tag { public function toNode() { $n = parent::toNode(); $n->empty = true; return $n; } } // vim: et sw=4 sts=4 Language/messages/en.php 0000644 00000007720 15152171665 0011230 0 ustar 00 <?php $fallback = false; $messages = array( 'HTMLPurifier' => 'HTML Purifier', // for unit testing purposes 'LanguageFactoryTest: Pizza' => 'Pizza', 'LanguageTest: List' => '$1', 'LanguageTest: Hash' => '$1.Keys; $1.Values', 'Item separator' => ', ', 'Item separator last' => ' and ', // non-Harvard style 'ErrorCollector: No errors' => 'No errors detected. However, because error reporting is still incomplete, there may have been errors that the error collector was not notified of; please inspect the output HTML carefully.', 'ErrorCollector: At line' => ' at line $line', 'ErrorCollector: Incidental errors' => 'Incidental errors', 'Lexer: Unclosed comment' => 'Unclosed comment', 'Lexer: Unescaped lt' => 'Unescaped less-than sign (<) should be <', 'Lexer: Missing gt' => 'Missing greater-than sign (>), previous less-than sign (<) should be escaped', 'Lexer: Missing attribute key' => 'Attribute declaration has no key', 'Lexer: Missing end quote' => 'Attribute declaration has no end quote', 'Lexer: Extracted body' => 'Removed document metadata tags', 'Strategy_RemoveForeignElements: Tag transform' => '<$1> element transformed into $CurrentToken.Serialized', 'Strategy_RemoveForeignElements: Missing required attribute' => '$CurrentToken.Compact element missing required attribute $1', 'Strategy_RemoveForeignElements: Foreign element to text' => 'Unrecognized $CurrentToken.Serialized tag converted to text', 'Strategy_RemoveForeignElements: Foreign element removed' => 'Unrecognized $CurrentToken.Serialized tag removed', 'Strategy_RemoveForeignElements: Comment removed' => 'Comment containing "$CurrentToken.Data" removed', 'Strategy_RemoveForeignElements: Foreign meta element removed' => 'Unrecognized $CurrentToken.Serialized meta tag and all descendants removed', 'Strategy_RemoveForeignElements: Token removed to end' => 'Tags and text starting from $1 element where removed to end', 'Strategy_RemoveForeignElements: Trailing hyphen in comment removed' => 'Trailing hyphen(s) in comment removed', 'Strategy_RemoveForeignElements: Hyphens in comment collapsed' => 'Double hyphens in comments are not allowed, and were collapsed into single hyphens', 'Strategy_MakeWellFormed: Unnecessary end tag removed' => 'Unnecessary $CurrentToken.Serialized tag removed', 'Strategy_MakeWellFormed: Unnecessary end tag to text' => 'Unnecessary $CurrentToken.Serialized tag converted to text', 'Strategy_MakeWellFormed: Tag auto closed' => '$1.Compact started on line $1.Line auto-closed by $CurrentToken.Compact', 'Strategy_MakeWellFormed: Tag carryover' => '$1.Compact started on line $1.Line auto-continued into $CurrentToken.Compact', 'Strategy_MakeWellFormed: Stray end tag removed' => 'Stray $CurrentToken.Serialized tag removed', 'Strategy_MakeWellFormed: Stray end tag to text' => 'Stray $CurrentToken.Serialized tag converted to text', 'Strategy_MakeWellFormed: Tag closed by element end' => '$1.Compact tag started on line $1.Line closed by end of $CurrentToken.Serialized', 'Strategy_MakeWellFormed: Tag closed by document end' => '$1.Compact tag started on line $1.Line closed by end of document', 'Strategy_FixNesting: Node removed' => '$CurrentToken.Compact node removed', 'Strategy_FixNesting: Node excluded' => '$CurrentToken.Compact node removed due to descendant exclusion by ancestor element', 'Strategy_FixNesting: Node reorganized' => 'Contents of $CurrentToken.Compact node reorganized to enforce its content model', 'Strategy_FixNesting: Node contents removed' => 'Contents of $CurrentToken.Compact node removed', 'AttrValidator: Attributes transformed' => 'Attributes on $CurrentToken.Compact transformed from $1.Keys to $2.Keys', 'AttrValidator: Attribute removed' => '$CurrentAttr.Name attribute on $CurrentToken.Compact removed', ); $errorNames = array( E_ERROR => 'Error', E_WARNING => 'Warning', E_NOTICE => 'Notice' ); // vim: et sw=4 sts=4 Queue.php 0000644 00000003017 15152171665 0006353 0 ustar 00 <?php /** * A simple array-backed queue, based off of the classic Okasaki * persistent amortized queue. The basic idea is to maintain two * stacks: an input stack and an output stack. When the output * stack runs out, reverse the input stack and use it as the output * stack. * * We don't use the SPL implementation because it's only supported * on PHP 5.3 and later. * * Exercise: Prove that push/pop on this queue take amortized O(1) time. * * Exercise: Extend this queue to be a deque, while preserving amortized * O(1) time. Some care must be taken on rebalancing to avoid quadratic * behaviour caused by repeatedly shuffling data from the input stack * to the output stack and back. */ class HTMLPurifier_Queue { private $input; private $output; public function __construct($input = array()) { $this->input = $input; $this->output = array(); } /** * Shifts an element off the front of the queue. */ public function shift() { if (empty($this->output)) { $this->output = array_reverse($this->input); $this->input = array(); } if (empty($this->output)) { return NULL; } return array_pop($this->output); } /** * Pushes an element onto the front of the queue. */ public function push($x) { array_push($this->input, $x); } /** * Checks if it's empty. */ public function isEmpty() { return empty($this->input) && empty($this->output); } } StringHashParser.php 0000644 00000007071 15152171665 0010522 0 ustar 00 <?php /** * Parses string hash files. File format is as such: * * DefaultKeyValue * KEY: Value * KEY2: Value2 * --MULTILINE-KEY-- * Multiline * value. * * Which would output something similar to: * * array( * 'ID' => 'DefaultKeyValue', * 'KEY' => 'Value', * 'KEY2' => 'Value2', * 'MULTILINE-KEY' => "Multiline\nvalue.\n", * ) * * We use this as an easy to use file-format for configuration schema * files, but the class itself is usage agnostic. * * You can use ---- to forcibly terminate parsing of a single string-hash; * this marker is used in multi string-hashes to delimit boundaries. */ class HTMLPurifier_StringHashParser { /** * @type string */ public $default = 'ID'; /** * Parses a file that contains a single string-hash. * @param string $file * @return array */ public function parseFile($file) { if (!file_exists($file)) { return false; } $fh = fopen($file, 'r'); if (!$fh) { return false; } $ret = $this->parseHandle($fh); fclose($fh); return $ret; } /** * Parses a file that contains multiple string-hashes delimited by '----' * @param string $file * @return array */ public function parseMultiFile($file) { if (!file_exists($file)) { return false; } $ret = array(); $fh = fopen($file, 'r'); if (!$fh) { return false; } while (!feof($fh)) { $ret[] = $this->parseHandle($fh); } fclose($fh); return $ret; } /** * Internal parser that acepts a file handle. * @note While it's possible to simulate in-memory parsing by using * custom stream wrappers, if such a use-case arises we should * factor out the file handle into its own class. * @param resource $fh File handle with pointer at start of valid string-hash * block. * @return array */ protected function parseHandle($fh) { $state = false; $single = false; $ret = array(); do { $line = fgets($fh); if ($line === false) { break; } $line = rtrim($line, "\n\r"); if (!$state && $line === '') { continue; } if ($line === '----') { break; } if (strncmp('--#', $line, 3) === 0) { // Comment continue; } elseif (strncmp('--', $line, 2) === 0) { // Multiline declaration $state = trim($line, '- '); if (!isset($ret[$state])) { $ret[$state] = ''; } continue; } elseif (!$state) { $single = true; if (strpos($line, ':') !== false) { // Single-line declaration list($state, $line) = explode(':', $line, 2); $line = trim($line); } else { // Use default declaration $state = $this->default; } } if ($single) { $ret[$state] = $line; $single = false; $state = false; } else { $ret[$state] .= "$line\n"; } } while (!feof($fh)); return $ret; } } // vim: et sw=4 sts=4 ErrorStruct.php 0000644 00000003545 15152171665 0007573 0 ustar 00 <?php /** * Records errors for particular segments of an HTML document such as tokens, * attributes or CSS properties. They can contain error structs (which apply * to components of what they represent), but their main purpose is to hold * errors applying to whatever struct is being used. */ class HTMLPurifier_ErrorStruct { /** * Possible values for $children first-key. Note that top-level structures * are automatically token-level. */ const TOKEN = 0; const ATTR = 1; const CSSPROP = 2; /** * Type of this struct. * @type string */ public $type; /** * Value of the struct we are recording errors for. There are various * values for this: * - TOKEN: Instance of HTMLPurifier_Token * - ATTR: array('attr-name', 'value') * - CSSPROP: array('prop-name', 'value') * @type mixed */ public $value; /** * Errors registered for this structure. * @type array */ public $errors = array(); /** * Child ErrorStructs that are from this structure. For example, a TOKEN * ErrorStruct would contain ATTR ErrorStructs. This is a multi-dimensional * array in structure: [TYPE]['identifier'] * @type array */ public $children = array(); /** * @param string $type * @param string $id * @return mixed */ public function getChild($type, $id) { if (!isset($this->children[$type][$id])) { $this->children[$type][$id] = new HTMLPurifier_ErrorStruct(); $this->children[$type][$id]->type = $type; } return $this->children[$type][$id]; } /** * @param int $severity * @param string $message */ public function addError($severity, $message) { $this->errors[] = array($severity, $message); } } // vim: et sw=4 sts=4 Node.php 0000644 00000002400 15152171665 0006147 0 ustar 00 <?php /** * Abstract base node class that all others inherit from. * * Why do we not use the DOM extension? (1) It is not always available, * (2) it has funny constraints on the data it can represent, * whereas we want a maximally flexible representation, and (3) its * interface is a bit cumbersome. */ abstract class HTMLPurifier_Node { /** * Line number of the start token in the source document * @type int */ public $line; /** * Column number of the start token in the source document. Null if unknown. * @type int */ public $col; /** * Lookup array of processing that this token is exempt from. * Currently, valid values are "ValidateAttributes". * @type array */ public $armor = array(); /** * When true, this node should be ignored as non-existent. * * Who is responsible for ignoring dead nodes? FixNesting is * responsible for removing them before passing on to child * validators. */ public $dead = false; /** * Returns a pair of start and end tokens, where the end token * is null if it is not necessary. Does not include children. * @type array */ abstract public function toTokenPair(); } // vim: et sw=4 sts=4 ConfigSchema.php 0000644 00000013417 15152171665 0007622 0 ustar 00 <?php /** * Configuration definition, defines directives and their defaults. */ class HTMLPurifier_ConfigSchema { /** * Defaults of the directives and namespaces. * @type array * @note This shares the exact same structure as HTMLPurifier_Config::$conf */ public $defaults = array(); /** * The default property list. Do not edit this property list. * @type array */ public $defaultPlist; /** * Definition of the directives. * The structure of this is: * * array( * 'Namespace' => array( * 'Directive' => new stdClass(), * ) * ) * * The stdClass may have the following properties: * * - If isAlias isn't set: * - type: Integer type of directive, see HTMLPurifier_VarParser for definitions * - allow_null: If set, this directive allows null values * - aliases: If set, an associative array of value aliases to real values * - allowed: If set, a lookup array of allowed (string) values * - If isAlias is set: * - namespace: Namespace this directive aliases to * - name: Directive name this directive aliases to * * In certain degenerate cases, stdClass will actually be an integer. In * that case, the value is equivalent to an stdClass with the type * property set to the integer. If the integer is negative, type is * equal to the absolute value of integer, and allow_null is true. * * This class is friendly with HTMLPurifier_Config. If you need introspection * about the schema, you're better of using the ConfigSchema_Interchange, * which uses more memory but has much richer information. * @type array */ public $info = array(); /** * Application-wide singleton * @type HTMLPurifier_ConfigSchema */ protected static $singleton; public function __construct() { $this->defaultPlist = new HTMLPurifier_PropertyList(); } /** * Unserializes the default ConfigSchema. * @return HTMLPurifier_ConfigSchema */ public static function makeFromSerial() { $contents = file_get_contents(HTMLPURIFIER_PREFIX . '/HTMLPurifier/ConfigSchema/schema.ser'); $r = unserialize($contents); if (!$r) { $hash = sha1($contents); trigger_error("Unserialization of configuration schema failed, sha1 of file was $hash", E_USER_ERROR); } return $r; } /** * Retrieves an instance of the application-wide configuration definition. * @param HTMLPurifier_ConfigSchema $prototype * @return HTMLPurifier_ConfigSchema */ public static function instance($prototype = null) { if ($prototype !== null) { HTMLPurifier_ConfigSchema::$singleton = $prototype; } elseif (HTMLPurifier_ConfigSchema::$singleton === null || $prototype === true) { HTMLPurifier_ConfigSchema::$singleton = HTMLPurifier_ConfigSchema::makeFromSerial(); } return HTMLPurifier_ConfigSchema::$singleton; } /** * Defines a directive for configuration * @warning Will fail of directive's namespace is defined. * @warning This method's signature is slightly different from the legacy * define() static method! Beware! * @param string $key Name of directive * @param mixed $default Default value of directive * @param string $type Allowed type of the directive. See * HTMLPurifier_VarParser::$types for allowed values * @param bool $allow_null Whether or not to allow null values */ public function add($key, $default, $type, $allow_null) { $obj = new stdClass(); $obj->type = is_int($type) ? $type : HTMLPurifier_VarParser::$types[$type]; if ($allow_null) { $obj->allow_null = true; } $this->info[$key] = $obj; $this->defaults[$key] = $default; $this->defaultPlist->set($key, $default); } /** * Defines a directive value alias. * * Directive value aliases are convenient for developers because it lets * them set a directive to several values and get the same result. * @param string $key Name of Directive * @param array $aliases Hash of aliased values to the real alias */ public function addValueAliases($key, $aliases) { if (!isset($this->info[$key]->aliases)) { $this->info[$key]->aliases = array(); } foreach ($aliases as $alias => $real) { $this->info[$key]->aliases[$alias] = $real; } } /** * Defines a set of allowed values for a directive. * @warning This is slightly different from the corresponding static * method definition. * @param string $key Name of directive * @param array $allowed Lookup array of allowed values */ public function addAllowedValues($key, $allowed) { $this->info[$key]->allowed = $allowed; } /** * Defines a directive alias for backwards compatibility * @param string $key Directive that will be aliased * @param string $new_key Directive that the alias will be to */ public function addAlias($key, $new_key) { $obj = new stdClass; $obj->key = $new_key; $obj->isAlias = true; $this->info[$key] = $obj; } /** * Replaces any stdClass that only has the type property with type integer. */ public function postProcess() { foreach ($this->info as $key => $v) { if (count((array) $v) == 1) { $this->info[$key] = $v->type; } elseif (count((array) $v) == 2 && isset($v->allow_null)) { $this->info[$key] = -$v->type; } } } } // vim: et sw=4 sts=4 Exception.php 0000644 00000000261 15152171665 0007223 0 ustar 00 <?php /** * Global exception class for HTML Purifier; any exceptions we throw * are from here. */ class HTMLPurifier_Exception extends Exception { } // vim: et sw=4 sts=4 Doctype.php 0000644 00000003056 15152171665 0006701 0 ustar 00 <?php /** * Represents a document type, contains information on which modules * need to be loaded. * @note This class is inspected by Printer_HTMLDefinition->renderDoctype. * If structure changes, please update that function. */ class HTMLPurifier_Doctype { /** * Full name of doctype * @type string */ public $name; /** * List of standard modules (string identifiers or literal objects) * that this doctype uses * @type array */ public $modules = array(); /** * List of modules to use for tidying up code * @type array */ public $tidyModules = array(); /** * Is the language derived from XML (i.e. XHTML)? * @type bool */ public $xml = true; /** * List of aliases for this doctype * @type array */ public $aliases = array(); /** * Public DTD identifier * @type string */ public $dtdPublic; /** * System DTD identifier * @type string */ public $dtdSystem; public function __construct( $name = null, $xml = true, $modules = array(), $tidyModules = array(), $aliases = array(), $dtd_public = null, $dtd_system = null ) { $this->name = $name; $this->xml = $xml; $this->modules = $modules; $this->tidyModules = $tidyModules; $this->aliases = $aliases; $this->dtdPublic = $dtd_public; $this->dtdSystem = $dtd_system; } } // vim: et sw=4 sts=4 Filter.php 0000644 00000003131 15152171665 0006511 0 ustar 00 <?php /** * Represents a pre or post processing filter on HTML Purifier's output * * Sometimes, a little ad-hoc fixing of HTML has to be done before * it gets sent through HTML Purifier: you can use filters to acheive * this effect. For instance, YouTube videos can be preserved using * this manner. You could have used a decorator for this task, but * PHP's support for them is not terribly robust, so we're going * to just loop through the filters. * * Filters should be exited first in, last out. If there are three filters, * named 1, 2 and 3, the order of execution should go 1->preFilter, * 2->preFilter, 3->preFilter, purify, 3->postFilter, 2->postFilter, * 1->postFilter. * * @note Methods are not declared abstract as it is perfectly legitimate * for an implementation not to want anything to happen on a step */ class HTMLPurifier_Filter { /** * Name of the filter for identification purposes. * @type string */ public $name; /** * Pre-processor function, handles HTML before HTML Purifier * @param string $html * @param HTMLPurifier_Config $config * @param HTMLPurifier_Context $context * @return string */ public function preFilter($html, $config, $context) { return $html; } /** * Post-processor function, handles HTML after HTML Purifier * @param string $html * @param HTMLPurifier_Config $config * @param HTMLPurifier_Context $context * @return string */ public function postFilter($html, $config, $context) { return $html; } } // vim: et sw=4 sts=4 DoctypeRegistry.php 0000644 00000010200 15152171665 0010417 0 ustar 00 <?php class HTMLPurifier_DoctypeRegistry { /** * Hash of doctype names to doctype objects. * @type array */ protected $doctypes; /** * Lookup table of aliases to real doctype names. * @type array */ protected $aliases; /** * Registers a doctype to the registry * @note Accepts a fully-formed doctype object, or the * parameters for constructing a doctype object * @param string $doctype Name of doctype or literal doctype object * @param bool $xml * @param array $modules Modules doctype will load * @param array $tidy_modules Modules doctype will load for certain modes * @param array $aliases Alias names for doctype * @param string $dtd_public * @param string $dtd_system * @return HTMLPurifier_Doctype Editable registered doctype */ public function register( $doctype, $xml = true, $modules = array(), $tidy_modules = array(), $aliases = array(), $dtd_public = null, $dtd_system = null ) { if (!is_array($modules)) { $modules = array($modules); } if (!is_array($tidy_modules)) { $tidy_modules = array($tidy_modules); } if (!is_array($aliases)) { $aliases = array($aliases); } if (!is_object($doctype)) { $doctype = new HTMLPurifier_Doctype( $doctype, $xml, $modules, $tidy_modules, $aliases, $dtd_public, $dtd_system ); } $this->doctypes[$doctype->name] = $doctype; $name = $doctype->name; // hookup aliases foreach ($doctype->aliases as $alias) { if (isset($this->doctypes[$alias])) { continue; } $this->aliases[$alias] = $name; } // remove old aliases if (isset($this->aliases[$name])) { unset($this->aliases[$name]); } return $doctype; } /** * Retrieves reference to a doctype of a certain name * @note This function resolves aliases * @note When possible, use the more fully-featured make() * @param string $doctype Name of doctype * @return HTMLPurifier_Doctype Editable doctype object */ public function get($doctype) { if (isset($this->aliases[$doctype])) { $doctype = $this->aliases[$doctype]; } if (!isset($this->doctypes[$doctype])) { trigger_error('Doctype ' . htmlspecialchars($doctype) . ' does not exist', E_USER_ERROR); $anon = new HTMLPurifier_Doctype($doctype); return $anon; } return $this->doctypes[$doctype]; } /** * Creates a doctype based on a configuration object, * will perform initialization on the doctype * @note Use this function to get a copy of doctype that config * can hold on to (this is necessary in order to tell * Generator whether or not the current document is XML * based or not). * @param HTMLPurifier_Config $config * @return HTMLPurifier_Doctype */ public function make($config) { return clone $this->get($this->getDoctypeFromConfig($config)); } /** * Retrieves the doctype from the configuration object * @param HTMLPurifier_Config $config * @return string */ public function getDoctypeFromConfig($config) { // recommended test $doctype = $config->get('HTML.Doctype'); if (!empty($doctype)) { return $doctype; } $doctype = $config->get('HTML.CustomDoctype'); if (!empty($doctype)) { return $doctype; } // backwards-compatibility if ($config->get('HTML.XHTML')) { $doctype = 'XHTML 1.0'; } else { $doctype = 'HTML 4.01'; } if ($config->get('HTML.Strict')) { $doctype .= ' Strict'; } else { $doctype .= ' Transitional'; } return $doctype; } } // vim: et sw=4 sts=4 Strategy/RemoveForeignElements.php 0000644 00000021722 15152171665 0013340 0 ustar 00 <?php /** * Removes all unrecognized tags from the list of tokens. * * This strategy iterates through all the tokens and removes unrecognized * tokens. If a token is not recognized but a TagTransform is defined for * that element, the element will be transformed accordingly. */ class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy { /** * @param HTMLPurifier_Token[] $tokens * @param HTMLPurifier_Config $config * @param HTMLPurifier_Context $context * @return array|HTMLPurifier_Token[] */ public function execute($tokens, $config, $context) { $definition = $config->getHTMLDefinition(); $generator = new HTMLPurifier_Generator($config, $context); $result = array(); $escape_invalid_tags = $config->get('Core.EscapeInvalidTags'); $remove_invalid_img = $config->get('Core.RemoveInvalidImg'); // currently only used to determine if comments should be kept $trusted = $config->get('HTML.Trusted'); $comment_lookup = $config->get('HTML.AllowedComments'); $comment_regexp = $config->get('HTML.AllowedCommentsRegexp'); $check_comments = $comment_lookup !== array() || $comment_regexp !== null; $remove_script_contents = $config->get('Core.RemoveScriptContents'); $hidden_elements = $config->get('Core.HiddenElements'); // remove script contents compatibility if ($remove_script_contents === true) { $hidden_elements['script'] = true; } elseif ($remove_script_contents === false && isset($hidden_elements['script'])) { unset($hidden_elements['script']); } $attr_validator = new HTMLPurifier_AttrValidator(); // removes tokens until it reaches a closing tag with its value $remove_until = false; // converts comments into text tokens when this is equal to a tag name $textify_comments = false; $token = false; $context->register('CurrentToken', $token); $e = false; if ($config->get('Core.CollectErrors')) { $e =& $context->get('ErrorCollector'); } foreach ($tokens as $token) { if ($remove_until) { if (empty($token->is_tag) || $token->name !== $remove_until) { continue; } } if (!empty($token->is_tag)) { // DEFINITION CALL // before any processing, try to transform the element if (isset($definition->info_tag_transform[$token->name])) { $original_name = $token->name; // there is a transformation for this tag // DEFINITION CALL $token = $definition-> info_tag_transform[$token->name]->transform($token, $config, $context); if ($e) { $e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Tag transform', $original_name); } } if (isset($definition->info[$token->name])) { // mostly everything's good, but // we need to make sure required attributes are in order if (($token instanceof HTMLPurifier_Token_Start || $token instanceof HTMLPurifier_Token_Empty) && $definition->info[$token->name]->required_attr && ($token->name != 'img' || $remove_invalid_img) // ensure config option still works ) { $attr_validator->validateToken($token, $config, $context); $ok = true; foreach ($definition->info[$token->name]->required_attr as $name) { if (!isset($token->attr[$name])) { $ok = false; break; } } if (!$ok) { if ($e) { $e->send( E_ERROR, 'Strategy_RemoveForeignElements: Missing required attribute', $name ); } continue; } $token->armor['ValidateAttributes'] = true; } if (isset($hidden_elements[$token->name]) && $token instanceof HTMLPurifier_Token_Start) { $textify_comments = $token->name; } elseif ($token->name === $textify_comments && $token instanceof HTMLPurifier_Token_End) { $textify_comments = false; } } elseif ($escape_invalid_tags) { // invalid tag, generate HTML representation and insert in if ($e) { $e->send(E_WARNING, 'Strategy_RemoveForeignElements: Foreign element to text'); } $token = new HTMLPurifier_Token_Text( $generator->generateFromToken($token) ); } else { // check if we need to destroy all of the tag's children // CAN BE GENERICIZED if (isset($hidden_elements[$token->name])) { if ($token instanceof HTMLPurifier_Token_Start) { $remove_until = $token->name; } elseif ($token instanceof HTMLPurifier_Token_Empty) { // do nothing: we're still looking } else { $remove_until = false; } if ($e) { $e->send(E_ERROR, 'Strategy_RemoveForeignElements: Foreign meta element removed'); } } else { if ($e) { $e->send(E_ERROR, 'Strategy_RemoveForeignElements: Foreign element removed'); } } continue; } } elseif ($token instanceof HTMLPurifier_Token_Comment) { // textify comments in script tags when they are allowed if ($textify_comments !== false) { $data = $token->data; $token = new HTMLPurifier_Token_Text($data); } elseif ($trusted || $check_comments) { // always cleanup comments $trailing_hyphen = false; if ($e) { // perform check whether or not there's a trailing hyphen if (substr($token->data, -1) == '-') { $trailing_hyphen = true; } } $token->data = rtrim($token->data, '-'); $found_double_hyphen = false; while (strpos($token->data, '--') !== false) { $found_double_hyphen = true; $token->data = str_replace('--', '-', $token->data); } if ($trusted || !empty($comment_lookup[trim($token->data)]) || ($comment_regexp !== null && preg_match($comment_regexp, trim($token->data)))) { // OK good if ($e) { if ($trailing_hyphen) { $e->send( E_NOTICE, 'Strategy_RemoveForeignElements: Trailing hyphen in comment removed' ); } if ($found_double_hyphen) { $e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Hyphens in comment collapsed'); } } } else { if ($e) { $e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Comment removed'); } continue; } } else { // strip comments if ($e) { $e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Comment removed'); } continue; } } elseif ($token instanceof HTMLPurifier_Token_Text) { } else { continue; } $result[] = $token; } if ($remove_until && $e) { // we removed tokens until the end, throw error $e->send(E_ERROR, 'Strategy_RemoveForeignElements: Token removed to end', $remove_until); } $context->destroy('CurrentToken'); return $result; } } // vim: et sw=4 sts=4 Strategy/Core.php 0000644 00000001006 15152171665 0007755 0 ustar 00 <?php /** * Core strategy composed of the big four strategies. */ class HTMLPurifier_Strategy_Core extends HTMLPurifier_Strategy_Composite { public function __construct() { $this->strategies[] = new HTMLPurifier_Strategy_RemoveForeignElements(); $this->strategies[] = new HTMLPurifier_Strategy_MakeWellFormed(); $this->strategies[] = new HTMLPurifier_Strategy_FixNesting(); $this->strategies[] = new HTMLPurifier_Strategy_ValidateAttributes(); } } // vim: et sw=4 sts=4 Strategy/Composite.php 0000644 00000001327 15152171665 0011035 0 ustar 00 <?php /** * Composite strategy that runs multiple strategies on tokens. */ abstract class HTMLPurifier_Strategy_Composite extends HTMLPurifier_Strategy { /** * List of strategies to run tokens through. * @type HTMLPurifier_Strategy[] */ protected $strategies = array(); /** * @param HTMLPurifier_Token[] $tokens * @param HTMLPurifier_Config $config * @param HTMLPurifier_Context $context * @return HTMLPurifier_Token[] */ public function execute($tokens, $config, $context) { foreach ($this->strategies as $strategy) { $tokens = $strategy->execute($tokens, $config, $context); } return $tokens; } } // vim: et sw=4 sts=4 Strategy/FixNesting.php 0000644 00000016605 15152171665 0011156 0 ustar 00 <?php /** * Takes a well formed list of tokens and fixes their nesting. * * HTML elements dictate which elements are allowed to be their children, * for example, you can't have a p tag in a span tag. Other elements have * much more rigorous definitions: tables, for instance, require a specific * order for their elements. There are also constraints not expressible by * document type definitions, such as the chameleon nature of ins/del * tags and global child exclusions. * * The first major objective of this strategy is to iterate through all * the nodes and determine whether or not their children conform to the * element's definition. If they do not, the child definition may * optionally supply an amended list of elements that is valid or * require that the entire node be deleted (and the previous node * rescanned). * * The second objective is to ensure that explicitly excluded elements of * an element do not appear in its children. Code that accomplishes this * task is pervasive through the strategy, though the two are distinct tasks * and could, theoretically, be seperated (although it's not recommended). * * @note Whether or not unrecognized children are silently dropped or * translated into text depends on the child definitions. * * @todo Enable nodes to be bubbled out of the structure. This is * easier with our new algorithm. */ class HTMLPurifier_Strategy_FixNesting extends HTMLPurifier_Strategy { /** * @param HTMLPurifier_Token[] $tokens * @param HTMLPurifier_Config $config * @param HTMLPurifier_Context $context * @return array|HTMLPurifier_Token[] */ public function execute($tokens, $config, $context) { //####################################################################// // Pre-processing // O(n) pass to convert to a tree, so that we can efficiently // refer to substrings $top_node = HTMLPurifier_Arborize::arborize($tokens, $config, $context); // get a copy of the HTML definition $definition = $config->getHTMLDefinition(); $excludes_enabled = !$config->get('Core.DisableExcludes'); // setup the context variable 'IsInline', for chameleon processing // is 'false' when we are not inline, 'true' when it must always // be inline, and an integer when it is inline for a certain // branch of the document tree $is_inline = $definition->info_parent_def->descendants_are_inline; $context->register('IsInline', $is_inline); // setup error collector $e =& $context->get('ErrorCollector', true); //####################################################################// // Loop initialization // stack that contains all elements that are excluded // it is organized by parent elements, similar to $stack, // but it is only populated when an element with exclusions is // processed, i.e. there won't be empty exclusions. $exclude_stack = array($definition->info_parent_def->excludes); // variable that contains the start token while we are processing // nodes. This enables error reporting to do its job $node = $top_node; // dummy token list($token, $d) = $node->toTokenPair(); $context->register('CurrentNode', $node); $context->register('CurrentToken', $token); //####################################################################// // Loop // We need to implement a post-order traversal iteratively, to // avoid running into stack space limits. This is pretty tricky // to reason about, so we just manually stack-ify the recursive // variant: // // function f($node) { // foreach ($node->children as $child) { // f($child); // } // validate($node); // } // // Thus, we will represent a stack frame as array($node, // $is_inline, stack of children) // e.g. array_reverse($node->children) - already processed // children. $parent_def = $definition->info_parent_def; $stack = array( array($top_node, $parent_def->descendants_are_inline, $parent_def->excludes, // exclusions 0) ); while (!empty($stack)) { list($node, $is_inline, $excludes, $ix) = array_pop($stack); // recursive call $go = false; $def = empty($stack) ? $definition->info_parent_def : $definition->info[$node->name]; while (isset($node->children[$ix])) { $child = $node->children[$ix++]; if ($child instanceof HTMLPurifier_Node_Element) { $go = true; $stack[] = array($node, $is_inline, $excludes, $ix); $stack[] = array($child, // ToDo: I don't think it matters if it's def or // child_def, but double check this... $is_inline || $def->descendants_are_inline, empty($def->excludes) ? $excludes : array_merge($excludes, $def->excludes), 0); break; } }; if ($go) continue; list($token, $d) = $node->toTokenPair(); // base case if ($excludes_enabled && isset($excludes[$node->name])) { $node->dead = true; if ($e) $e->send(E_ERROR, 'Strategy_FixNesting: Node excluded'); } else { // XXX I suppose it would be slightly more efficient to // avoid the allocation here and have children // strategies handle it $children = array(); foreach ($node->children as $child) { if (!$child->dead) $children[] = $child; } $result = $def->child->validateChildren($children, $config, $context); if ($result === true) { // nop $node->children = $children; } elseif ($result === false) { $node->dead = true; if ($e) $e->send(E_ERROR, 'Strategy_FixNesting: Node removed'); } else { $node->children = $result; if ($e) { // XXX This will miss mutations of internal nodes. Perhaps defer to the child validators if (empty($result) && !empty($children)) { $e->send(E_ERROR, 'Strategy_FixNesting: Node contents removed'); } else if ($result != $children) { $e->send(E_WARNING, 'Strategy_FixNesting: Node reorganized'); } } } } } //####################################################################// // Post-processing // remove context variables $context->destroy('IsInline'); $context->destroy('CurrentNode'); $context->destroy('CurrentToken'); //####################################################################// // Return return HTMLPurifier_Arborize::flatten($node, $config, $context); } } // vim: et sw=4 sts=4 Strategy/ValidateAttributes.php 0000644 00000002323 15152171665 0012670 0 ustar 00 <?php /** * Validate all attributes in the tokens. */ class HTMLPurifier_Strategy_ValidateAttributes extends HTMLPurifier_Strategy { /** * @param HTMLPurifier_Token[] $tokens * @param HTMLPurifier_Config $config * @param HTMLPurifier_Context $context * @return HTMLPurifier_Token[] */ public function execute($tokens, $config, $context) { // setup validator $validator = new HTMLPurifier_AttrValidator(); $token = false; $context->register('CurrentToken', $token); foreach ($tokens as $key => $token) { // only process tokens that have attributes, // namely start and empty tags if (!$token instanceof HTMLPurifier_Token_Start && !$token instanceof HTMLPurifier_Token_Empty) { continue; } // skip tokens that are armored if (!empty($token->armor['ValidateAttributes'])) { continue; } // note that we have no facilities here for removing tokens $validator->validateToken($token, $config, $context); } $context->destroy('CurrentToken'); return $tokens; } } // vim: et sw=4 sts=4 Strategy/MakeWellFormed.php 0000644 00000065741 15152171665 0011743 0 ustar 00 <?php /** * Takes tokens makes them well-formed (balance end tags, etc.) * * Specification of the armor attributes this strategy uses: * * - MakeWellFormed_TagClosedError: This armor field is used to * suppress tag closed errors for certain tokens [TagClosedSuppress], * in particular, if a tag was generated automatically by HTML * Purifier, we may rely on our infrastructure to close it for us * and shouldn't report an error to the user [TagClosedAuto]. */ class HTMLPurifier_Strategy_MakeWellFormed extends HTMLPurifier_Strategy { /** * Array stream of tokens being processed. * @type HTMLPurifier_Token[] */ protected $tokens; /** * Current token. * @type HTMLPurifier_Token */ protected $token; /** * Zipper managing the true state. * @type HTMLPurifier_Zipper */ protected $zipper; /** * Current nesting of elements. * @type array */ protected $stack; /** * Injectors active in this stream processing. * @type HTMLPurifier_Injector[] */ protected $injectors; /** * Current instance of HTMLPurifier_Config. * @type HTMLPurifier_Config */ protected $config; /** * Current instance of HTMLPurifier_Context. * @type HTMLPurifier_Context */ protected $context; /** * @param HTMLPurifier_Token[] $tokens * @param HTMLPurifier_Config $config * @param HTMLPurifier_Context $context * @return HTMLPurifier_Token[] * @throws HTMLPurifier_Exception */ public function execute($tokens, $config, $context) { $definition = $config->getHTMLDefinition(); // local variables $generator = new HTMLPurifier_Generator($config, $context); $escape_invalid_tags = $config->get('Core.EscapeInvalidTags'); // used for autoclose early abortion $global_parent_allowed_elements = $definition->info_parent_def->child->getAllowedElements($config); $e = $context->get('ErrorCollector', true); $i = false; // injector index list($zipper, $token) = HTMLPurifier_Zipper::fromArray($tokens); if ($token === NULL) { return array(); } $reprocess = false; // whether or not to reprocess the same token $stack = array(); // member variables $this->stack =& $stack; $this->tokens =& $tokens; $this->token =& $token; $this->zipper =& $zipper; $this->config = $config; $this->context = $context; // context variables $context->register('CurrentNesting', $stack); $context->register('InputZipper', $zipper); $context->register('CurrentToken', $token); // -- begin INJECTOR -- $this->injectors = array(); $injectors = $config->getBatch('AutoFormat'); $def_injectors = $definition->info_injector; $custom_injectors = $injectors['Custom']; unset($injectors['Custom']); // special case foreach ($injectors as $injector => $b) { // XXX: Fix with a legitimate lookup table of enabled filters if (strpos($injector, '.') !== false) { continue; } $injector = "HTMLPurifier_Injector_$injector"; if (!$b) { continue; } $this->injectors[] = new $injector; } foreach ($def_injectors as $injector) { // assumed to be objects $this->injectors[] = $injector; } foreach ($custom_injectors as $injector) { if (!$injector) { continue; } if (is_string($injector)) { $injector = "HTMLPurifier_Injector_$injector"; $injector = new $injector; } $this->injectors[] = $injector; } // give the injectors references to the definition and context // variables for performance reasons foreach ($this->injectors as $ix => $injector) { $error = $injector->prepare($config, $context); if (!$error) { continue; } array_splice($this->injectors, $ix, 1); // rm the injector trigger_error("Cannot enable {$injector->name} injector because $error is not allowed", E_USER_WARNING); } // -- end INJECTOR -- // a note on reprocessing: // In order to reduce code duplication, whenever some code needs // to make HTML changes in order to make things "correct", the // new HTML gets sent through the purifier, regardless of its // status. This means that if we add a start token, because it // was totally necessary, we don't have to update nesting; we just // punt ($reprocess = true; continue;) and it does that for us. // isset is in loop because $tokens size changes during loop exec for (;; // only increment if we don't need to reprocess $reprocess ? $reprocess = false : $token = $zipper->next($token)) { // check for a rewind if (is_int($i)) { // possibility: disable rewinding if the current token has a // rewind set on it already. This would offer protection from // infinite loop, but might hinder some advanced rewinding. $rewind_offset = $this->injectors[$i]->getRewindOffset(); if (is_int($rewind_offset)) { for ($j = 0; $j < $rewind_offset; $j++) { if (empty($zipper->front)) break; $token = $zipper->prev($token); // indicate that other injectors should not process this token, // but we need to reprocess it. See Note [Injector skips] unset($token->skip[$i]); $token->rewind = $i; if ($token instanceof HTMLPurifier_Token_Start) { array_pop($this->stack); } elseif ($token instanceof HTMLPurifier_Token_End) { $this->stack[] = $token->start; } } } $i = false; } // handle case of document end if ($token === NULL) { // kill processing if stack is empty if (empty($this->stack)) { break; } // peek $top_nesting = array_pop($this->stack); $this->stack[] = $top_nesting; // send error [TagClosedSuppress] if ($e && !isset($top_nesting->armor['MakeWellFormed_TagClosedError'])) { $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag closed by document end', $top_nesting); } // append, don't splice, since this is the end $token = new HTMLPurifier_Token_End($top_nesting->name); // punt! $reprocess = true; continue; } //echo '<br>'; printZipper($zipper, $token);//printTokens($this->stack); //flush(); // quick-check: if it's not a tag, no need to process if (empty($token->is_tag)) { if ($token instanceof HTMLPurifier_Token_Text) { foreach ($this->injectors as $i => $injector) { if (isset($token->skip[$i])) { // See Note [Injector skips] continue; } if ($token->rewind !== null && $token->rewind !== $i) { continue; } // XXX fuckup $r = $token; $injector->handleText($r); $token = $this->processToken($r, $i); $reprocess = true; break; } } // another possibility is a comment continue; } if (isset($definition->info[$token->name])) { $type = $definition->info[$token->name]->child->type; } else { $type = false; // Type is unknown, treat accordingly } // quick tag checks: anything that's *not* an end tag $ok = false; if ($type === 'empty' && $token instanceof HTMLPurifier_Token_Start) { // claims to be a start tag but is empty $token = new HTMLPurifier_Token_Empty( $token->name, $token->attr, $token->line, $token->col, $token->armor ); $ok = true; } elseif ($type && $type !== 'empty' && $token instanceof HTMLPurifier_Token_Empty) { // claims to be empty but really is a start tag // NB: this assignment is required $old_token = $token; $token = new HTMLPurifier_Token_End($token->name); $token = $this->insertBefore( new HTMLPurifier_Token_Start($old_token->name, $old_token->attr, $old_token->line, $old_token->col, $old_token->armor) ); // punt (since we had to modify the input stream in a non-trivial way) $reprocess = true; continue; } elseif ($token instanceof HTMLPurifier_Token_Empty) { // real empty token $ok = true; } elseif ($token instanceof HTMLPurifier_Token_Start) { // start tag // ...unless they also have to close their parent if (!empty($this->stack)) { // Performance note: you might think that it's rather // inefficient, recalculating the autoclose information // for every tag that a token closes (since when we // do an autoclose, we push a new token into the // stream and then /process/ that, before // re-processing this token.) But this is // necessary, because an injector can make an // arbitrary transformations to the autoclosing // tokens we introduce, so things may have changed // in the meantime. Also, doing the inefficient thing is // "easy" to reason about (for certain perverse definitions // of "easy") $parent = array_pop($this->stack); $this->stack[] = $parent; $parent_def = null; $parent_elements = null; $autoclose = false; if (isset($definition->info[$parent->name])) { $parent_def = $definition->info[$parent->name]; $parent_elements = $parent_def->child->getAllowedElements($config); $autoclose = !isset($parent_elements[$token->name]); } if ($autoclose && $definition->info[$token->name]->wrap) { // Check if an element can be wrapped by another // element to make it valid in a context (for // example, <ul><ul> needs a <li> in between) $wrapname = $definition->info[$token->name]->wrap; $wrapdef = $definition->info[$wrapname]; $elements = $wrapdef->child->getAllowedElements($config); if (isset($elements[$token->name]) && isset($parent_elements[$wrapname])) { $newtoken = new HTMLPurifier_Token_Start($wrapname); $token = $this->insertBefore($newtoken); $reprocess = true; continue; } } $carryover = false; if ($autoclose && $parent_def->formatting) { $carryover = true; } if ($autoclose) { // check if this autoclose is doomed to fail // (this rechecks $parent, which his harmless) $autoclose_ok = isset($global_parent_allowed_elements[$token->name]); if (!$autoclose_ok) { foreach ($this->stack as $ancestor) { $elements = $definition->info[$ancestor->name]->child->getAllowedElements($config); if (isset($elements[$token->name])) { $autoclose_ok = true; break; } if ($definition->info[$token->name]->wrap) { $wrapname = $definition->info[$token->name]->wrap; $wrapdef = $definition->info[$wrapname]; $wrap_elements = $wrapdef->child->getAllowedElements($config); if (isset($wrap_elements[$token->name]) && isset($elements[$wrapname])) { $autoclose_ok = true; break; } } } } if ($autoclose_ok) { // errors need to be updated $new_token = new HTMLPurifier_Token_End($parent->name); $new_token->start = $parent; // [TagClosedSuppress] if ($e && !isset($parent->armor['MakeWellFormed_TagClosedError'])) { if (!$carryover) { $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag auto closed', $parent); } else { $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag carryover', $parent); } } if ($carryover) { $element = clone $parent; // [TagClosedAuto] $element->armor['MakeWellFormed_TagClosedError'] = true; $element->carryover = true; $token = $this->processToken(array($new_token, $token, $element)); } else { $token = $this->insertBefore($new_token); } } else { $token = $this->remove(); } $reprocess = true; continue; } } $ok = true; } if ($ok) { foreach ($this->injectors as $i => $injector) { if (isset($token->skip[$i])) { // See Note [Injector skips] continue; } if ($token->rewind !== null && $token->rewind !== $i) { continue; } $r = $token; $injector->handleElement($r); $token = $this->processToken($r, $i); $reprocess = true; break; } if (!$reprocess) { // ah, nothing interesting happened; do normal processing if ($token instanceof HTMLPurifier_Token_Start) { $this->stack[] = $token; } elseif ($token instanceof HTMLPurifier_Token_End) { throw new HTMLPurifier_Exception( 'Improper handling of end tag in start code; possible error in MakeWellFormed' ); } } continue; } // sanity check: we should be dealing with a closing tag if (!$token instanceof HTMLPurifier_Token_End) { throw new HTMLPurifier_Exception('Unaccounted for tag token in input stream, bug in HTML Purifier'); } // make sure that we have something open if (empty($this->stack)) { if ($escape_invalid_tags) { if ($e) { $e->send(E_WARNING, 'Strategy_MakeWellFormed: Unnecessary end tag to text'); } $token = new HTMLPurifier_Token_Text($generator->generateFromToken($token)); } else { if ($e) { $e->send(E_WARNING, 'Strategy_MakeWellFormed: Unnecessary end tag removed'); } $token = $this->remove(); } $reprocess = true; continue; } // first, check for the simplest case: everything closes neatly. // Eventually, everything passes through here; if there are problems // we modify the input stream accordingly and then punt, so that // the tokens get processed again. $current_parent = array_pop($this->stack); if ($current_parent->name == $token->name) { $token->start = $current_parent; foreach ($this->injectors as $i => $injector) { if (isset($token->skip[$i])) { // See Note [Injector skips] continue; } if ($token->rewind !== null && $token->rewind !== $i) { continue; } $r = $token; $injector->handleEnd($r); $token = $this->processToken($r, $i); $this->stack[] = $current_parent; $reprocess = true; break; } continue; } // okay, so we're trying to close the wrong tag // undo the pop previous pop $this->stack[] = $current_parent; // scroll back the entire nest, trying to find our tag. // (feature could be to specify how far you'd like to go) $size = count($this->stack); // -2 because -1 is the last element, but we already checked that $skipped_tags = false; for ($j = $size - 2; $j >= 0; $j--) { if ($this->stack[$j]->name == $token->name) { $skipped_tags = array_slice($this->stack, $j); break; } } // we didn't find the tag, so remove if ($skipped_tags === false) { if ($escape_invalid_tags) { if ($e) { $e->send(E_WARNING, 'Strategy_MakeWellFormed: Stray end tag to text'); } $token = new HTMLPurifier_Token_Text($generator->generateFromToken($token)); } else { if ($e) { $e->send(E_WARNING, 'Strategy_MakeWellFormed: Stray end tag removed'); } $token = $this->remove(); } $reprocess = true; continue; } // do errors, in REVERSE $j order: a,b,c with </a></b></c> $c = count($skipped_tags); if ($e) { for ($j = $c - 1; $j > 0; $j--) { // notice we exclude $j == 0, i.e. the current ending tag, from // the errors... [TagClosedSuppress] if (!isset($skipped_tags[$j]->armor['MakeWellFormed_TagClosedError'])) { $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag closed by element end', $skipped_tags[$j]); } } } // insert tags, in FORWARD $j order: c,b,a with </a></b></c> $replace = array($token); for ($j = 1; $j < $c; $j++) { // ...as well as from the insertions $new_token = new HTMLPurifier_Token_End($skipped_tags[$j]->name); $new_token->start = $skipped_tags[$j]; array_unshift($replace, $new_token); if (isset($definition->info[$new_token->name]) && $definition->info[$new_token->name]->formatting) { // [TagClosedAuto] $element = clone $skipped_tags[$j]; $element->carryover = true; $element->armor['MakeWellFormed_TagClosedError'] = true; $replace[] = $element; } } $token = $this->processToken($replace); $reprocess = true; continue; } $context->destroy('CurrentToken'); $context->destroy('CurrentNesting'); $context->destroy('InputZipper'); unset($this->injectors, $this->stack, $this->tokens); return $zipper->toArray($token); } /** * Processes arbitrary token values for complicated substitution patterns. * In general: * * If $token is an array, it is a list of tokens to substitute for the * current token. These tokens then get individually processed. If there * is a leading integer in the list, that integer determines how many * tokens from the stream should be removed. * * If $token is a regular token, it is swapped with the current token. * * If $token is false, the current token is deleted. * * If $token is an integer, that number of tokens (with the first token * being the current one) will be deleted. * * @param HTMLPurifier_Token|array|int|bool $token Token substitution value * @param HTMLPurifier_Injector|int $injector Injector that performed the substitution; default is if * this is not an injector related operation. * @throws HTMLPurifier_Exception */ protected function processToken($token, $injector = -1) { // Zend OpCache miscompiles $token = array($token), so // avoid this pattern. See: https://github.com/ezyang/htmlpurifier/issues/108 // normalize forms of token if (is_object($token)) { $tmp = $token; $token = array(1, $tmp); } if (is_int($token)) { $tmp = $token; $token = array($tmp); } if ($token === false) { $token = array(1); } if (!is_array($token)) { throw new HTMLPurifier_Exception('Invalid token type from injector'); } if (!is_int($token[0])) { array_unshift($token, 1); } if ($token[0] === 0) { throw new HTMLPurifier_Exception('Deleting zero tokens is not valid'); } // $token is now an array with the following form: // array(number nodes to delete, new node 1, new node 2, ...) $delete = array_shift($token); list($old, $r) = $this->zipper->splice($this->token, $delete, $token); if ($injector > -1) { // See Note [Injector skips] // Determine appropriate skips. Here's what the code does: // *If* we deleted one or more tokens, copy the skips // of those tokens into the skips of the new tokens (in $token). // Also, mark the newly inserted tokens as having come from // $injector. $oldskip = isset($old[0]) ? $old[0]->skip : array(); foreach ($token as $object) { $object->skip = $oldskip; $object->skip[$injector] = true; } } return $r; } /** * Inserts a token before the current token. Cursor now points to * this token. You must reprocess after this. * @param HTMLPurifier_Token $token */ private function insertBefore($token) { // NB not $this->zipper->insertBefore(), due to positioning // differences $splice = $this->zipper->splice($this->token, 0, array($token)); return $splice[1]; } /** * Removes current token. Cursor now points to new token occupying previously * occupied space. You must reprocess after this. */ private function remove() { return $this->zipper->delete(); } } // Note [Injector skips] // ~~~~~~~~~~~~~~~~~~~~~ // When I originally designed this class, the idea behind the 'skip' // property of HTMLPurifier_Token was to help avoid infinite loops // in injector processing. For example, suppose you wrote an injector // that bolded swear words. Naively, you might write it so that // whenever you saw ****, you replaced it with <strong>****</strong>. // // When this happens, we will reprocess all of the tokens with the // other injectors. Now there is an opportunity for infinite loop: // if we rerun the swear-word injector on these tokens, we might // see **** and then reprocess again to get // <strong><strong>****</strong></strong> ad infinitum. // // Thus, the idea of a skip is that once we process a token with // an injector, we mark all of those tokens as having "come from" // the injector, and we never run the injector again on these // tokens. // // There were two more complications, however: // // - With HTMLPurifier_Injector_RemoveEmpty, we noticed that if // you had <b><i></i></b>, after you removed the <i></i>, you // really would like this injector to go back and reprocess // the <b> tag, discovering that it is now empty and can be // removed. So we reintroduced the possibility of infinite looping // by adding a "rewind" function, which let you go back to an // earlier point in the token stream and reprocess it with injectors. // Needless to say, we need to UN-skip the token so it gets // reprocessed. // // - Suppose that you successfuly process a token, replace it with // one with your skip mark, but now another injector wants to // process the skipped token with another token. Should you continue // to skip that new token, or reprocess it? If you reprocess, // you can end up with an infinite loop where one injector converts // <a> to <b>, and then another injector converts it back. So // we inherit the skips, but for some reason, I thought that we // should inherit the skip from the first token of the token // that we deleted. Why? Well, it seems to work OK. // // If I were to redesign this functionality, I would absolutely not // go about doing it this way: the semantics are just not very well // defined, and in any case you probably wanted to operate on trees, // not token streams. // vim: et sw=4 sts=4 PercentEncoder.php 0000644 00000006755 15152171665 0010203 0 ustar 00 <?php /** * Class that handles operations involving percent-encoding in URIs. * * @warning * Be careful when reusing instances of PercentEncoder. The object * you use for normalize() SHOULD NOT be used for encode(), or * vice-versa. */ class HTMLPurifier_PercentEncoder { /** * Reserved characters to preserve when using encode(). * @type array */ protected $preserve = array(); /** * String of characters that should be preserved while using encode(). * @param bool $preserve */ public function __construct($preserve = false) { // unreserved letters, ought to const-ify for ($i = 48; $i <= 57; $i++) { // digits $this->preserve[$i] = true; } for ($i = 65; $i <= 90; $i++) { // upper-case $this->preserve[$i] = true; } for ($i = 97; $i <= 122; $i++) { // lower-case $this->preserve[$i] = true; } $this->preserve[45] = true; // Dash - $this->preserve[46] = true; // Period . $this->preserve[95] = true; // Underscore _ $this->preserve[126]= true; // Tilde ~ // extra letters not to escape if ($preserve !== false) { for ($i = 0, $c = strlen($preserve); $i < $c; $i++) { $this->preserve[ord($preserve[$i])] = true; } } } /** * Our replacement for urlencode, it encodes all non-reserved characters, * as well as any extra characters that were instructed to be preserved. * @note * Assumes that the string has already been normalized, making any * and all percent escape sequences valid. Percents will not be * re-escaped, regardless of their status in $preserve * @param string $string String to be encoded * @return string Encoded string. */ public function encode($string) { $ret = ''; for ($i = 0, $c = strlen($string); $i < $c; $i++) { if ($string[$i] !== '%' && !isset($this->preserve[$int = ord($string[$i])])) { $ret .= '%' . sprintf('%02X', $int); } else { $ret .= $string[$i]; } } return $ret; } /** * Fix up percent-encoding by decoding unreserved characters and normalizing. * @warning This function is affected by $preserve, even though the * usual desired behavior is for this not to preserve those * characters. Be careful when reusing instances of PercentEncoder! * @param string $string String to normalize * @return string */ public function normalize($string) { if ($string == '') { return ''; } $parts = explode('%', $string); $ret = array_shift($parts); foreach ($parts as $part) { $length = strlen($part); if ($length < 2) { $ret .= '%25' . $part; continue; } $encoding = substr($part, 0, 2); $text = substr($part, 2); if (!ctype_xdigit($encoding)) { $ret .= '%25' . $part; continue; } $int = hexdec($encoding); if (isset($this->preserve[$int])) { $ret .= chr($int) . $text; continue; } $encoding = strtoupper($encoding); $ret .= '%' . $encoding . $text; } return $ret; } } // vim: et sw=4 sts=4 ConfigSchema/InterchangeBuilder.php 0000644 00000015713 15152171665 0013361 0 ustar 00 <?php class HTMLPurifier_ConfigSchema_InterchangeBuilder { /** * Used for processing DEFAULT, nothing else. * @type HTMLPurifier_VarParser */ protected $varParser; /** * @param HTMLPurifier_VarParser $varParser */ public function __construct($varParser = null) { $this->varParser = $varParser ? $varParser : new HTMLPurifier_VarParser_Native(); } /** * @param string $dir * @return HTMLPurifier_ConfigSchema_Interchange */ public static function buildFromDirectory($dir = null) { $builder = new HTMLPurifier_ConfigSchema_InterchangeBuilder(); $interchange = new HTMLPurifier_ConfigSchema_Interchange(); return $builder->buildDir($interchange, $dir); } /** * @param HTMLPurifier_ConfigSchema_Interchange $interchange * @param string $dir * @return HTMLPurifier_ConfigSchema_Interchange */ public function buildDir($interchange, $dir = null) { if (!$dir) { $dir = HTMLPURIFIER_PREFIX . '/HTMLPurifier/ConfigSchema/schema'; } if (file_exists($dir . '/info.ini')) { $info = parse_ini_file($dir . '/info.ini'); $interchange->name = $info['name']; } $files = array(); $dh = opendir($dir); while (false !== ($file = readdir($dh))) { if (!$file || $file[0] == '.' || strrchr($file, '.') !== '.txt') { continue; } $files[] = $file; } closedir($dh); sort($files); foreach ($files as $file) { $this->buildFile($interchange, $dir . '/' . $file); } return $interchange; } /** * @param HTMLPurifier_ConfigSchema_Interchange $interchange * @param string $file */ public function buildFile($interchange, $file) { $parser = new HTMLPurifier_StringHashParser(); $this->build( $interchange, new HTMLPurifier_StringHash($parser->parseFile($file)) ); } /** * Builds an interchange object based on a hash. * @param HTMLPurifier_ConfigSchema_Interchange $interchange HTMLPurifier_ConfigSchema_Interchange object to build * @param HTMLPurifier_StringHash $hash source data * @throws HTMLPurifier_ConfigSchema_Exception */ public function build($interchange, $hash) { if (!$hash instanceof HTMLPurifier_StringHash) { $hash = new HTMLPurifier_StringHash($hash); } if (!isset($hash['ID'])) { throw new HTMLPurifier_ConfigSchema_Exception('Hash does not have any ID'); } if (strpos($hash['ID'], '.') === false) { if (count($hash) == 2 && isset($hash['DESCRIPTION'])) { $hash->offsetGet('DESCRIPTION'); // prevent complaining } else { throw new HTMLPurifier_ConfigSchema_Exception('All directives must have a namespace'); } } else { $this->buildDirective($interchange, $hash); } $this->_findUnused($hash); } /** * @param HTMLPurifier_ConfigSchema_Interchange $interchange * @param HTMLPurifier_StringHash $hash * @throws HTMLPurifier_ConfigSchema_Exception */ public function buildDirective($interchange, $hash) { $directive = new HTMLPurifier_ConfigSchema_Interchange_Directive(); // These are required elements: $directive->id = $this->id($hash->offsetGet('ID')); $id = $directive->id->toString(); // convenience if (isset($hash['TYPE'])) { $type = explode('/', $hash->offsetGet('TYPE')); if (isset($type[1])) { $directive->typeAllowsNull = true; } $directive->type = $type[0]; } else { throw new HTMLPurifier_ConfigSchema_Exception("TYPE in directive hash '$id' not defined"); } if (isset($hash['DEFAULT'])) { try { $directive->default = $this->varParser->parse( $hash->offsetGet('DEFAULT'), $directive->type, $directive->typeAllowsNull ); } catch (HTMLPurifier_VarParserException $e) { throw new HTMLPurifier_ConfigSchema_Exception($e->getMessage() . " in DEFAULT in directive hash '$id'"); } } if (isset($hash['DESCRIPTION'])) { $directive->description = $hash->offsetGet('DESCRIPTION'); } if (isset($hash['ALLOWED'])) { $directive->allowed = $this->lookup($this->evalArray($hash->offsetGet('ALLOWED'))); } if (isset($hash['VALUE-ALIASES'])) { $directive->valueAliases = $this->evalArray($hash->offsetGet('VALUE-ALIASES')); } if (isset($hash['ALIASES'])) { $raw_aliases = trim($hash->offsetGet('ALIASES')); $aliases = preg_split('/\s*,\s*/', $raw_aliases); foreach ($aliases as $alias) { $directive->aliases[] = $this->id($alias); } } if (isset($hash['VERSION'])) { $directive->version = $hash->offsetGet('VERSION'); } if (isset($hash['DEPRECATED-USE'])) { $directive->deprecatedUse = $this->id($hash->offsetGet('DEPRECATED-USE')); } if (isset($hash['DEPRECATED-VERSION'])) { $directive->deprecatedVersion = $hash->offsetGet('DEPRECATED-VERSION'); } if (isset($hash['EXTERNAL'])) { $directive->external = preg_split('/\s*,\s*/', trim($hash->offsetGet('EXTERNAL'))); } $interchange->addDirective($directive); } /** * Evaluates an array PHP code string without array() wrapper * @param string $contents */ protected function evalArray($contents) { return eval('return array(' . $contents . ');'); } /** * Converts an array list into a lookup array. * @param array $array * @return array */ protected function lookup($array) { $ret = array(); foreach ($array as $val) { $ret[$val] = true; } return $ret; } /** * Convenience function that creates an HTMLPurifier_ConfigSchema_Interchange_Id * object based on a string Id. * @param string $id * @return HTMLPurifier_ConfigSchema_Interchange_Id */ protected function id($id) { return HTMLPurifier_ConfigSchema_Interchange_Id::make($id); } /** * Triggers errors for any unused keys passed in the hash; such keys * may indicate typos, missing values, etc. * @param HTMLPurifier_StringHash $hash Hash to check. */ protected function _findUnused($hash) { $accessed = $hash->getAccessed(); foreach ($hash as $k => $v) { if (!isset($accessed[$k])) { trigger_error("String hash key '$k' not used by builder", E_USER_NOTICE); } } } } // vim: et sw=4 sts=4 ConfigSchema/Validator.php 0000644 00000020257 15152171665 0011547 0 ustar 00 <?php /** * Performs validations on HTMLPurifier_ConfigSchema_Interchange * * @note If you see '// handled by InterchangeBuilder', that means a * design decision in that class would prevent this validation from * ever being necessary. We have them anyway, however, for * redundancy. */ class HTMLPurifier_ConfigSchema_Validator { /** * @type HTMLPurifier_ConfigSchema_Interchange */ protected $interchange; /** * @type array */ protected $aliases; /** * Context-stack to provide easy to read error messages. * @type array */ protected $context = array(); /** * to test default's type. * @type HTMLPurifier_VarParser */ protected $parser; public function __construct() { $this->parser = new HTMLPurifier_VarParser(); } /** * Validates a fully-formed interchange object. * @param HTMLPurifier_ConfigSchema_Interchange $interchange * @return bool */ public function validate($interchange) { $this->interchange = $interchange; $this->aliases = array(); // PHP is a bit lax with integer <=> string conversions in // arrays, so we don't use the identical !== comparison foreach ($interchange->directives as $i => $directive) { $id = $directive->id->toString(); if ($i != $id) { $this->error(false, "Integrity violation: key '$i' does not match internal id '$id'"); } $this->validateDirective($directive); } return true; } /** * Validates a HTMLPurifier_ConfigSchema_Interchange_Id object. * @param HTMLPurifier_ConfigSchema_Interchange_Id $id */ public function validateId($id) { $id_string = $id->toString(); $this->context[] = "id '$id_string'"; if (!$id instanceof HTMLPurifier_ConfigSchema_Interchange_Id) { // handled by InterchangeBuilder $this->error(false, 'is not an instance of HTMLPurifier_ConfigSchema_Interchange_Id'); } // keys are now unconstrained (we might want to narrow down to A-Za-z0-9.) // we probably should check that it has at least one namespace $this->with($id, 'key') ->assertNotEmpty() ->assertIsString(); // implicit assertIsString handled by InterchangeBuilder array_pop($this->context); } /** * Validates a HTMLPurifier_ConfigSchema_Interchange_Directive object. * @param HTMLPurifier_ConfigSchema_Interchange_Directive $d */ public function validateDirective($d) { $id = $d->id->toString(); $this->context[] = "directive '$id'"; $this->validateId($d->id); $this->with($d, 'description') ->assertNotEmpty(); // BEGIN - handled by InterchangeBuilder $this->with($d, 'type') ->assertNotEmpty(); $this->with($d, 'typeAllowsNull') ->assertIsBool(); try { // This also tests validity of $d->type $this->parser->parse($d->default, $d->type, $d->typeAllowsNull); } catch (HTMLPurifier_VarParserException $e) { $this->error('default', 'had error: ' . $e->getMessage()); } // END - handled by InterchangeBuilder if (!is_null($d->allowed) || !empty($d->valueAliases)) { // allowed and valueAliases require that we be dealing with // strings, so check for that early. $d_int = HTMLPurifier_VarParser::$types[$d->type]; if (!isset(HTMLPurifier_VarParser::$stringTypes[$d_int])) { $this->error('type', 'must be a string type when used with allowed or value aliases'); } } $this->validateDirectiveAllowed($d); $this->validateDirectiveValueAliases($d); $this->validateDirectiveAliases($d); array_pop($this->context); } /** * Extra validation if $allowed member variable of * HTMLPurifier_ConfigSchema_Interchange_Directive is defined. * @param HTMLPurifier_ConfigSchema_Interchange_Directive $d */ public function validateDirectiveAllowed($d) { if (is_null($d->allowed)) { return; } $this->with($d, 'allowed') ->assertNotEmpty() ->assertIsLookup(); // handled by InterchangeBuilder if (is_string($d->default) && !isset($d->allowed[$d->default])) { $this->error('default', 'must be an allowed value'); } $this->context[] = 'allowed'; foreach ($d->allowed as $val => $x) { if (!is_string($val)) { $this->error("value $val", 'must be a string'); } } array_pop($this->context); } /** * Extra validation if $valueAliases member variable of * HTMLPurifier_ConfigSchema_Interchange_Directive is defined. * @param HTMLPurifier_ConfigSchema_Interchange_Directive $d */ public function validateDirectiveValueAliases($d) { if (is_null($d->valueAliases)) { return; } $this->with($d, 'valueAliases') ->assertIsArray(); // handled by InterchangeBuilder $this->context[] = 'valueAliases'; foreach ($d->valueAliases as $alias => $real) { if (!is_string($alias)) { $this->error("alias $alias", 'must be a string'); } if (!is_string($real)) { $this->error("alias target $real from alias '$alias'", 'must be a string'); } if ($alias === $real) { $this->error("alias '$alias'", "must not be an alias to itself"); } } if (!is_null($d->allowed)) { foreach ($d->valueAliases as $alias => $real) { if (isset($d->allowed[$alias])) { $this->error("alias '$alias'", 'must not be an allowed value'); } elseif (!isset($d->allowed[$real])) { $this->error("alias '$alias'", 'must be an alias to an allowed value'); } } } array_pop($this->context); } /** * Extra validation if $aliases member variable of * HTMLPurifier_ConfigSchema_Interchange_Directive is defined. * @param HTMLPurifier_ConfigSchema_Interchange_Directive $d */ public function validateDirectiveAliases($d) { $this->with($d, 'aliases') ->assertIsArray(); // handled by InterchangeBuilder $this->context[] = 'aliases'; foreach ($d->aliases as $alias) { $this->validateId($alias); $s = $alias->toString(); if (isset($this->interchange->directives[$s])) { $this->error("alias '$s'", 'collides with another directive'); } if (isset($this->aliases[$s])) { $other_directive = $this->aliases[$s]; $this->error("alias '$s'", "collides with alias for directive '$other_directive'"); } $this->aliases[$s] = $d->id->toString(); } array_pop($this->context); } // protected helper functions /** * Convenience function for generating HTMLPurifier_ConfigSchema_ValidatorAtom * for validating simple member variables of objects. * @param $obj * @param $member * @return HTMLPurifier_ConfigSchema_ValidatorAtom */ protected function with($obj, $member) { return new HTMLPurifier_ConfigSchema_ValidatorAtom($this->getFormattedContext(), $obj, $member); } /** * Emits an error, providing helpful context. * @throws HTMLPurifier_ConfigSchema_Exception */ protected function error($target, $msg) { if ($target !== false) { $prefix = ucfirst($target) . ' in ' . $this->getFormattedContext(); } else { $prefix = ucfirst($this->getFormattedContext()); } throw new HTMLPurifier_ConfigSchema_Exception(trim($prefix . ' ' . $msg)); } /** * Returns a formatted context string. * @return string */ protected function getFormattedContext() { return implode(' in ', array_reverse($this->context)); } } // vim: et sw=4 sts=4 ConfigSchema/Exception.php 0000644 00000000242 15152171665 0011550 0 ustar 00 <?php /** * Exceptions related to configuration schema */ class HTMLPurifier_ConfigSchema_Exception extends HTMLPurifier_Exception { } // vim: et sw=4 sts=4 ConfigSchema/ValidatorAtom.php 0000644 00000005444 15152171665 0012371 0 ustar 00 <?php /** * Fluent interface for validating the contents of member variables. * This should be immutable. See HTMLPurifier_ConfigSchema_Validator for * use-cases. We name this an 'atom' because it's ONLY for validations that * are independent and usually scalar. */ class HTMLPurifier_ConfigSchema_ValidatorAtom { /** * @type string */ protected $context; /** * @type object */ protected $obj; /** * @type string */ protected $member; /** * @type mixed */ protected $contents; public function __construct($context, $obj, $member) { $this->context = $context; $this->obj = $obj; $this->member = $member; $this->contents =& $obj->$member; } /** * @return HTMLPurifier_ConfigSchema_ValidatorAtom */ public function assertIsString() { if (!is_string($this->contents)) { $this->error('must be a string'); } return $this; } /** * @return HTMLPurifier_ConfigSchema_ValidatorAtom */ public function assertIsBool() { if (!is_bool($this->contents)) { $this->error('must be a boolean'); } return $this; } /** * @return HTMLPurifier_ConfigSchema_ValidatorAtom */ public function assertIsArray() { if (!is_array($this->contents)) { $this->error('must be an array'); } return $this; } /** * @return HTMLPurifier_ConfigSchema_ValidatorAtom */ public function assertNotNull() { if ($this->contents === null) { $this->error('must not be null'); } return $this; } /** * @return HTMLPurifier_ConfigSchema_ValidatorAtom */ public function assertAlnum() { $this->assertIsString(); if (!ctype_alnum($this->contents)) { $this->error('must be alphanumeric'); } return $this; } /** * @return HTMLPurifier_ConfigSchema_ValidatorAtom */ public function assertNotEmpty() { if (empty($this->contents)) { $this->error('must not be empty'); } return $this; } /** * @return HTMLPurifier_ConfigSchema_ValidatorAtom */ public function assertIsLookup() { $this->assertIsArray(); foreach ($this->contents as $v) { if ($v !== true) { $this->error('must be a lookup array'); } } return $this; } /** * @param string $msg * @throws HTMLPurifier_ConfigSchema_Exception */ protected function error($msg) { throw new HTMLPurifier_ConfigSchema_Exception(ucfirst($this->member) . ' in ' . $this->context . ' ' . $msg); } } // vim: et sw=4 sts=4 ConfigSchema/schema.ser 0000644 00000057176 15152171665 0011076 0 ustar 00 O:25:"HTMLPurifier_ConfigSchema":3:{s:8:"defaults";a:127:{s:19:"Attr.AllowedClasses";N;s:24:"Attr.AllowedFrameTargets";a:0:{}s:15:"Attr.AllowedRel";a:0:{}s:15:"Attr.AllowedRev";a:0:{}s:18:"Attr.ClassUseCDATA";N;s:20:"Attr.DefaultImageAlt";N;s:24:"Attr.DefaultInvalidImage";s:0:"";s:27:"Attr.DefaultInvalidImageAlt";s:13:"Invalid image";s:19:"Attr.DefaultTextDir";s:3:"ltr";s:13:"Attr.EnableID";b:0;s:21:"Attr.ForbiddenClasses";a:0:{}s:13:"Attr.ID.HTML5";N;s:16:"Attr.IDBlacklist";a:0:{}s:22:"Attr.IDBlacklistRegexp";N;s:13:"Attr.IDPrefix";s:0:"";s:18:"Attr.IDPrefixLocal";s:0:"";s:24:"AutoFormat.AutoParagraph";b:0;s:17:"AutoFormat.Custom";a:0:{}s:25:"AutoFormat.DisplayLinkURI";b:0;s:18:"AutoFormat.Linkify";b:0;s:33:"AutoFormat.PurifierLinkify.DocURL";s:3:"#%s";s:26:"AutoFormat.PurifierLinkify";b:0;s:32:"AutoFormat.RemoveEmpty.Predicate";a:4:{s:8:"colgroup";a:0:{}s:2:"th";a:0:{}s:2:"td";a:0:{}s:6:"iframe";a:1:{i:0;s:3:"src";}}s:44:"AutoFormat.RemoveEmpty.RemoveNbsp.Exceptions";a:2:{s:2:"td";b:1;s:2:"th";b:1;}s:33:"AutoFormat.RemoveEmpty.RemoveNbsp";b:0;s:22:"AutoFormat.RemoveEmpty";b:0;s:39:"AutoFormat.RemoveSpansWithoutAttributes";b:0;s:19:"CSS.AllowDuplicates";b:0;s:18:"CSS.AllowImportant";b:0;s:15:"CSS.AllowTricky";b:0;s:16:"CSS.AllowedFonts";N;s:21:"CSS.AllowedProperties";N;s:17:"CSS.DefinitionRev";i:1;s:23:"CSS.ForbiddenProperties";a:0:{}s:16:"CSS.MaxImgLength";s:6:"1200px";s:15:"CSS.Proprietary";b:0;s:11:"CSS.Trusted";b:0;s:20:"Cache.DefinitionImpl";s:10:"Serializer";s:20:"Cache.SerializerPath";N;s:27:"Cache.SerializerPermissions";i:493;s:22:"Core.AggressivelyFixLt";b:1;s:29:"Core.AggressivelyRemoveScript";b:1;s:28:"Core.AllowHostnameUnderscore";b:0;s:23:"Core.AllowParseManyTags";b:0;s:18:"Core.CollectErrors";b:0;s:18:"Core.ColorKeywords";a:148:{s:9:"aliceblue";s:7:"#F0F8FF";s:12:"antiquewhite";s:7:"#FAEBD7";s:4:"aqua";s:7:"#00FFFF";s:10:"aquamarine";s:7:"#7FFFD4";s:5:"azure";s:7:"#F0FFFF";s:5:"beige";s:7:"#F5F5DC";s:6:"bisque";s:7:"#FFE4C4";s:5:"black";s:7:"#000000";s:14:"blanchedalmond";s:7:"#FFEBCD";s:4:"blue";s:7:"#0000FF";s:10:"blueviolet";s:7:"#8A2BE2";s:5:"brown";s:7:"#A52A2A";s:9:"burlywood";s:7:"#DEB887";s:9:"cadetblue";s:7:"#5F9EA0";s:10:"chartreuse";s:7:"#7FFF00";s:9:"chocolate";s:7:"#D2691E";s:5:"coral";s:7:"#FF7F50";s:14:"cornflowerblue";s:7:"#6495ED";s:8:"cornsilk";s:7:"#FFF8DC";s:7:"crimson";s:7:"#DC143C";s:4:"cyan";s:7:"#00FFFF";s:8:"darkblue";s:7:"#00008B";s:8:"darkcyan";s:7:"#008B8B";s:13:"darkgoldenrod";s:7:"#B8860B";s:8:"darkgray";s:7:"#A9A9A9";s:8:"darkgrey";s:7:"#A9A9A9";s:9:"darkgreen";s:7:"#006400";s:9:"darkkhaki";s:7:"#BDB76B";s:11:"darkmagenta";s:7:"#8B008B";s:14:"darkolivegreen";s:7:"#556B2F";s:10:"darkorange";s:7:"#FF8C00";s:10:"darkorchid";s:7:"#9932CC";s:7:"darkred";s:7:"#8B0000";s:10:"darksalmon";s:7:"#E9967A";s:12:"darkseagreen";s:7:"#8FBC8F";s:13:"darkslateblue";s:7:"#483D8B";s:13:"darkslategray";s:7:"#2F4F4F";s:13:"darkslategrey";s:7:"#2F4F4F";s:13:"darkturquoise";s:7:"#00CED1";s:10:"darkviolet";s:7:"#9400D3";s:8:"deeppink";s:7:"#FF1493";s:11:"deepskyblue";s:7:"#00BFFF";s:7:"dimgray";s:7:"#696969";s:7:"dimgrey";s:7:"#696969";s:10:"dodgerblue";s:7:"#1E90FF";s:9:"firebrick";s:7:"#B22222";s:11:"floralwhite";s:7:"#FFFAF0";s:11:"forestgreen";s:7:"#228B22";s:7:"fuchsia";s:7:"#FF00FF";s:9:"gainsboro";s:7:"#DCDCDC";s:10:"ghostwhite";s:7:"#F8F8FF";s:4:"gold";s:7:"#FFD700";s:9:"goldenrod";s:7:"#DAA520";s:4:"gray";s:7:"#808080";s:4:"grey";s:7:"#808080";s:5:"green";s:7:"#008000";s:11:"greenyellow";s:7:"#ADFF2F";s:8:"honeydew";s:7:"#F0FFF0";s:7:"hotpink";s:7:"#FF69B4";s:9:"indianred";s:7:"#CD5C5C";s:6:"indigo";s:7:"#4B0082";s:5:"ivory";s:7:"#FFFFF0";s:5:"khaki";s:7:"#F0E68C";s:8:"lavender";s:7:"#E6E6FA";s:13:"lavenderblush";s:7:"#FFF0F5";s:9:"lawngreen";s:7:"#7CFC00";s:12:"lemonchiffon";s:7:"#FFFACD";s:9:"lightblue";s:7:"#ADD8E6";s:10:"lightcoral";s:7:"#F08080";s:9:"lightcyan";s:7:"#E0FFFF";s:20:"lightgoldenrodyellow";s:7:"#FAFAD2";s:9:"lightgray";s:7:"#D3D3D3";s:9:"lightgrey";s:7:"#D3D3D3";s:10:"lightgreen";s:7:"#90EE90";s:9:"lightpink";s:7:"#FFB6C1";s:11:"lightsalmon";s:7:"#FFA07A";s:13:"lightseagreen";s:7:"#20B2AA";s:12:"lightskyblue";s:7:"#87CEFA";s:14:"lightslategray";s:7:"#778899";s:14:"lightslategrey";s:7:"#778899";s:14:"lightsteelblue";s:7:"#B0C4DE";s:11:"lightyellow";s:7:"#FFFFE0";s:4:"lime";s:7:"#00FF00";s:9:"limegreen";s:7:"#32CD32";s:5:"linen";s:7:"#FAF0E6";s:7:"magenta";s:7:"#FF00FF";s:6:"maroon";s:7:"#800000";s:16:"mediumaquamarine";s:7:"#66CDAA";s:10:"mediumblue";s:7:"#0000CD";s:12:"mediumorchid";s:7:"#BA55D3";s:12:"mediumpurple";s:7:"#9370DB";s:14:"mediumseagreen";s:7:"#3CB371";s:15:"mediumslateblue";s:7:"#7B68EE";s:17:"mediumspringgreen";s:7:"#00FA9A";s:15:"mediumturquoise";s:7:"#48D1CC";s:15:"mediumvioletred";s:7:"#C71585";s:12:"midnightblue";s:7:"#191970";s:9:"mintcream";s:7:"#F5FFFA";s:9:"mistyrose";s:7:"#FFE4E1";s:8:"moccasin";s:7:"#FFE4B5";s:11:"navajowhite";s:7:"#FFDEAD";s:4:"navy";s:7:"#000080";s:7:"oldlace";s:7:"#FDF5E6";s:5:"olive";s:7:"#808000";s:9:"olivedrab";s:7:"#6B8E23";s:6:"orange";s:7:"#FFA500";s:9:"orangered";s:7:"#FF4500";s:6:"orchid";s:7:"#DA70D6";s:13:"palegoldenrod";s:7:"#EEE8AA";s:9:"palegreen";s:7:"#98FB98";s:13:"paleturquoise";s:7:"#AFEEEE";s:13:"palevioletred";s:7:"#DB7093";s:10:"papayawhip";s:7:"#FFEFD5";s:9:"peachpuff";s:7:"#FFDAB9";s:4:"peru";s:7:"#CD853F";s:4:"pink";s:7:"#FFC0CB";s:4:"plum";s:7:"#DDA0DD";s:10:"powderblue";s:7:"#B0E0E6";s:6:"purple";s:7:"#800080";s:13:"rebeccapurple";s:7:"#663399";s:3:"red";s:7:"#FF0000";s:9:"rosybrown";s:7:"#BC8F8F";s:9:"royalblue";s:7:"#4169E1";s:11:"saddlebrown";s:7:"#8B4513";s:6:"salmon";s:7:"#FA8072";s:10:"sandybrown";s:7:"#F4A460";s:8:"seagreen";s:7:"#2E8B57";s:8:"seashell";s:7:"#FFF5EE";s:6:"sienna";s:7:"#A0522D";s:6:"silver";s:7:"#C0C0C0";s:7:"skyblue";s:7:"#87CEEB";s:9:"slateblue";s:7:"#6A5ACD";s:9:"slategray";s:7:"#708090";s:9:"slategrey";s:7:"#708090";s:4:"snow";s:7:"#FFFAFA";s:11:"springgreen";s:7:"#00FF7F";s:9:"steelblue";s:7:"#4682B4";s:3:"tan";s:7:"#D2B48C";s:4:"teal";s:7:"#008080";s:7:"thistle";s:7:"#D8BFD8";s:6:"tomato";s:7:"#FF6347";s:9:"turquoise";s:7:"#40E0D0";s:6:"violet";s:7:"#EE82EE";s:5:"wheat";s:7:"#F5DEB3";s:5:"white";s:7:"#FFFFFF";s:10:"whitesmoke";s:7:"#F5F5F5";s:6:"yellow";s:7:"#FFFF00";s:11:"yellowgreen";s:7:"#9ACD32";}s:30:"Core.ConvertDocumentToFragment";b:1;s:36:"Core.DirectLexLineNumberSyncInterval";i:0;s:20:"Core.DisableExcludes";b:0;s:15:"Core.EnableIDNA";b:0;s:13:"Core.Encoding";s:5:"utf-8";s:26:"Core.EscapeInvalidChildren";b:0;s:22:"Core.EscapeInvalidTags";b:0;s:29:"Core.EscapeNonASCIICharacters";b:0;s:19:"Core.HiddenElements";a:2:{s:6:"script";b:1;s:5:"style";b:1;}s:13:"Core.Language";s:2:"en";s:24:"Core.LegacyEntityDecoder";b:0;s:14:"Core.LexerImpl";N;s:24:"Core.MaintainLineNumbers";N;s:22:"Core.NormalizeNewlines";b:1;s:21:"Core.RemoveInvalidImg";b:1;s:33:"Core.RemoveProcessingInstructions";b:0;s:25:"Core.RemoveScriptContents";N;s:13:"Filter.Custom";a:0:{}s:34:"Filter.ExtractStyleBlocks.Escaping";b:1;s:31:"Filter.ExtractStyleBlocks.Scope";N;s:34:"Filter.ExtractStyleBlocks.TidyImpl";N;s:25:"Filter.ExtractStyleBlocks";b:0;s:14:"Filter.YouTube";b:0;s:12:"HTML.Allowed";N;s:22:"HTML.AllowedAttributes";N;s:20:"HTML.AllowedComments";a:0:{}s:26:"HTML.AllowedCommentsRegexp";N;s:20:"HTML.AllowedElements";N;s:19:"HTML.AllowedModules";N;s:23:"HTML.Attr.Name.UseCDATA";b:0;s:17:"HTML.BlockWrapper";s:1:"p";s:16:"HTML.CoreModules";a:7:{s:9:"Structure";b:1;s:4:"Text";b:1;s:9:"Hypertext";b:1;s:4:"List";b:1;s:22:"NonXMLCommonAttributes";b:1;s:19:"XMLCommonAttributes";b:1;s:16:"CommonAttributes";b:1;}s:18:"HTML.CustomDoctype";N;s:17:"HTML.DefinitionID";N;s:18:"HTML.DefinitionRev";i:1;s:12:"HTML.Doctype";N;s:25:"HTML.FlashAllowFullScreen";b:0;s:24:"HTML.ForbiddenAttributes";a:0:{}s:22:"HTML.ForbiddenElements";a:0:{}s:10:"HTML.Forms";b:0;s:17:"HTML.MaxImgLength";i:1200;s:13:"HTML.Nofollow";b:0;s:11:"HTML.Parent";s:3:"div";s:16:"HTML.Proprietary";b:0;s:14:"HTML.SafeEmbed";b:0;s:15:"HTML.SafeIframe";b:0;s:15:"HTML.SafeObject";b:0;s:18:"HTML.SafeScripting";a:0:{}s:11:"HTML.Strict";b:0;s:16:"HTML.TargetBlank";b:0;s:19:"HTML.TargetNoopener";b:1;s:21:"HTML.TargetNoreferrer";b:1;s:12:"HTML.TidyAdd";a:0:{}s:14:"HTML.TidyLevel";s:6:"medium";s:15:"HTML.TidyRemove";a:0:{}s:12:"HTML.Trusted";b:0;s:10:"HTML.XHTML";b:1;s:28:"Output.CommentScriptContents";b:1;s:19:"Output.FixInnerHTML";b:1;s:18:"Output.FlashCompat";b:0;s:14:"Output.Newline";N;s:15:"Output.SortAttr";b:0;s:17:"Output.TidyFormat";b:0;s:17:"Test.ForceNoIconv";b:0;s:18:"URI.AllowedSchemes";a:7:{s:4:"http";b:1;s:5:"https";b:1;s:6:"mailto";b:1;s:3:"ftp";b:1;s:4:"nntp";b:1;s:4:"news";b:1;s:3:"tel";b:1;}s:8:"URI.Base";N;s:17:"URI.DefaultScheme";s:4:"http";s:16:"URI.DefinitionID";N;s:17:"URI.DefinitionRev";i:1;s:11:"URI.Disable";b:0;s:19:"URI.DisableExternal";b:0;s:28:"URI.DisableExternalResources";b:0;s:20:"URI.DisableResources";b:0;s:8:"URI.Host";N;s:17:"URI.HostBlacklist";a:0:{}s:16:"URI.MakeAbsolute";b:0;s:9:"URI.Munge";N;s:18:"URI.MungeResources";b:0;s:18:"URI.MungeSecretKey";N;s:26:"URI.OverrideAllowedSchemes";b:1;s:20:"URI.SafeIframeRegexp";N;}s:12:"defaultPlist";O:25:"HTMLPurifier_PropertyList":3:{s:7:"