From 9655c53a3bbf6a6982d742df2a98bad9d49075dc Mon Sep 17 00:00:00 2001 From: chicpro Date: Mon, 29 Aug 2016 11:03:17 +0900 Subject: [PATCH] =?UTF-8?q?htmlpurifier=204.8=20=EC=97=85=EB=8D=B0?= =?UTF-8?q?=EC=9D=B4=ED=8A=B8?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../htmlpurifier/HTMLPurifier.standalone.php | 308 +++++++++++++++--- .../HTMLPurifier/ConfigSchema/schema.ser | Bin 15305 -> 15598 bytes .../ConfigSchema/schema/Attr.ID.HTML5.txt | 10 + .../schema/CSS.AllowDuplicates.txt | 11 + .../schema/Cache.SerializerPermissions.txt | 7 +- .../schema/HTML.TargetNoreferrer.txt | 9 + .../schema/URI.AllowedSchemes.txt | 1 + .../HTMLPurifier/Printer/ConfigForm.php | 4 + 8 files changed, 300 insertions(+), 50 deletions(-) create mode 100644 plugin/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Attr.ID.HTML5.txt create mode 100644 plugin/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/CSS.AllowDuplicates.txt create mode 100644 plugin/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/HTML.TargetNoreferrer.txt diff --git a/plugin/htmlpurifier/HTMLPurifier.standalone.php b/plugin/htmlpurifier/HTMLPurifier.standalone.php index 423dcbf01..f2005bf5b 100644 --- a/plugin/htmlpurifier/HTMLPurifier.standalone.php +++ b/plugin/htmlpurifier/HTMLPurifier.standalone.php @@ -7,7 +7,7 @@ * primary concern and you are using an opcode cache. PLEASE DO NOT EDIT THIS * FILE, changes will be overwritten the next time the script is run. * - * @version 4.7.0 + * @version 4.8.0 * * @warning * You must *not* include any other HTML Purifier files before this file, @@ -39,7 +39,7 @@ */ /* - HTML Purifier 4.7.0 - Standards Compliant HTML Filtering + HTML Purifier 4.8.0 - Standards Compliant HTML Filtering Copyright (C) 2006-2008 Edward Z. Yang This library is free software; you can redistribute it and/or @@ -78,12 +78,12 @@ class HTMLPurifier * Version of HTML Purifier. * @type string */ - public $version = '4.7.0'; + public $version = '4.8.0'; /** * Constant with version of HTML Purifier. */ - const VERSION = '4.7.0'; + const VERSION = '4.8.0'; /** * Global configuration object. @@ -124,7 +124,7 @@ class HTMLPurifier /** * Initializes the purifier. * - * @param HTMLPurifier_Config $config Optional HTMLPurifier_Config object + * @param HTMLPurifier_Config|mixed $config Optional HTMLPurifier_Config object * for all instances of the purifier, if omitted, a default * configuration is supplied (which can be overridden on a * per-use basis). @@ -406,6 +406,11 @@ class HTMLPurifier_AttrCollections * @param HTMLPurifier_HTMLModule[] $modules Hash array of HTMLPurifier_HTMLModule members */ public function __construct($attr_types, $modules) + { + $this->doConstruct($attr_types, $modules); + } + + public function doConstruct($attr_types, $modules) { // load extensions from the modules foreach ($modules as $module) { @@ -1557,6 +1562,19 @@ class HTMLPurifier_CSSDefinition extends HTMLPurifier_Definition ); $this->info['page-break-inside'] = new HTMLPurifier_AttrDef_Enum(array('auto', 'avoid')); + $border_radius = new HTMLPurifier_AttrDef_CSS_Composite( + array( + new HTMLPurifier_AttrDef_CSS_Percentage(true), // disallow negative + new HTMLPurifier_AttrDef_CSS_Length('0') // disallow negative + )); + + $this->info['border-top-left-radius'] = + $this->info['border-top-right-radius'] = + $this->info['border-bottom-right-radius'] = + $this->info['border-bottom-left-radius'] = new HTMLPurifier_AttrDef_CSS_Multiple($border_radius, 2); + // TODO: support SLASH syntax + $this->info['border-radius'] = new HTMLPurifier_AttrDef_CSS_Multiple($border_radius, 4); + } /** @@ -1736,7 +1754,7 @@ class HTMLPurifier_Config * HTML Purifier's version * @type string */ - public $version = '4.7.0'; + public $version = '4.8.0'; /** * Whether or not to automatically finalize @@ -3198,7 +3216,7 @@ abstract class HTMLPurifier_DefinitionCache /** * Clears all expired (older version or revision) objects from cache - * @note Be carefuly implementing this method as flush. Flush must + * @note Be careful implementing this method as flush. Flush must * not interfere with other Definition types, and cleanup() * should not be repeatedly called by userland code. * @param HTMLPurifier_Config $config @@ -6293,6 +6311,11 @@ class HTMLPurifier_HTMLModuleManager if ($config->get('HTML.TargetBlank')) { $modules[] = 'TargetBlank'; } + // NB: HTML.TargetNoreferrer must be AFTER HTML.TargetBlank + // so that its post-attr-transform gets run afterwards. + if ($config->get('HTML.TargetNoreferrer')) { + $modules[] = 'TargetNoreferrer'; + } // merge in custom modules $modules = array_merge($modules, $this->userModules); @@ -7743,12 +7766,17 @@ class HTMLPurifier_Lexer public function extractBody($html) { $matches = array(); - $result = preg_match('!]*>(.*)!is', $html, $matches); + $result = preg_match('|(.*?)]*>(.*)|is', $html, $matches); if ($result) { - return $matches[1]; - } else { - return $html; + // Make sure it's not in a comment + $comment_start = strrpos($matches[1], ''); + if ($comment_start === false || + ($comment_end !== false && $comment_end > $comment_start)) { + return $matches[2]; + } } + return $html; } } @@ -10073,6 +10101,7 @@ class HTMLPurifier_AttrDef_CSS extends HTMLPurifier_AttrDef $css = $this->parseCDATA($css); $definition = $config->getCSSDefinition(); + $allow_duplicates = $config->get("CSS.AllowDuplicates"); // we're going to break the spec and explode by semicolons. // This is because semicolon rarely appears in escaped form @@ -10082,6 +10111,7 @@ class HTMLPurifier_AttrDef_CSS extends HTMLPurifier_AttrDef $declarations = explode(';', $css); $propvalues = array(); + $new_declarations = ''; /** * Name of the current CSS property being validated. @@ -10131,7 +10161,11 @@ class HTMLPurifier_AttrDef_CSS extends HTMLPurifier_AttrDef if ($result === false) { continue; } - $propvalues[$property] = $result; + if ($allow_duplicates) { + $new_declarations .= "$property:$result;"; + } else { + $propvalues[$property] = $result; + } } $context->destroy('CurrentCSSProperty'); @@ -10140,7 +10174,6 @@ class HTMLPurifier_AttrDef_CSS extends HTMLPurifier_AttrDef // slightly inefficient, but it's the only way of getting rid of // duplicates. Perhaps config to optimize it, but not now. - $new_declarations = ''; foreach ($propvalues as $prop => $value) { $new_declarations .= "$prop:$value;"; } @@ -12251,6 +12284,9 @@ class HTMLPurifier_AttrDef_CSS_URI extends HTMLPurifier_AttrDef_URI return false; } $uri_string = substr($uri_string, 4); + if (strlen($uri_string) == 0) { + return false; + } $new_length = strlen($uri_string) - 1; if ($uri_string[$new_length] != ')') { return false; @@ -12625,18 +12661,26 @@ class HTMLPurifier_AttrDef_HTML_ID extends HTMLPurifier_AttrDef // we purposely avoid using regex, hopefully this is faster - if (ctype_alpha($id)) { - $result = true; - } else { - if (!ctype_alpha(@$id[0])) { + if ($config->get('Attr.ID.HTML5') === true) { + if (preg_match('/[\t\n\x0b\x0c ]/', $id)) { return false; } - // primitive style of regexps, I suppose - $trim = trim( - $id, - 'A..Za..z0..9:-._' - ); - $result = ($trim === ''); + } else { + if (ctype_alpha($id)) { + // OK + } else { + if (!ctype_alpha(@$id[0])) { + return false; + } + // primitive style of regexps, I suppose + $trim = trim( + $id, + 'A..Za..z0..9:-._' + ); + if ($trim !== '') { + return false; + } + } } $regexp = $config->get('Attr.IDBlacklistRegexp'); @@ -12644,14 +12688,14 @@ class HTMLPurifier_AttrDef_HTML_ID extends HTMLPurifier_AttrDef return false; } - if (!$this->selector && $result) { + if (!$this->selector) { $id_accumulator->add($id); } // if no change was made to the ID, return the result // else, return the new id if stripping whitespace made it // valid, or return false. - return $result ? $id : false; + return $id; } } @@ -13024,24 +13068,33 @@ class HTMLPurifier_AttrDef_URI_Host extends HTMLPurifier_AttrDef // fairly well supported. $underscore = $config->get('Core.AllowHostnameUnderscore') ? '_' : ''; + // Based off of RFC 1738, but amended so that + // as per RFC 3696, the top label need only not be all numeric. // The productions describing this are: $a = '[a-z]'; // alpha $an = '[a-z0-9]'; // alphanum $and = "[a-z0-9-$underscore]"; // alphanum | "-" // domainlabel = alphanum | alphanum *( alphanum | "-" ) alphanum - $domainlabel = "$an($and*$an)?"; - // toplabel = alpha | alpha *( alphanum | "-" ) alphanum - $toplabel = "$a($and*$an)?"; + $domainlabel = "$an(?:$and*$an)?"; + // AMENDED as per RFC 3696 + // toplabel = alphanum | alphanum *( alphanum | "-" ) alphanum + // side condition: not all numeric + $toplabel = "$an(?:$and*$an)?"; // hostname = *( domainlabel "." ) toplabel [ "." ] - if (preg_match("/^($domainlabel\.)*$toplabel\.?$/i", $string)) { - return $string; + if (preg_match("/^(?:$domainlabel\.)*($toplabel)\.?$/i", $string, $matches)) { + if (!ctype_digit($matches[1])) { + return $string; + } } + // PHP 5.3 and later support this functionality natively + if (function_exists('idn_to_ascii')) { + return idn_to_ascii($string); + // If we have Net_IDNA2 support, we can support IRIs by // punycoding them. (This is the most portable thing to do, // since otherwise we have to assume browsers support - - if ($config->get('Core.EnableIDNA')) { + } elseif ($config->get('Core.EnableIDNA')) { $idna = new Net_IDNA2(array('encoding' => 'utf8', 'overlong' => false, 'strict' => true)); // we need to encode each period separately $parts = explode('.', $string); @@ -13505,8 +13558,7 @@ class HTMLPurifier_AttrTransform_ImgRequired extends HTMLPurifier_AttrTransform if ($src) { $alt = $config->get('Attr.DefaultImageAlt'); if ($alt === null) { - // truncate if the alt is too long - $attr['alt'] = substr(basename($attr['src']), 0, 40); + $attr['alt'] = basename($attr['src']); } else { $attr['alt'] = $alt; } @@ -14053,6 +14105,44 @@ class HTMLPurifier_AttrTransform_TargetBlank extends HTMLPurifier_AttrTransform +// must be called POST validation + +/** + * Adds rel="noreferrer" to any links which target a different window + * than the current one. This is used to prevent malicious websites + * from silently replacing the original window, which could be used + * to do phishing. + * This transform is controlled by %HTML.TargetNoreferrer. + */ +class HTMLPurifier_AttrTransform_TargetNoreferrer extends HTMLPurifier_AttrTransform +{ + /** + * @param array $attr + * @param HTMLPurifier_Config $config + * @param HTMLPurifier_Context $context + * @return array + */ + public function transform($attr, $config, $context) + { + if (isset($attr['rel'])) { + $rels = explode(' ', $attr['rel']); + } else { + $rels = array(); + } + if (isset($attr['target']) && !in_array('noreferrer', $rels)) { + $rels[] = 'noreferrer'; + } + if (!empty($rels) || isset($attr['rel'])) { + $attr['rel'] = implode(' ', $rels); + } + + return $attr; + } +} + + + + /** * Sets height/width defaults for