From 9655c53a3bbf6a6982d742df2a98bad9d49075dc Mon Sep 17 00:00:00 2001
From: chicpro
Date: Mon, 29 Aug 2016 11:03:17 +0900
Subject: [PATCH] =?UTF-8?q?htmlpurifier=204.8=20=EC=97=85=EB=8D=B0?=
=?UTF-8?q?=EC=9D=B4=ED=8A=B8?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
.../htmlpurifier/HTMLPurifier.standalone.php | 308 +++++++++++++++---
.../HTMLPurifier/ConfigSchema/schema.ser | Bin 15305 -> 15598 bytes
.../ConfigSchema/schema/Attr.ID.HTML5.txt | 10 +
.../schema/CSS.AllowDuplicates.txt | 11 +
.../schema/Cache.SerializerPermissions.txt | 7 +-
.../schema/HTML.TargetNoreferrer.txt | 9 +
.../schema/URI.AllowedSchemes.txt | 1 +
.../HTMLPurifier/Printer/ConfigForm.php | 4 +
8 files changed, 300 insertions(+), 50 deletions(-)
create mode 100644 plugin/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/Attr.ID.HTML5.txt
create mode 100644 plugin/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/CSS.AllowDuplicates.txt
create mode 100644 plugin/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/HTML.TargetNoreferrer.txt
diff --git a/plugin/htmlpurifier/HTMLPurifier.standalone.php b/plugin/htmlpurifier/HTMLPurifier.standalone.php
index 423dcbf01..f2005bf5b 100644
--- a/plugin/htmlpurifier/HTMLPurifier.standalone.php
+++ b/plugin/htmlpurifier/HTMLPurifier.standalone.php
@@ -7,7 +7,7 @@
* primary concern and you are using an opcode cache. PLEASE DO NOT EDIT THIS
* FILE, changes will be overwritten the next time the script is run.
*
- * @version 4.7.0
+ * @version 4.8.0
*
* @warning
* You must *not* include any other HTML Purifier files before this file,
@@ -39,7 +39,7 @@
*/
/*
- HTML Purifier 4.7.0 - Standards Compliant HTML Filtering
+ HTML Purifier 4.8.0 - Standards Compliant HTML Filtering
Copyright (C) 2006-2008 Edward Z. Yang
This library is free software; you can redistribute it and/or
@@ -78,12 +78,12 @@ class HTMLPurifier
* Version of HTML Purifier.
* @type string
*/
- public $version = '4.7.0';
+ public $version = '4.8.0';
/**
* Constant with version of HTML Purifier.
*/
- const VERSION = '4.7.0';
+ const VERSION = '4.8.0';
/**
* Global configuration object.
@@ -124,7 +124,7 @@ class HTMLPurifier
/**
* Initializes the purifier.
*
- * @param HTMLPurifier_Config $config Optional HTMLPurifier_Config object
+ * @param HTMLPurifier_Config|mixed $config Optional HTMLPurifier_Config object
* for all instances of the purifier, if omitted, a default
* configuration is supplied (which can be overridden on a
* per-use basis).
@@ -406,6 +406,11 @@ class HTMLPurifier_AttrCollections
* @param HTMLPurifier_HTMLModule[] $modules Hash array of HTMLPurifier_HTMLModule members
*/
public function __construct($attr_types, $modules)
+ {
+ $this->doConstruct($attr_types, $modules);
+ }
+
+ public function doConstruct($attr_types, $modules)
{
// load extensions from the modules
foreach ($modules as $module) {
@@ -1557,6 +1562,19 @@ class HTMLPurifier_CSSDefinition extends HTMLPurifier_Definition
);
$this->info['page-break-inside'] = new HTMLPurifier_AttrDef_Enum(array('auto', 'avoid'));
+ $border_radius = new HTMLPurifier_AttrDef_CSS_Composite(
+ array(
+ new HTMLPurifier_AttrDef_CSS_Percentage(true), // disallow negative
+ new HTMLPurifier_AttrDef_CSS_Length('0') // disallow negative
+ ));
+
+ $this->info['border-top-left-radius'] =
+ $this->info['border-top-right-radius'] =
+ $this->info['border-bottom-right-radius'] =
+ $this->info['border-bottom-left-radius'] = new HTMLPurifier_AttrDef_CSS_Multiple($border_radius, 2);
+ // TODO: support SLASH syntax
+ $this->info['border-radius'] = new HTMLPurifier_AttrDef_CSS_Multiple($border_radius, 4);
+
}
/**
@@ -1736,7 +1754,7 @@ class HTMLPurifier_Config
* HTML Purifier's version
* @type string
*/
- public $version = '4.7.0';
+ public $version = '4.8.0';
/**
* Whether or not to automatically finalize
@@ -3198,7 +3216,7 @@ abstract class HTMLPurifier_DefinitionCache
/**
* Clears all expired (older version or revision) objects from cache
- * @note Be carefuly implementing this method as flush. Flush must
+ * @note Be careful implementing this method as flush. Flush must
* not interfere with other Definition types, and cleanup()
* should not be repeatedly called by userland code.
* @param HTMLPurifier_Config $config
@@ -6293,6 +6311,11 @@ class HTMLPurifier_HTMLModuleManager
if ($config->get('HTML.TargetBlank')) {
$modules[] = 'TargetBlank';
}
+ // NB: HTML.TargetNoreferrer must be AFTER HTML.TargetBlank
+ // so that its post-attr-transform gets run afterwards.
+ if ($config->get('HTML.TargetNoreferrer')) {
+ $modules[] = 'TargetNoreferrer';
+ }
// merge in custom modules
$modules = array_merge($modules, $this->userModules);
@@ -7743,12 +7766,17 @@ class HTMLPurifier_Lexer
public function extractBody($html)
{
$matches = array();
- $result = preg_match('!]*>(.*)!is', $html, $matches);
+ $result = preg_match('|(.*?)]*>(.*)|is', $html, $matches);
if ($result) {
- return $matches[1];
- } else {
- return $html;
+ // Make sure it's not in a comment
+ $comment_start = strrpos($matches[1], '');
+ if ($comment_start === false ||
+ ($comment_end !== false && $comment_end > $comment_start)) {
+ return $matches[2];
+ }
}
+ return $html;
}
}
@@ -10073,6 +10101,7 @@ class HTMLPurifier_AttrDef_CSS extends HTMLPurifier_AttrDef
$css = $this->parseCDATA($css);
$definition = $config->getCSSDefinition();
+ $allow_duplicates = $config->get("CSS.AllowDuplicates");
// we're going to break the spec and explode by semicolons.
// This is because semicolon rarely appears in escaped form
@@ -10082,6 +10111,7 @@ class HTMLPurifier_AttrDef_CSS extends HTMLPurifier_AttrDef
$declarations = explode(';', $css);
$propvalues = array();
+ $new_declarations = '';
/**
* Name of the current CSS property being validated.
@@ -10131,7 +10161,11 @@ class HTMLPurifier_AttrDef_CSS extends HTMLPurifier_AttrDef
if ($result === false) {
continue;
}
- $propvalues[$property] = $result;
+ if ($allow_duplicates) {
+ $new_declarations .= "$property:$result;";
+ } else {
+ $propvalues[$property] = $result;
+ }
}
$context->destroy('CurrentCSSProperty');
@@ -10140,7 +10174,6 @@ class HTMLPurifier_AttrDef_CSS extends HTMLPurifier_AttrDef
// slightly inefficient, but it's the only way of getting rid of
// duplicates. Perhaps config to optimize it, but not now.
- $new_declarations = '';
foreach ($propvalues as $prop => $value) {
$new_declarations .= "$prop:$value;";
}
@@ -12251,6 +12284,9 @@ class HTMLPurifier_AttrDef_CSS_URI extends HTMLPurifier_AttrDef_URI
return false;
}
$uri_string = substr($uri_string, 4);
+ if (strlen($uri_string) == 0) {
+ return false;
+ }
$new_length = strlen($uri_string) - 1;
if ($uri_string[$new_length] != ')') {
return false;
@@ -12625,18 +12661,26 @@ class HTMLPurifier_AttrDef_HTML_ID extends HTMLPurifier_AttrDef
// we purposely avoid using regex, hopefully this is faster
- if (ctype_alpha($id)) {
- $result = true;
- } else {
- if (!ctype_alpha(@$id[0])) {
+ if ($config->get('Attr.ID.HTML5') === true) {
+ if (preg_match('/[\t\n\x0b\x0c ]/', $id)) {
return false;
}
- // primitive style of regexps, I suppose
- $trim = trim(
- $id,
- 'A..Za..z0..9:-._'
- );
- $result = ($trim === '');
+ } else {
+ if (ctype_alpha($id)) {
+ // OK
+ } else {
+ if (!ctype_alpha(@$id[0])) {
+ return false;
+ }
+ // primitive style of regexps, I suppose
+ $trim = trim(
+ $id,
+ 'A..Za..z0..9:-._'
+ );
+ if ($trim !== '') {
+ return false;
+ }
+ }
}
$regexp = $config->get('Attr.IDBlacklistRegexp');
@@ -12644,14 +12688,14 @@ class HTMLPurifier_AttrDef_HTML_ID extends HTMLPurifier_AttrDef
return false;
}
- if (!$this->selector && $result) {
+ if (!$this->selector) {
$id_accumulator->add($id);
}
// if no change was made to the ID, return the result
// else, return the new id if stripping whitespace made it
// valid, or return false.
- return $result ? $id : false;
+ return $id;
}
}
@@ -13024,24 +13068,33 @@ class HTMLPurifier_AttrDef_URI_Host extends HTMLPurifier_AttrDef
// fairly well supported.
$underscore = $config->get('Core.AllowHostnameUnderscore') ? '_' : '';
+ // Based off of RFC 1738, but amended so that
+ // as per RFC 3696, the top label need only not be all numeric.
// The productions describing this are:
$a = '[a-z]'; // alpha
$an = '[a-z0-9]'; // alphanum
$and = "[a-z0-9-$underscore]"; // alphanum | "-"
// domainlabel = alphanum | alphanum *( alphanum | "-" ) alphanum
- $domainlabel = "$an($and*$an)?";
- // toplabel = alpha | alpha *( alphanum | "-" ) alphanum
- $toplabel = "$a($and*$an)?";
+ $domainlabel = "$an(?:$and*$an)?";
+ // AMENDED as per RFC 3696
+ // toplabel = alphanum | alphanum *( alphanum | "-" ) alphanum
+ // side condition: not all numeric
+ $toplabel = "$an(?:$and*$an)?";
// hostname = *( domainlabel "." ) toplabel [ "." ]
- if (preg_match("/^($domainlabel\.)*$toplabel\.?$/i", $string)) {
- return $string;
+ if (preg_match("/^(?:$domainlabel\.)*($toplabel)\.?$/i", $string, $matches)) {
+ if (!ctype_digit($matches[1])) {
+ return $string;
+ }
}
+ // PHP 5.3 and later support this functionality natively
+ if (function_exists('idn_to_ascii')) {
+ return idn_to_ascii($string);
+
// If we have Net_IDNA2 support, we can support IRIs by
// punycoding them. (This is the most portable thing to do,
// since otherwise we have to assume browsers support
-
- if ($config->get('Core.EnableIDNA')) {
+ } elseif ($config->get('Core.EnableIDNA')) {
$idna = new Net_IDNA2(array('encoding' => 'utf8', 'overlong' => false, 'strict' => true));
// we need to encode each period separately
$parts = explode('.', $string);
@@ -13505,8 +13558,7 @@ class HTMLPurifier_AttrTransform_ImgRequired extends HTMLPurifier_AttrTransform
if ($src) {
$alt = $config->get('Attr.DefaultImageAlt');
if ($alt === null) {
- // truncate if the alt is too long
- $attr['alt'] = substr(basename($attr['src']), 0, 40);
+ $attr['alt'] = basename($attr['src']);
} else {
$attr['alt'] = $alt;
}
@@ -14053,6 +14105,44 @@ class HTMLPurifier_AttrTransform_TargetBlank extends HTMLPurifier_AttrTransform
+// must be called POST validation
+
+/**
+ * Adds rel="noreferrer" to any links which target a different window
+ * than the current one. This is used to prevent malicious websites
+ * from silently replacing the original window, which could be used
+ * to do phishing.
+ * This transform is controlled by %HTML.TargetNoreferrer.
+ */
+class HTMLPurifier_AttrTransform_TargetNoreferrer extends HTMLPurifier_AttrTransform
+{
+ /**
+ * @param array $attr
+ * @param HTMLPurifier_Config $config
+ * @param HTMLPurifier_Context $context
+ * @return array
+ */
+ public function transform($attr, $config, $context)
+ {
+ if (isset($attr['rel'])) {
+ $rels = explode(' ', $attr['rel']);
+ } else {
+ $rels = array();
+ }
+ if (isset($attr['target']) && !in_array('noreferrer', $rels)) {
+ $rels[] = 'noreferrer';
+ }
+ if (!empty($rels) || isset($attr['rel'])) {
+ $attr['rel'] = implode(' ', $rels);
+ }
+
+ return $attr;
+ }
+}
+
+
+
+
/**
* Sets height/width defaults for
+
+ In HTML Purifier 4.8.0, this also supports NULL,
+ which means that no chmod'ing or directory creation shall
+ occur.
+
--# vim: et sw=4 sts=4
diff --git a/plugin/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/HTML.TargetNoreferrer.txt b/plugin/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/HTML.TargetNoreferrer.txt
new file mode 100644
index 000000000..993a81704
--- /dev/null
+++ b/plugin/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/HTML.TargetNoreferrer.txt
@@ -0,0 +1,9 @@
+HTML.TargetNoreferrer
+TYPE: bool
+VERSION: 4.8.0
+DEFAULT: TRUE
+--DESCRIPTION--
+If enabled, noreferrer rel attributes are added to links which have
+a target attribute associated with them. This prevents malicious
+destinations from overwriting the original window.
+--# vim: et sw=4 sts=4
diff --git a/plugin/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/URI.AllowedSchemes.txt b/plugin/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/URI.AllowedSchemes.txt
index 47714f5d2..0b0533a77 100644
--- a/plugin/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/URI.AllowedSchemes.txt
+++ b/plugin/htmlpurifier/standalone/HTMLPurifier/ConfigSchema/schema/URI.AllowedSchemes.txt
@@ -8,6 +8,7 @@ array (
'ftp' => true,
'nntp' => true,
'news' => true,
+ 'tel' => true,
)
--DESCRIPTION--
Whitelist that defines the schemes that a URI is allowed to have. This
diff --git a/plugin/htmlpurifier/standalone/HTMLPurifier/Printer/ConfigForm.php b/plugin/htmlpurifier/standalone/HTMLPurifier/Printer/ConfigForm.php
index 660960f37..3bc417366 100644
--- a/plugin/htmlpurifier/standalone/HTMLPurifier/Printer/ConfigForm.php
+++ b/plugin/htmlpurifier/standalone/HTMLPurifier/Printer/ConfigForm.php
@@ -327,6 +327,10 @@ class HTMLPurifier_Printer_ConfigForm_default extends HTMLPurifier_Printer
case HTMLPurifier_VarParser::HASH:
$nvalue = '';
foreach ($value as $i => $v) {
+ if (is_array($v)) {
+ // HACK
+ $v = implode(";", $v);
+ }
$nvalue .= "$i:$v" . PHP_EOL;
}
$value = $nvalue;