/*
  sanitize-html (Apostrophe Technologies)
  SOURCE: https://github.com/apostrophecms/sanitize-html
  LICENSE: https://github.com/apostrophecms/sanitize-html/blob/main/LICENSE

  Modified for audiobookshelf
*/
|
||
|
||
const htmlparser = require('htmlparser2');
|
||
// const escapeStringRegexp = require('escape-string-regexp');
|
||
// const { isPlainObject } = require('is-plain-object');
|
||
// const deepmerge = require('deepmerge');
|
||
// const parseSrcset = require('parse-srcset');
|
||
// const { parse: postcssParse } = require('postcss');
|
||
// Tags that can conceivably represent stand-alone media.
|
||
|
||
// ABS UPDATE: Packages not necessary
|
||
// SOURCE: https://github.com/sindresorhus/escape-string-regexp/blob/main/index.js
/**
 * Escapes RegExp metacharacters in `string` so the result can be embedded in a
 * pattern and match the input literally.
 *
 * @param {string} string - Text to escape.
 * @returns {string} The escaped text.
 * @throws {TypeError} If `string` is not a string.
 */
function escapeStringRegexp(string) {
  if (typeof string !== 'string') {
    throw new TypeError('Expected a string');
  }

  // Escape characters with special meaning either inside or outside character sets.
  // Use a simple backslash escape when it's always valid, and a `\xnn` escape when
  // the simpler form would be disallowed by Unicode patterns' stricter grammar.
  return string
    .replace(/[|\\{}()[\]^$+*?.]/g, '\\$&')
    .replace(/-/g, '\\x2d');
}
|
||
|
||
// SOURCE: https://github.com/jonschlinkert/is-plain-object/blob/master/is-plain-object.js
/**
 * Reports whether `o` has the `[object Object]` string tag.
 *
 * @param {*} o - Value to inspect.
 * @returns {boolean}
 */
function isObject(o) {
  return Object.prototype.toString.call(o) === '[object Object]';
}

/**
 * Reports whether `o` looks like a plain object: tagged `[object Object]` and
 * either without a constructor, or with a constructor whose prototype owns
 * `isPrototypeOf`.
 *
 * @param {*} o - Value to inspect.
 * @returns {boolean}
 */
function isPlainObject(o) {
  if (isObject(o) === false) {
    return false;
  }

  // No constructor at all (e.g. Object.create(null)) counts as plain.
  const ctor = o.constructor;
  if (ctor === undefined) {
    return true;
  }

  // A modified prototype that is not itself an object disqualifies the value.
  const prot = ctor.prototype;
  if (isObject(prot) === false) {
    return false;
  }

  // The constructor's prototype must own an Object-specific method. The check
  // goes through Object.prototype so that a prototype created with a null
  // prototype (which has no `hasOwnProperty` method) yields `false` instead
  // of throwing a TypeError.
  if (Object.prototype.hasOwnProperty.call(prot, 'isPrototypeOf') === false) {
    return false;
  }

  // Most likely a plain Object
  return true;
}
|
||
|
||
|
||
// Tags treated as media: when a frame with one of these tags closes, the tag
// name is recorded in its parent frame's `mediaChildren` list.
const mediaTags = [
  'img', 'audio', 'video', 'picture', 'svg',
  'object', 'map', 'iframe', 'embed'
];
// Tags that are inherently vulnerable to being used in XSS attacks.
const vulnerableTags = ['script', 'style'];
|
||
|
||
/**
 * Calls `cb(value, key)` for every own enumerable string key of `obj`.
 * Does nothing when `obj` is falsy.
 *
 * @param {Object|Array|null|undefined} obj - Collection to walk.
 * @param {function(*, string): void} cb - Receives the value, then the key.
 * @returns {void}
 */
function each(obj, cb) {
  if (!obj) {
    return;
  }
  // Object.keys takes a snapshot, so `cb` may delete entries while iterating.
  for (const key of Object.keys(obj)) {
    cb(obj[key], key);
  }
}
|
||
|
||
// Avoid false positives with .__proto__, .hasOwnProperty, etc.
/**
 * Reports whether `key` is an own property of `obj`.
 *
 * @param {Object} obj - Object to inspect.
 * @param {string} key - Property name.
 * @returns {boolean}
 */
function has(obj, key) {
  // Borrow the method from Object.prototype rather than a fresh `{}` per call.
  return Object.prototype.hasOwnProperty.call(obj, key);
}
|
||
|
||
/**
 * Returns the values of `a` for which `cb(value)` is truthy, as a new array.
 * A falsy `a` yields an empty array.
 *
 * @param {Object|Array|null|undefined} a - Collection to filter.
 * @param {function(*): *} cb - Predicate; called with the value only.
 * @returns {Array}
 */
function filter(a, cb) {
  if (!a) {
    return [];
  }
  return Object.values(a).filter(function (v) {
    return cb(v);
  });
}
|
||
|
||
/**
 * Reports whether `obj` has no own enumerable string-keyed properties.
 * `null` and `undefined` count as empty.
 *
 * @param {*} obj - Value to inspect.
 * @returns {boolean}
 */
function isEmptyObject(obj) {
  return obj == null || Object.keys(obj).length === 0;
}
|
||
|
||
/**
 * Serializes parsed srcset candidates back into a `srcset` attribute value.
 *
 * Each candidate contributes its `url` followed by whichever of the `w`, `h`
 * and `d` descriptors are truthy; candidates are joined with ", ".
 *
 * @param {Array<{url: string, w?: number, h?: number, d?: number}>} parsedSrcset
 * @returns {string}
 * @throws {Error} If a candidate has no `url`.
 */
function stringifySrcset(parsedSrcset) {
  return parsedSrcset.map(function (part) {
    if (!part.url) {
      throw new Error('URL missing');
    }

    return (
      part.url +
      (part.w ? ` ${part.w}w` : '') +
      (part.h ? ` ${part.h}h` : '') +
      (part.d ? ` ${part.d}x` : '')
    );
  }).join(', ');
}
|
||
|
||
module.exports = sanitizeHtml;
|
||
|
||
// A valid attribute name.
// We use a tolerant definition based on the set of strings defined by
// html.spec.whatwg.org/multipage/parsing.html#before-attribute-name-state
// and html.spec.whatwg.org/multipage/parsing.html#attribute-name-state .
// The characters accepted are ones which can be appended to the attribute
// name buffer without triggering a parse error:
//   * unexpected-equals-sign-before-attribute-name
//   * unexpected-null-character
//   * unexpected-character-in-attribute-name
// We exclude the empty string because it's impossible to get to the after
// attribute name state with an empty attribute name buffer.
const VALID_HTML_ATTRIBUTE_NAME = /^[^\0\t\n\f\r /<=>]+$/;
|
||
|
||
// Ignore the _recursing flag; it's there for recursive
|
||
// invocation as a guard against this exploit:
|
||
// https://github.com/fb55/htmlparser2/issues/105
|
||
|
||
/**
 * Sanitizes markup by streaming it through htmlparser2 and re-emitting only
 * the tags, attributes, classes and URL schemes permitted by `options`.
 *
 * @param {string} html - Markup to sanitize. `null`/`undefined` yield ''.
 * @param {Object} [options] - Overrides merged over `sanitizeHtml.defaults`;
 *   `options.parser` is merged over `htmlParserDefaults`.
 * @param {*} [_recursing] - Not read by this implementation; kept so the
 *   signature stays compatible with existing callers.
 * @returns {string} The sanitized markup.
 */
function sanitizeHtml(html, options, _recursing) {
  if (html == null) {
    return '';
  }

  let result = '';
  // Used for hot swapping the result variable with an empty string in order to "capture" the text written to it.
  let tempResult = '';

  // Book-keeping record for one open element.
  function Frame(tag, attribs) {
    const that = this;
    this.tag = tag;
    this.attribs = attribs || {};
    this.tagPosition = result.length;
    this.text = ''; // Node inner text
    this.mediaChildren = [];

    this.updateParentNodeText = function () {
      if (stack.length) {
        const parentFrame = stack[stack.length - 1];
        parentFrame.text += that.text;
      }
    };

    this.updateParentNodeMediaChildren = function () {
      if (stack.length && mediaTags.includes(this.tag)) {
        const parentFrame = stack[stack.length - 1];
        parentFrame.mediaChildren.push(this.tag);
      }
    };
  }

  options = Object.assign({}, sanitizeHtml.defaults, options);
  options.parser = Object.assign({}, htmlParserDefaults, options.parser);

  // vulnerableTags
  vulnerableTags.forEach(function (tag) {
    if (
      options.allowedTags && options.allowedTags.indexOf(tag) > -1 &&
      !options.allowVulnerableTags
    ) {
      console.warn(`\n\n⚠️ Your \`allowedTags\` option includes, \`${tag}\`, which is inherently\nvulnerable to XSS attacks. Please remove it from \`allowedTags\`.\nOr, to disable this warning, add the \`allowVulnerableTags\` option\nand ensure you are accounting for this risk.\n\n`);
    }
  });

  // Tags that contain something other than HTML, or where discarding
  // the text when the tag is disallowed makes sense for other reasons.
  // If we are not allowing these tags, we should drop their content too.
  // For other tags you would drop the tag but keep its content.
  const nonTextTagsArray = options.nonTextTags || [
    'script',
    'style',
    'textarea',
    'option'
  ];
  let allowedAttributesMap;
  let allowedAttributesGlobMap;
  if (options.allowedAttributes) {
    allowedAttributesMap = {};
    allowedAttributesGlobMap = {};
    each(options.allowedAttributes, function (attributes, tag) {
      allowedAttributesMap[tag] = [];
      const globRegex = [];
      attributes.forEach(function (obj) {
        if (typeof obj === 'string' && obj.indexOf('*') >= 0) {
          globRegex.push(escapeStringRegexp(obj).replace(/\\\*/g, '.*'));
        } else {
          allowedAttributesMap[tag].push(obj);
        }
      });
      if (globRegex.length) {
        allowedAttributesGlobMap[tag] = new RegExp('^(' + globRegex.join('|') + ')$');
      }
    });
  }
  const allowedClassesMap = {};
  const allowedClassesGlobMap = {};
  const allowedClassesRegexMap = {};
  each(options.allowedClasses, function (classes, tag) {
    // Implicitly allows the class attribute
    if (allowedAttributesMap) {
      if (!has(allowedAttributesMap, tag)) {
        allowedAttributesMap[tag] = [];
      }
      allowedAttributesMap[tag].push('class');
    }

    allowedClassesMap[tag] = [];
    allowedClassesRegexMap[tag] = [];
    const globRegex = [];
    classes.forEach(function (obj) {
      if (typeof obj === 'string' && obj.indexOf('*') >= 0) {
        globRegex.push(escapeStringRegexp(obj).replace(/\\\*/g, '.*'));
      } else if (obj instanceof RegExp) {
        allowedClassesRegexMap[tag].push(obj);
      } else {
        allowedClassesMap[tag].push(obj);
      }
    });
    if (globRegex.length) {
      allowedClassesGlobMap[tag] = new RegExp('^(' + globRegex.join('|') + ')$');
    }
  });

  const transformTagsMap = {};
  let transformTagsAll;
  each(options.transformTags, function (transform, tag) {
    let transFun;
    if (typeof transform === 'function') {
      transFun = transform;
    } else if (typeof transform === 'string') {
      transFun = sanitizeHtml.simpleTransform(transform);
    }
    if (tag === '*') {
      transformTagsAll = transFun;
    } else {
      transformTagsMap[tag] = transFun;
    }
  });

  let depth;
  let stack;
  let skipMap;
  let transformMap;
  let skipText;
  let skipTextDepth;
  let addedText = false;

  initializeState();

  const parser = new htmlparser.Parser({
    onopentag: function (name, attribs) {
      // If `enforceHtmlBoundary` is `true` and this has found the opening
      // `html` tag, reset the state.
      if (options.enforceHtmlBoundary && name === 'html') {
        initializeState();
      }

      if (skipText) {
        skipTextDepth++;
        return;
      }
      const frame = new Frame(name, attribs);
      stack.push(frame);

      let skip = false;
      const hasText = !!frame.text;
      let transformedTag;
      if (has(transformTagsMap, name)) {
        transformedTag = transformTagsMap[name](name, attribs);

        frame.attribs = attribs = transformedTag.attribs;

        if (transformedTag.text !== undefined) {
          frame.innerText = transformedTag.text;
        }

        if (name !== transformedTag.tagName) {
          frame.name = name = transformedTag.tagName;
          transformMap[depth] = transformedTag.tagName;
        }
      }
      if (transformTagsAll) {
        transformedTag = transformTagsAll(name, attribs);

        frame.attribs = attribs = transformedTag.attribs;
        if (name !== transformedTag.tagName) {
          frame.name = name = transformedTag.tagName;
          transformMap[depth] = transformedTag.tagName;
        }
      }

      if ((options.allowedTags && options.allowedTags.indexOf(name) === -1) || (options.disallowedTagsMode === 'recursiveEscape' && !isEmptyObject(skipMap)) || (options.nestingLimit != null && depth >= options.nestingLimit)) {
        skip = true;
        skipMap[depth] = true;
        if (options.disallowedTagsMode === 'discard') {
          if (nonTextTagsArray.indexOf(name) !== -1) {
            skipText = true;
            skipTextDepth = 1;
          }
        }
      }
      depth++;
      if (skip) {
        if (options.disallowedTagsMode === 'discard') {
          // We want the contents but not this tag
          return;
        }
        tempResult = result;
        result = '';
      }
      result += '<' + name;

      if (name === 'script') {
        if (options.allowedScriptHostnames || options.allowedScriptDomains) {
          frame.innerText = '';
        }
      }

      if (!allowedAttributesMap || has(allowedAttributesMap, name) || allowedAttributesMap['*']) {
        each(attribs, function (value, a) {
          if (!VALID_HTML_ATTRIBUTE_NAME.test(a)) {
            // This prevents part of an attribute name in the output from being
            // interpreted as the end of an attribute, or end of a tag.
            delete frame.attribs[a];
            return;
          }
          // check allowedAttributesMap for the element and attribute and modify the value
          // as necessary if there are specific values defined.
          let passedAllowedAttributesMapCheck = false;
          if (!allowedAttributesMap ||
            (has(allowedAttributesMap, name) && allowedAttributesMap[name].indexOf(a) !== -1) ||
            (allowedAttributesMap['*'] && allowedAttributesMap['*'].indexOf(a) !== -1) ||
            (has(allowedAttributesGlobMap, name) && allowedAttributesGlobMap[name].test(a)) ||
            (allowedAttributesGlobMap['*'] && allowedAttributesGlobMap['*'].test(a))) {
            passedAllowedAttributesMapCheck = true;
          } else if (allowedAttributesMap && allowedAttributesMap[name]) {
            for (const o of allowedAttributesMap[name]) {
              if (isPlainObject(o) && o.name && (o.name === a)) {
                passedAllowedAttributesMapCheck = true;
                let newValue = '';
                if (o.multiple === true) {
                  // verify the values that are allowed
                  const splitStrArray = value.split(' ');
                  for (const s of splitStrArray) {
                    if (o.values.indexOf(s) !== -1) {
                      if (newValue === '') {
                        newValue = s;
                      } else {
                        newValue += ' ' + s;
                      }
                    }
                  }
                } else if (o.values.indexOf(value) >= 0) {
                  // verified an allowed value matches the entire attribute value
                  newValue = value;
                }
                value = newValue;
              }
            }
          }
          if (passedAllowedAttributesMapCheck) {
            if (options.allowedSchemesAppliedToAttributes.indexOf(a) !== -1) {
              if (naughtyHref(name, value)) {
                delete frame.attribs[a];
                return;
              }
            }

            if (name === 'script' && a === 'src') {

              let allowed = true;

              try {
                const parsed = new URL(value);

                if (options.allowedScriptHostnames || options.allowedScriptDomains) {
                  const allowedHostname = (options.allowedScriptHostnames || []).find(function (hostname) {
                    return hostname === parsed.hostname;
                  });
                  const allowedDomain = (options.allowedScriptDomains || []).find(function (domain) {
                    return parsed.hostname === domain || parsed.hostname.endsWith(`.${domain}`);
                  });
                  allowed = allowedHostname || allowedDomain;
                }
              } catch (e) {
                allowed = false;
              }

              if (!allowed) {
                delete frame.attribs[a];
                return;
              }
            }

            if (name === 'iframe' && a === 'src') {
              let allowed = true;
              try {
                // Chrome accepts \ as a substitute for / in the // at the
                // start of a URL, so rewrite accordingly to prevent exploit.
                // Also drop any whitespace at that point in the URL
                value = value.replace(/^(\w+:)?\s*[\\/]\s*[\\/]/, '$1//');
                if (value.startsWith('relative:')) {
                  // An attempt to exploit our workaround for base URLs being
                  // mandatory for relative URL validation in the WHATWG
                  // URL parser, reject it
                  throw new Error('relative: exploit attempt');
                }
                // naughtyHref is in charge of whether protocol relative URLs
                // are cool. Here we are concerned just with allowed hostnames and
                // whether to allow relative URLs.
                //
                // Build a placeholder "base URL" against which any reasonable
                // relative URL may be parsed successfully
                let base = 'relative://relative-site';
                for (let i = 0; (i < 100); i++) {
                  base += `/${i}`;
                }
                const parsed = new URL(value, base);
                const isRelativeUrl = parsed && parsed.hostname === 'relative-site' && parsed.protocol === 'relative:';
                if (isRelativeUrl) {
                  // default value of allowIframeRelativeUrls is true
                  // unless allowedIframeHostnames or allowedIframeDomains specified
                  allowed = has(options, 'allowIframeRelativeUrls')
                    ? options.allowIframeRelativeUrls
                    : (!options.allowedIframeHostnames && !options.allowedIframeDomains);
                } else if (options.allowedIframeHostnames || options.allowedIframeDomains) {
                  const allowedHostname = (options.allowedIframeHostnames || []).find(function (hostname) {
                    return hostname === parsed.hostname;
                  });
                  const allowedDomain = (options.allowedIframeDomains || []).find(function (domain) {
                    return parsed.hostname === domain || parsed.hostname.endsWith(`.${domain}`);
                  });
                  allowed = allowedHostname || allowedDomain;
                }
              } catch (e) {
                // Unparseable iframe src
                allowed = false;
              }
              if (!allowed) {
                delete frame.attribs[a];
                return;
              }
            }
            if (a === 'srcset') {
              // ABS UPDATE: srcset not necessary
              // The srcset parser was removed, so the value cannot be
              // validated. Drop the attribute entirely; returning here keeps
              // it out of the output as well as out of frame.attribs.
              delete frame.attribs[a];
              return;

              // try {
              //   parsed = parseSrcset(value);
              //   parsed.forEach(function (value) {
              //     if (naughtyHref('srcset', value.url)) {
              //       value.evil = true;
              //     }
              //   });
              //   parsed = filter(parsed, function (v) {
              //     return !v.evil;
              //   });
              //   if (!parsed.length) {
              //     delete frame.attribs[a];
              //     return;
              //   } else {
              //     value = stringifySrcset(filter(parsed, function (v) {
              //       return !v.evil;
              //     }));
              //     frame.attribs[a] = value;
              //   }
              // } catch (e) {
              //   // Unparseable srcset
              //   delete frame.attribs[a];
              //   return;
              // }
            }
            if (a === 'class') {
              const allowedSpecificClasses = allowedClassesMap[name];
              const allowedWildcardClasses = allowedClassesMap['*'];
              const allowedSpecificClassesGlob = allowedClassesGlobMap[name];
              const allowedSpecificClassesRegex = allowedClassesRegexMap[name];
              const allowedWildcardClassesGlob = allowedClassesGlobMap['*'];
              const allowedClassesGlobs = [
                allowedSpecificClassesGlob,
                allowedWildcardClassesGlob
              ]
                .concat(allowedSpecificClassesRegex)
                .filter(function (t) {
                  return t;
                });
              if (allowedSpecificClasses && allowedWildcardClasses) {
                // ABS UPDATE: deepmerge removed. Both lists are arrays, so a
                // plain concat gives the combined allowlist; without it the
                // class value would pass through unfiltered.
                value = filterClasses(value, allowedSpecificClasses.concat(allowedWildcardClasses), allowedClassesGlobs);
              } else {
                value = filterClasses(value, allowedSpecificClasses || allowedWildcardClasses, allowedClassesGlobs);
              }
              if (!value.length) {
                delete frame.attribs[a];
                return;
              }
            }
            if (a === 'style') {
              // ABS UPDATE: Styles not necessary
              // The CSS parser was removed, so the value cannot be filtered.
              // Drop the attribute entirely; returning here keeps it out of
              // the output as well as out of frame.attribs.
              delete frame.attribs[a];
              return;

              // try {
              //   const abstractSyntaxTree = postcssParse(name + ' {' + value + '}');
              //   const filteredAST = filterCss(abstractSyntaxTree, options.allowedStyles);

              //   value = stringifyStyleAttributes(filteredAST);

              //   if (value.length === 0) {
              //     delete frame.attribs[a];
              //     return;
              //   }
              // } catch (e) {
              //   delete frame.attribs[a];
              //   return;
              // }
            }
            result += ' ' + a;
            if (value && value.length) {
              result += '="' + escapeHtml(value, true) + '"';
            }
          } else {
            delete frame.attribs[a];
          }
        });
      }
      if (options.selfClosing.indexOf(name) !== -1) {
        result += ' />';
      } else {
        result += '>';
        if (frame.innerText && !hasText && !options.textFilter) {
          result += escapeHtml(frame.innerText);
          addedText = true;
        }
      }
      if (skip) {
        result = tempResult + escapeHtml(result);
        tempResult = '';
      }
    },
    ontext: function (text) {
      if (skipText) {
        return;
      }
      const lastFrame = stack[stack.length - 1];
      let tag;

      if (lastFrame) {
        tag = lastFrame.tag;
        // If inner text was set by transform function then let's use it
        text = lastFrame.innerText !== undefined ? lastFrame.innerText : text;
      }

      if (options.disallowedTagsMode === 'discard' && ((tag === 'script') || (tag === 'style'))) {
        // htmlparser2 gives us these as-is. Escaping them ruins the content. Allowing
        // script tags is, by definition, game over for XSS protection, so if that's
        // your concern, don't allow them. The same is essentially true for style tags
        // which have their own collection of XSS vectors.
        result += text;
      } else {
        const escaped = escapeHtml(text, false);
        if (options.textFilter && !addedText) {
          result += options.textFilter(escaped, tag);
        } else if (!addedText) {
          result += escaped;
        }
      }
      if (stack.length) {
        const frame = stack[stack.length - 1];
        frame.text += text;
      }
    },
    onclosetag: function (name) {

      if (skipText) {
        skipTextDepth--;
        if (!skipTextDepth) {
          skipText = false;
        } else {
          return;
        }
      }

      const frame = stack.pop();
      if (!frame) {
        // Do not crash on bad markup
        return;
      }
      skipText = options.enforceHtmlBoundary ? name === 'html' : false;
      depth--;
      const skip = skipMap[depth];
      if (skip) {
        delete skipMap[depth];
        if (options.disallowedTagsMode === 'discard') {
          frame.updateParentNodeText();
          return;
        }
        tempResult = result;
        result = '';
      }

      if (transformMap[depth]) {
        name = transformMap[depth];
        delete transformMap[depth];
      }

      if (options.exclusiveFilter && options.exclusiveFilter(frame)) {
        // Roll the output back to where this element's open tag started.
        result = result.slice(0, frame.tagPosition);
        return;
      }

      frame.updateParentNodeMediaChildren();
      frame.updateParentNodeText();

      if (options.selfClosing.indexOf(name) !== -1) {
        // Already output />
        if (skip) {
          result = tempResult;
          tempResult = '';
        }
        return;
      }

      result += '</' + name + '>';
      if (skip) {
        result = tempResult + escapeHtml(result);
        tempResult = '';
      }
      addedText = false;
    }
  }, options.parser);
  parser.write(html);
  parser.end();

  return result;

  // Resets the output buffer and all per-document parser state.
  function initializeState() {
    result = '';
    depth = 0;
    stack = [];
    skipMap = {};
    transformMap = {};
    skipText = false;
    skipTextDepth = 0;
  }

  /**
   * Escapes `s` for inclusion in HTML output.
   *
   * @param {*} s - Value to escape; non-strings are coerced to string.
   * @param {boolean} [quote] - Also escape double quotes (attribute values).
   * @returns {string}
   */
  function escapeHtml(s, quote) {
    if (typeof (s) !== 'string') {
      s = s + '';
    }
    if (options.parser.decodeEntities) {
      s = s.replace(/&/g, '&amp;').replace(/</g, '&lt;').replace(/>/g, '&gt;');
      if (quote) {
        s = s.replace(/"/g, '&quot;');
      }
    }
    // TODO: this is inadequate because it will pass `&0;`. This approach
    // will not work, each & must be considered with regard to whether it
    // is followed by a 100% syntactically valid entity or not, and escaped
    // if it is not. If this bothers you, don't set parser.decodeEntities
    // to false. (The default is true.)
    s = s.replace(/&(?![a-zA-Z0-9#]{1,20};)/g, '&amp;') // Match ampersands not part of existing HTML entity
      .replace(/</g, '&lt;')
      .replace(/>/g, '&gt;');
    if (quote) {
      s = s.replace(/"/g, '&quot;');
    }
    return s;
  }

  /**
   * Reports whether `href` must be rejected for tag `name` because of its
   * scheme (or because it is protocol-relative and that is not allowed).
   *
   * @param {string} name - Tag name, used to look up `allowedSchemesByTag`.
   * @param {string} href - Attribute value to inspect.
   * @returns {boolean} `true` when the URL is not acceptable.
   */
  function naughtyHref(name, href) {
    // Browsers ignore character codes of 32 (space) and below in a surprising
    // number of situations. Start reading here:
    // https://www.owasp.org/index.php/XSS_Filter_Evasion_Cheat_Sheet#Embedded_tab
    // eslint-disable-next-line no-control-regex
    href = href.replace(/[\x00-\x20]+/g, '');
    // Clobber any comments in URLs, which the browser might
    // interpret inside an XML data island, allowing
    // a javascript: URL to be snuck through
    href = href.replace(/<!--.*?-->/g, '');
    // Case insensitive so we don't get faked out by JAVASCRIPT #1
    // Allow more characters after the first so we don't get faked
    // out by certain schemes browsers accept
    const matches = href.match(/^([a-zA-Z][a-zA-Z0-9.\-+]*):/);
    if (!matches) {
      // Protocol-relative URL starting with any combination of '/' and '\'
      if (href.match(/^[/\\]{2}/)) {
        return !options.allowProtocolRelative;
      }

      // No scheme
      return false;
    }
    const scheme = matches[1].toLowerCase();

    if (has(options.allowedSchemesByTag, name)) {
      return options.allowedSchemesByTag[name].indexOf(scheme) === -1;
    }

    return !options.allowedSchemes || options.allowedSchemes.indexOf(scheme) === -1;
  }

  /**
   * Filters user input css properties by allowlisted regex attributes.
   * Modifies the abstractSyntaxTree object.
   *
   * @param {object} abstractSyntaxTree  - Object representation of CSS attributes.
   * @property {array[Declaration]} abstractSyntaxTree.nodes[0] - Each object contains prop and value key, i.e { prop: 'color', value: 'red' }.
   * @param {object} allowedStyles       - Keys are properties (i.e color), value is list of permitted regex rules (i.e /green/i).
   * @return {object}                    - The modified tree.
   */
  // ABS UPDATE: disabled together with the style-attribute handling above.
  // function filterCss(abstractSyntaxTree, allowedStyles) {
  //   if (!allowedStyles) {
  //     return abstractSyntaxTree;
  //   }

  //   const astRules = abstractSyntaxTree.nodes[0];
  //   let selectedRule;

  //   // Merge global and tag-specific styles into new AST.
  //   if (allowedStyles[astRules.selector] && allowedStyles['*']) {
  //     selectedRule = deepmerge(
  //       allowedStyles[astRules.selector],
  //       allowedStyles['*']
  //     );
  //   } else {
  //     selectedRule = allowedStyles[astRules.selector] || allowedStyles['*'];
  //   }

  //   if (selectedRule) {
  //     abstractSyntaxTree.nodes[0].nodes = astRules.nodes.reduce(filterDeclarations(selectedRule), []);
  //   }

  //   return abstractSyntaxTree;
  // }

  /**
   * Extracts the style attributes from an AbstractSyntaxTree and formats those
   * values in the inline style attribute format.
   *
   * @param  {AbstractSyntaxTree} filteredAST
   * @return {string}             - Example: "color:yellow;text-align:center !important;font-family:helvetica;"
   */
  function stringifyStyleAttributes(filteredAST) {
    return filteredAST.nodes[0].nodes
      .reduce(function (extractedAttributes, attrObject) {
        extractedAttributes.push(
          `${attrObject.prop}:${attrObject.value}${attrObject.important ? ' !important' : ''}`
        );
        return extractedAttributes;
      }, [])
      .join(';');
  }

  /**
   * Filters the existing attributes for the given property. Discards any attributes
   * which don't match the allowlist.
   *
   * @param  {object} selectedRule             - Example: { color: red, font-family: helvetica }
   * @param  {array} allowedDeclarationsList   - List of declarations which pass the allowlist.
   * @param  {object} attributeObject          - Object representing the current css property.
   * @property {string} attributeObject.type   - Typically 'declaration'.
   * @property {string} attributeObject.prop   - The CSS property, i.e 'color'.
   * @property {string} attributeObject.value  - The corresponding value to the css property, i.e 'red'.
   * @return {function}                        - When used in Array.reduce, will return an array of Declaration objects
   */
  function filterDeclarations(selectedRule) {
    return function (allowedDeclarationsList, attributeObject) {
      // If this property is allowlisted...
      if (has(selectedRule, attributeObject.prop)) {
        const matchesRegex = selectedRule[attributeObject.prop].some(function (regularExpression) {
          return regularExpression.test(attributeObject.value);
        });

        if (matchesRegex) {
          allowedDeclarationsList.push(attributeObject);
        }
      }
      return allowedDeclarationsList;
    };
  }

  /**
   * Keeps only the class names that appear in `allowed` or match one of
   * `allowedGlobs`.
   *
   * @param {string} classes - Raw `class` attribute value.
   * @param {string[]|undefined} allowed - Exact allowlist; when falsy the
   *   value is returned untouched.
   * @param {RegExp[]} allowedGlobs - Patterns that also permit a class name.
   * @returns {string} Space-joined surviving class names.
   */
  function filterClasses(classes, allowed, allowedGlobs) {
    if (!allowed) {
      // The class attribute is allowed without filtering on this tag
      return classes;
    }
    classes = classes.split(/\s+/);
    return classes.filter(function (clss) {
      return allowed.indexOf(clss) !== -1 || allowedGlobs.some(function (glob) {
        return glob.test(clss);
      });
    }).join(' ');
  }
}
|
||
|
||
// Defaults are accessible to you so that you can use them as a starting point
|
||
// programmatically if you wish
|
||
|
||
// Default htmlparser2 options; `options.parser` passed to sanitizeHtml is
// merged over these.
const htmlParserDefaults = {
  decodeEntities: true
};
|
||
// Default options; the `options` argument of sanitizeHtml is merged over these.
sanitizeHtml.defaults = {
  allowedTags: [
    // Sections derived from MDN element categories and limited to the more
    // benign categories.
    // https://developer.mozilla.org/en-US/docs/Web/HTML/Element
    // Content sectioning
    'address', 'article', 'aside', 'footer', 'header',
    'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'hgroup',
    'main', 'nav', 'section',
    // Text content ('main' is already listed under content sectioning)
    'blockquote', 'dd', 'div', 'dl', 'dt', 'figcaption', 'figure',
    'hr', 'li', 'ol', 'p', 'pre', 'ul',
    // Inline text semantics
    'a', 'abbr', 'b', 'bdi', 'bdo', 'br', 'cite', 'code', 'data', 'dfn',
    'em', 'i', 'kbd', 'mark', 'q',
    'rb', 'rp', 'rt', 'rtc', 'ruby',
    's', 'samp', 'small', 'span', 'strong', 'sub', 'sup', 'time', 'u', 'var', 'wbr',
    // Table content
    'caption', 'col', 'colgroup', 'table', 'tbody', 'td', 'tfoot', 'th',
    'thead', 'tr'
  ],
  disallowedTagsMode: 'discard',
  allowedAttributes: {
    a: ['href', 'name', 'target'],
    // We don't currently allow img itself by default, but
    // these attributes would make sense if we did.
    img: ['src', 'srcset', 'alt', 'title', 'width', 'height', 'loading']
  },
  // Lots of these won't come up by default because we don't allow them
  selfClosing: ['img', 'br', 'hr', 'area', 'base', 'basefont', 'input', 'link', 'meta'],
  // URL schemes we permit
  allowedSchemes: ['http', 'https', 'ftp', 'mailto', 'tel'],
  allowedSchemesByTag: {},
  allowedSchemesAppliedToAttributes: ['href', 'src', 'cite'],
  allowProtocolRelative: true,
  enforceHtmlBoundary: false
};
|
||
|
||
/**
 * Builds a tag transform that renames a tag and sets attributes on it.
 *
 * @param {string} newTagName - Tag name the transform reports.
 * @param {Object} [newAttribs] - Attributes to apply; defaults to `{}`.
 * @param {boolean} [merge=true] - When truthy, `newAttribs` is copied onto the
 *   incoming attribute object; otherwise `newAttribs` replaces it.
 * @returns {function(string, Object): {tagName: string, attribs: Object}}
 */
sanitizeHtml.simpleTransform = function (newTagName, newAttribs, merge) {
  merge = (merge === undefined) ? true : merge;
  newAttribs = newAttribs || {};

  return function (tagName, attribs) {
    if (merge) {
      // Copy own keys only: a for...in loop would also pull enumerable keys
      // inherited from newAttribs' prototype chain into the output.
      for (const attrib of Object.keys(newAttribs)) {
        attribs[attrib] = newAttribs[attrib];
      }
    } else {
      attribs = newAttribs;
    }

    return {
      tagName: newTagName,
      attribs: attribs
    };
  };
};