You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
131 lines
4.3 KiB
131 lines
4.3 KiB
"use strict"; |
|
/**
 * CommonJS/ES-module interop helper.
 *
 * Reuses an `__importDefault` already present on `this` when there is one;
 * otherwise falls back to the local implementation below.
 *
 * @param {*} mod - The value returned by `require`.
 * @returns {*} `mod` itself when it is truthy and flagged with `__esModule`,
 *   otherwise a wrapper object of the form `{ default: mod }`.
 */
var __importDefault = (this && this.__importDefault) || function (mod) {
    return (mod && mod.__esModule) ? mod : { "default": mod };
};
// Flag this CommonJS module as a transpiled ES module for interop helpers.
Object.defineProperty(exports, "__esModule", { value: true });
// Placeholder for the export; the real function is assigned after it is defined.
exports.decodeHtml = void 0;
// Lookup table of named character references, keyed by the text after "&".
const namedChars_json_1 = __importDefault(require("./namedChars.json"));
// lazy compute this to make this file tree-shakable for browser
// (holds the length of the longest key in the table once decodeHtml has needed it)
let maxCRNameLength;
/**
 * Decodes HTML character references in `rawText`: named (`&amp;`), decimal
 * (`&#38;`) and hexadecimal (`&#x26;` / `&#X26;`). Anything that is not a
 * recognised reference is copied to the output unchanged.
 *
 * @param {string} rawText - Text that may contain character references.
 * @param {boolean} [asAttr] - When truthy, a named reference that lacks its
 *   trailing `;` and is immediately followed by `=` or an ASCII alphanumeric
 *   is kept as literal text instead of being decoded.
 * @returns {string} The decoded text.
 */
const decodeHtml = (rawText, asAttr) => {
    const end = rawText.length;
    let offset = 0;
    let decodedText = '';
    // Unconsumed tail of the input; a local cursor so the parameter is not reassigned.
    let rest = rawText;
    function advance(length) {
        offset += length;
        rest = rest.slice(length);
    }
    while (offset < end) {
        const head = /&(?:#x?)?/i.exec(rest);
        if (!head || offset + head.index >= end) {
            // No further reference: flush whatever is left.
            const remaining = end - offset;
            decodedText += rest.slice(0, remaining);
            advance(remaining);
            break;
        }
        // Advance to the "&".
        decodedText += rest.slice(0, head.index);
        advance(head.index);
        if (head[0] === '&') {
            // Named character reference.
            let name = '';
            let value = undefined;
            // `|| ''` keeps a trailing "&" from being tested as the string "undefined".
            if (/[0-9a-z]/i.test(rest[1] || '')) {
                const namedChars = namedChars_json_1.default;
                if (!maxCRNameLength) {
                    maxCRNameLength = Object.keys(namedChars).reduce((max, key) => Math.max(max, key.length), 0);
                }
                // Try the longest candidate first so that the longest known name wins.
                for (let length = maxCRNameLength; !value && length > 0; --length) {
                    name = rest.slice(1, 1 + length);
                    // Own-property check: names such as "constructor" or "toString"
                    // must not resolve to members inherited from Object.prototype.
                    value = Object.prototype.hasOwnProperty.call(namedChars, name)
                        ? namedChars[name]
                        : undefined;
                }
                if (value) {
                    const semi = name.endsWith(';');
                    if (asAttr &&
                        !semi &&
                        /[=a-z0-9]/i.test(rest[name.length + 1] || '')) {
                        // In attribute mode this is treated as plain text, not a reference.
                        decodedText += '&' + name;
                        advance(1 + name.length);
                    }
                    else {
                        decodedText += value;
                        advance(1 + name.length);
                    }
                }
                else {
                    // Unknown name: emit what was consumed; the rest is handled by the loop.
                    decodedText += '&' + name;
                    advance(1 + name.length);
                }
            }
            else {
                decodedText += '&';
                advance(1);
            }
        }
        else {
            // Numeric character reference.
            // The head regex is case-insensitive, so accept both "&#x" and "&#X".
            const hex = head[0].toLowerCase() === '&#x';
            const pattern = hex ? /^&#x([0-9a-f]+);?/i : /^&#([0-9]+);?/;
            const body = pattern.exec(rest);
            if (!body) {
                // No digits follow: keep the prefix exactly as written.
                decodedText += head[0];
                advance(head[0].length);
            }
            else {
                // https://html.spec.whatwg.org/multipage/parsing.html#numeric-character-reference-end-state
                let cp = Number.parseInt(body[1], hex ? 16 : 10);
                if (cp === 0 || cp > 0x10ffff || (cp >= 0xd800 && cp <= 0xdfff)) {
                    // NULL, out-of-range and surrogate code points become U+FFFD.
                    cp = 0xfffd;
                }
                else if ((cp >= 0x01 && cp <= 0x08) ||
                    cp === 0x0b ||
                    (cp >= 0x0d && cp <= 0x1f) ||
                    (cp >= 0x7f && cp <= 0x9f)) {
                    // Control code points are remapped when the table has an entry.
                    cp = CCR_REPLACEMENTS[cp] || cp;
                }
                // All other code points (noncharacters included) pass through unchanged.
                decodedText += String.fromCodePoint(cp);
                advance(body[0].length);
            }
        }
    }
    return decodedText;
};
// Publish the decoder as this module's named export.
exports.decodeHtml = decodeHtml;
// https://html.spec.whatwg.org/multipage/parsing.html#numeric-character-reference-end-state
// Replacement code points for numeric character references in the 0x80-0x9f
// range. Code points without an entry here (0x81, 0x8d, 0x8f, 0x90, 0x9d) are
// left unchanged by decodeHtml.
const CCR_REPLACEMENTS = {
    0x80: 0x20ac, // EURO SIGN
    0x82: 0x201a, // SINGLE LOW-9 QUOTATION MARK
    0x83: 0x0192, // LATIN SMALL LETTER F WITH HOOK
    0x84: 0x201e, // DOUBLE LOW-9 QUOTATION MARK
    0x85: 0x2026, // HORIZONTAL ELLIPSIS
    0x86: 0x2020, // DAGGER
    0x87: 0x2021, // DOUBLE DAGGER
    0x88: 0x02c6, // MODIFIER LETTER CIRCUMFLEX ACCENT
    0x89: 0x2030, // PER MILLE SIGN
    0x8a: 0x0160, // LATIN CAPITAL LETTER S WITH CARON
    0x8b: 0x2039, // SINGLE LEFT-POINTING ANGLE QUOTATION MARK
    0x8c: 0x0152, // LATIN CAPITAL LIGATURE OE
    0x8e: 0x017d, // LATIN CAPITAL LETTER Z WITH CARON
    0x91: 0x2018, // LEFT SINGLE QUOTATION MARK
    0x92: 0x2019, // RIGHT SINGLE QUOTATION MARK
    0x93: 0x201c, // LEFT DOUBLE QUOTATION MARK
    0x94: 0x201d, // RIGHT DOUBLE QUOTATION MARK
    0x95: 0x2022, // BULLET
    0x96: 0x2013, // EN DASH
    0x97: 0x2014, // EM DASH
    0x98: 0x02dc, // SMALL TILDE
    0x99: 0x2122, // TRADE MARK SIGN
    0x9a: 0x0161, // LATIN SMALL LETTER S WITH CARON
    0x9b: 0x203a, // SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
    0x9c: 0x0153, // LATIN SMALL LIGATURE OE
    0x9e: 0x017e, // LATIN SMALL LETTER Z WITH CARON
    0x9f: 0x0178, // LATIN CAPITAL LETTER Y WITH DIAERESIS
};