106 lines
4.0 KiB
JavaScript
106 lines
4.0 KiB
JavaScript
"use strict";
|
|
// Default-import interop helper (has the shape of the helper the TypeScript
// compiler emits). A helper already attached to `this` is reused; otherwise a
// local one is defined.
var __importDefault = (this && this.__importDefault) || function (mod) {
    // Exports flagged with `__esModule` already expose `.default`: pass through.
    if (mod && mod.__esModule)
        return mod;

    // Anything else is wrapped so that callers can always read `.default`.
    return { "default": mod };
};
|
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
const labels_1 = __importDefault(require("./labels"));
|
|
const buffer_1 = require("../../utils/buffer");
|
|
const builtin_header_names_1 = __importDefault(require("../../request-pipeline/builtin-header-names"));
|
|
// Extracts the `charset` parameter from a Content-Type header value.
// The captured value stops at the next `;`, so trailing parameters
// (e.g. `; boundary=...`) and a trailing `;` are not swallowed, and optional
// double quotes around the value (`charset="utf-8"`) are dropped.
const CHARSET_RE = /(?:^|;)\s*charset="?([^";]+)"?/i;
// Extracts the charset from the `content` attribute of a
// <meta http-equiv="content-type"> descriptor; quotes around the value are optional.
const META_CHARSET_RE = /charset ?= ?['"]?([^ ;"']*)['"]?/i;
// NOTE: HTTP 1.1 specifies ISO-8859-1 as the default charset
// (see: http://www.w3.org/International/O-HTTP-charset.en.php).
const DEFAULT_CHARSET = 'iso-8859-1';
|
|
// Byte-order marks that can prefix a response body, each paired with the
// charset it identifies. Every entry has the shape `{ charset, bom }`, where
// `bom` is a Buffer holding the marker bytes.
const CHARSET_BOM_LIST = [
    ['utf-8', [0xEF, 0xBB, 0xBF]],
    ['utf-16le', [0xFF, 0xFE]],
    ['utf-16be', [0xFE, 0xFF]],
].map(([charset, bytes]) => ({ charset, bom: Buffer.from(bytes) }));
|
|
// Ranks the sources a charset can come from; a larger number is a stronger source.
// URL and META deliberately share rank 1. The object also carries the numeric
// reverse mapping (rank -> name); because META is registered after URL,
// `CharsetPriority[1]` resolves to "META".
var CharsetPriority;
(function (priorities) {
    const ranking = [
        ['BOM', 3],
        ['CONTENT_TYPE', 2],
        ['URL', 1],
        ['META', 1],
        ['DEFAULT', 0],
    ];

    for (const [name, rank] of ranking) {
        priorities[name] = rank;
        priorities[rank] = name;
    }
})(CharsetPriority || (CharsetPriority = {}));
|
|
/**
 * Holds the charset selected for a piece of content together with the priority
 * of the source that supplied it, so that a weaker source (e.g. a <meta> tag)
 * cannot override a stronger one (e.g. a BOM or the Content-Type header).
 */
class Charset {
    constructor() {
        this.charset = DEFAULT_CHARSET;
        this.priority = CharsetPriority.DEFAULT;
    }

    /**
     * Offers a charset coming from a source of the given priority.
     *
     * @param {string} charset - Charset name; falsy values are ignored.
     * @param {number} priority - One of the `CharsetPriority` values.
     * @returns {boolean} `true` only when the stored charset value changed.
     */
    set(charset, priority) {
        if (!charset || this.priority > priority)
            return false;

        // Record the source priority even when the value itself is unchanged.
        // Otherwise a strong source that merely confirms the current charset
        // (e.g. an explicit `charset=iso-8859-1` header, or a BOM matching the
        // header) leaves the old, weaker priority in place, and a later weaker
        // source can still override it / `isFromBOM()` misreports.
        this.priority = priority;

        if (this.charset === charset)
            return false;

        this.charset = charset;

        return true;
    }

    /**
     * @returns {string} The currently selected charset.
     */
    get() {
        return this.charset;
    }

    /**
     * @returns {boolean} Whether the current priority is the BOM priority.
     */
    isFromBOM() {
        return this.priority === CharsetPriority.BOM;
    }

    /**
     * Detects the charset from a byte-order mark at the start of `resBuf`.
     *
     * @param {Buffer} resBuf - Content bytes to inspect.
     * @returns {boolean} `true` if a known BOM was found and the charset changed.
     */
    fromBOM(resBuf) {
        for (const { charset, bom } of CHARSET_BOM_LIST) {
            if ((0, buffer_1.startsWith)(resBuf, bom))
                return this.set(charset, CharsetPriority.BOM);
        }

        return false;
    }

    /**
     * Reads the charset from a Content-Type header value.
     *
     * @param {string} [contentTypeHeader] - Raw header value; may be empty or missing.
     * @returns {boolean} `true` if the charset changed.
     */
    fromContentType(contentTypeHeader) {
        if (this.priority > CharsetPriority.CONTENT_TYPE)
            return false;

        const charsetMatch = contentTypeHeader && contentTypeHeader.match(CHARSET_RE);
        const charset = charsetMatch && charsetMatch[1];

        if (!charset)
            return false;

        return this.set((0, labels_1.default)(charset), CharsetPriority.CONTENT_TYPE);
    }

    /**
     * Applies a charset label supplied via the URL.
     *
     * @param {string} [charsetFromUrl] - Charset label; falsy values are ignored.
     * @returns {boolean} `true` if the charset changed.
     */
    fromUrl(charsetFromUrl) {
        if (charsetFromUrl && this.priority <= CharsetPriority.URL)
            return this.set((0, labels_1.default)(charsetFromUrl), CharsetPriority.URL);

        return false;
    }

    // NOTE: Parsing charset from meta tags
    // www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#determining-the-character-encoding
    // Each <meta> descriptor should contain values of the "http-equiv", "content" and "charset" attributes.
    /**
     * Derives the charset from <meta> descriptors. Only applied while the
     * current priority is strictly below META, so it never replaces a charset
     * that came from the URL (which shares the same rank).
     *
     * @param {Array<{httpEquiv?: string, content?: string, charset?: string}>} metas
     * @returns {boolean} `true` if the charset changed.
     */
    fromMeta(metas) {
        if (this.priority >= CharsetPriority.META || !metas || !metas.length)
            return false;

        // Stays `true` until a `charset` attribute is seen; after that,
        // http-equiv pragmas are no longer consulted.
        let needPragma = true;
        let charsetStr = '';

        metas.forEach(attrs => {
            const shouldParseFromContentAttr = needPragma && attrs.content && attrs.httpEquiv &&
                attrs.httpEquiv.toLowerCase() === builtin_header_names_1.default.contentType;

            if (shouldParseFromContentAttr) {
                const charsetMatch = attrs.content.match(META_CHARSET_RE);

                if (charsetMatch)
                    charsetStr = charsetMatch[1];
            }

            if (attrs.charset) {
                needPragma = false;
                charsetStr = attrs.charset;
            }
        });

        return this.set((0, labels_1.default)(charsetStr), CharsetPriority.META);
    }
}
|
|
exports.default = Charset;module.exports = exports.default;
|
|
|