"use strict"; var __importDefault = (this && this.__importDefault) || function (mod) { return (mod && mod.__esModule) ? mod : { "default": mod }; }; Object.defineProperty(exports, "__esModule", { value: true }); const labels_1 = __importDefault(require("./labels")); const buffer_1 = require("../../utils/buffer"); const builtin_header_names_1 = __importDefault(require("../../request-pipeline/builtin-header-names")); const CHARSET_RE = /(?:^|;)\s*charset=(.+)(?:;|$)/i; const META_CHARSET_RE = /charset ?= ?['"]?([^ ;"']*)['"]?/i; // NOTE: HTTP 1.1 specifies ISO-8859-1 as the default charset // (see: http://www.w3.org/International/O-HTTP-charset.en.php). const DEFAULT_CHARSET = 'iso-8859-1'; const CHARSET_BOM_LIST = [ { charset: 'utf-8', bom: Buffer.from([0xEF, 0xBB, 0xBF]), }, { charset: 'utf-16le', bom: Buffer.from([0xFF, 0xFE]), }, { charset: 'utf-16be', bom: Buffer.from([0xFE, 0xFF]), }, ]; var CharsetPriority; (function (CharsetPriority) { CharsetPriority[CharsetPriority["BOM"] = 3] = "BOM"; CharsetPriority[CharsetPriority["CONTENT_TYPE"] = 2] = "CONTENT_TYPE"; CharsetPriority[CharsetPriority["URL"] = 1] = "URL"; CharsetPriority[CharsetPriority["META"] = 1] = "META"; CharsetPriority[CharsetPriority["DEFAULT"] = 0] = "DEFAULT"; })(CharsetPriority || (CharsetPriority = {})); class Charset { constructor() { this.charset = DEFAULT_CHARSET; this.priority = CharsetPriority.DEFAULT; } set(charset, priority) { if (charset && this.charset !== charset && this.priority <= priority) { this.charset = charset; this.priority = priority; return true; } return false; } get() { return this.charset; } isFromBOM() { return this.priority === CharsetPriority.BOM; } fromBOM(resBuf) { for (let i = 0; i < CHARSET_BOM_LIST.length; i++) { if ((0, buffer_1.startsWith)(resBuf, CHARSET_BOM_LIST[i].bom)) return this.set(CHARSET_BOM_LIST[i].charset, CharsetPriority.BOM); } return false; } fromContentType(contentTypeHeader) { if (this.priority <= CharsetPriority.CONTENT_TYPE) { const charsetMatch = contentTypeHeader && contentTypeHeader.match(CHARSET_RE); const charset = charsetMatch && charsetMatch[1]; if (!charset) return false; return this.set((0, labels_1.default)(charset), CharsetPriority.CONTENT_TYPE); } return false; } fromUrl(charsetFromUrl) { if (charsetFromUrl && this.priority <= CharsetPriority.URL) return this.set((0, labels_1.default)(charsetFromUrl), CharsetPriority.URL); return false; } // NOTE: Parsing charset from meta tags // www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#determining-the-character-encoding // Each descriptor should contain values of the "http-equiv", "content" and "charset" attributes. fromMeta(metas) { if (this.priority < CharsetPriority.META && metas.length) { let needPragma = true; let charsetStr = ''; metas.forEach(attrs => { const shouldParseFromContentAttr = needPragma && attrs.content && attrs.httpEquiv && attrs.httpEquiv.toLowerCase() === builtin_header_names_1.default.contentType; if (shouldParseFromContentAttr) { const charsetMatch = attrs.content.match(META_CHARSET_RE); if (charsetMatch) { needPragma = true; charsetStr = charsetMatch[1]; } } if (attrs.charset) { needPragma = false; charsetStr = attrs.charset; } }); return this.set((0, labels_1.default)(charsetStr), CharsetPriority.META); } return false; } } exports.default = Charset;module.exports = exports.default;