// This file parses HTML and outputs the result as a plain Object tree.
// NOTE(review): the file mixes CommonJS `require` with an ES module `export`;
// presumably a bundler/transpiler handles this — confirm before running it as native ESM.
const htmlparser2 = require("htmlparser2");

/**
 * Returns the content object of a node, i.e. the value stored under the
 * node's first (and only) key, which is its tag name.
 *
 * @param {Object} node - A node shaped like `{ tagname: { ...attributes, __children: [] } }`.
 * @returns {Object} The attributes/children object stored under the tag name.
 */
function getNodeContent(node) {
  return node[Object.keys(node)[0]];
}

/**
 * Builds a new node. Each node is represented as:
 *
 *   {
 *     tagname: {
 *       key1: value1,
 *       key2: value2,
 *       __children: [ { ... } ]
 *     }
 *   }
 *
 * @param {string} tagName - Tag name, used as the single key of the node.
 * @param {Object} [attributes={}] - Attribute name/value pairs of the tag.
 * @returns {Object} The newly created node with an empty `__children` array.
 */
function generateNewNode(tagName, attributes = {}) {
  // Copy the attributes instead of mutating the caller's object.
  // `__children` comes last so it always holds the fresh array.
  return { [tagName]: { ...attributes, __children: [] } };
}

/**
 * Parses an HTML string into a tree of nodes below a synthetic `root` node.
 *
 * Text found directly inside an element is stored, trimmed, under the
 * `__text__` key of that element's content object. Whitespace-only text is
 * ignored.
 *
 * @param {string} htmlData - HTML source to parse.
 * @returns {Promise<Object>} Resolves with the `root` node when parsing ends;
 *   rejects with the error reported through the parser's `onerror` callback.
 */
function parseHtml(htmlData) {
  return new Promise((resolve, reject) => {
    // Root node of the resulting tree.
    const root = generateNewNode('root');

    // Stack of currently open elements; the last entry is the element being
    // visited. `rawText` collects the untrimmed text seen directly inside it.
    const openFrames = [{ node: root, rawText: '' }];
    const currentFrame = () => openFrames[openFrames.length - 1];

    // Options document: https://github.com/fb55/htmlparser2/wiki/Parser-options
    const parser = new htmlparser2.Parser({
      onopentag(tagname, attributes) {
        const newNode = generateNewNode(tagname, attributes);
        getNodeContent(currentFrame().node).__children.push(newNode);
        openFrames.push({ node: newNode, rawText: '' });
      },
      ontext(text) {
        // The text of one element can arrive in several pieces (for example
        // before and after a child element), so accumulate it rather than
        // overwriting what was stored earlier.
        const frame = currentFrame();
        frame.rawText += text;
        const trimmed = frame.rawText.trim();
        if (trimmed) {
          getNodeContent(frame.node).__text__ = trimmed;
        }
      },
      onclosetag() {
        // Never pop the synthetic root, so there is always a current element.
        if (openFrames.length > 1) {
          openFrames.pop();
        }
      },
      onend() {
        resolve(root);
      },
      onerror(error) {
        reject(error);
      },
    });

    parser.write(htmlData);
    parser.end();
  });
}

/**
 * Converts an HTML string into its Object representation.
 *
 * @param {string} htmlData - HTML source to convert.
 * @returns {Promise<Object>} Resolves with the `root` node of the parsed tree.
 */
async function html2Json(htmlData) {
  return parseHtml(htmlData);
}

export { html2Json };