contentcollector: Document the dom object

This commit is contained in:
Richard Hansen 2021-01-24 18:03:13 -05:00 committed by John McLear
parent 74bb2f76cc
commit dd7fb1babe

View file

@@ -33,26 +33,47 @@ const hooks = require('./pluginfw/hooks');
// Returns the result of passing the string `s` through UNorm.nfc() (presumably Unicode NFC
// normalization; confirm against the UNorm module imported by this file).
const sanitizeUnicode = (s) => UNorm.nfc(s);
const makeContentCollector = (collectStyles, abrowser, apool, className2Author) => { const makeContentCollector = (collectStyles, abrowser, apool, className2Author) => {
// This file is used both in browsers and with cheerio in Node.js (for importing HTML). Cheerio's
// Node-like objects are not 100% API compatible with the DOM Node specification; this `dom`
// object abstracts away the differences.
const dom = {
  // .nodeType works with DOM and cheerio 0.22.0. Note: Cheerio 0.22.0 does not provide the
  // Node.*_NODE constants, so they cannot be used here.
  isNodeText: (n) => n.nodeType === 3, // Node.TEXT_NODE
  // .tagName works with DOM and cheerio 0.22.0, but:
  //   * With DOM, .tagName is an uppercase string.
  //   * With cheerio 0.22.0, .tagName is a lowercase string.
  // The value is returned as-is; callers must not depend on its case.
  nodeTagName: (n) => n.tagName,
  // .nodeValue works with DOM and cheerio 0.22.0.
  nodeValue: (n) => n.nodeValue,
  // Returns the number of Node children (n.childNodes.length), not the number of Element children
  // (n.children.length in DOM).
  nodeNumChildren: (n) => {
    // .childNodes.length works with DOM and cheerio 0.22.0, except in cheerio the .childNodes
    // property does not exist on text nodes (and maybe other non-element nodes).
    if (n.childNodes == null) return 0;
    return n.childNodes.length;
  },
  // Returns the i'th Node child (n.childNodes[i]), not the i'th Element child (n.children[i] in
  // DOM). Returns undefined if the node has no .childNodes property at all.
  nodeChild: (n, i) => {
    // In cheerio 0.22.0 the .childNodes property does not exist on text nodes (and maybe other
    // non-element nodes). Mirror the guard in nodeNumChildren() instead of throwing a TypeError
    // when dereferencing .childNodes.item below.
    if (n.childNodes == null) return undefined;
    if (n.childNodes.item == null) {
      // .childNodes[] works with DOM and cheerio 0.22.0.
      return n.childNodes[i];
    }
    // .childNodes.item() works with DOM but not with cheerio 0.22.0.
    return n.childNodes.item(i);
  },
  // Returns the value of the property named `p` on node `n` (plain property access, no
  // DOM/cheerio-specific handling).
  nodeProp: (n, p) => n[p],
  // Returns the value of the attribute named `a` on node `n`, or null if the node supports
  // neither of the two lookup mechanisms below.
  nodeAttr: (n, a) => {
    // .getAttribute() works with DOM but not with cheerio 0.22.0.
    if (n.getAttribute != null) return n.getAttribute(a);
    // .attribs[] works with cheerio 0.22.0 but not with DOM.
    if (n.attribs != null) return n.attribs[a];
    return null;
  },
  // .innerHTML works with DOM but not with cheerio 0.22.0. Cheerio's Element-like objects have no
  // equivalent. (Cheerio objects have an .html() method, but that isn't accessible here.)
  optNodeInnerHTML: (n) => n.innerHTML,
};