From dd7fb1babee440e8395aa0862e96e57446199001 Mon Sep 17 00:00:00 2001
From: Richard Hansen
Date: Sun, 24 Jan 2021 18:03:13 -0500
Subject: [PATCH] contentcollector: Document the `dom` object

---
 src/static/js/contentcollector.js | 23 ++++++++++++++++++++++-
 1 file changed, 22 insertions(+), 1 deletion(-)

diff --git a/src/static/js/contentcollector.js b/src/static/js/contentcollector.js
index a77c4e5f..a717df3a 100644
--- a/src/static/js/contentcollector.js
+++ b/src/static/js/contentcollector.js
@@ -33,26 +33,47 @@ const hooks = require('./pluginfw/hooks');
 const sanitizeUnicode = (s) => UNorm.nfc(s);
 
 const makeContentCollector = (collectStyles, abrowser, apool, className2Author) => {
+  // This file is used both in browsers and with cheerio in Node.js (for importing HTML). Cheerio's
+  // Node-like objects are not 100% API compatible with the DOM Node specification; this `dom`
+  // object abstracts away the differences.
   const dom = {
-    isNodeText: (n) => n.nodeType === 3,
+    // .nodeType works with DOM and cheerio 0.22.0. Note: Cheerio 0.22.0 does not provide the
+    // Node.*_NODE constants, so they cannot be used here.
+    isNodeText: (n) => n.nodeType === 3, // Node.TEXT_NODE
+    // .tagName works with DOM and cheerio 0.22.0, but:
+    //   * With DOM, .tagName is an uppercase string.
+    //   * With cheerio 0.22.0, .tagName is a lowercase string.
     nodeTagName: (n) => n.tagName,
+    // .nodeValue works with DOM and cheerio 0.22.0.
     nodeValue: (n) => n.nodeValue,
+    // Returns the number of Node children (n.childNodes.length), not the number of Element children
+    // (n.children.length in DOM).
     nodeNumChildren: (n) => {
+      // .childNodes.length works with DOM and cheerio 0.22.0, except in cheerio the .childNodes
+      // property does not exist on text nodes (and maybe other non-element nodes).
       if (n.childNodes == null) return 0;
       return n.childNodes.length;
     },
+    // Returns the i'th Node child (n.childNodes[i]), not the i'th Element child (n.children[i] in
+    // DOM).
     nodeChild: (n, i) => {
       if (n.childNodes.item == null) {
+        // .childNodes[] works with DOM and cheerio 0.22.0.
         return n.childNodes[i];
       }
+      // .childNodes.item() works with DOM but not with cheerio 0.22.0.
       return n.childNodes.item(i);
     },
     nodeProp: (n, p) => n[p],
     nodeAttr: (n, a) => {
+      // .getAttribute() works with DOM but not with cheerio 0.22.0.
       if (n.getAttribute != null) return n.getAttribute(a);
+      // .attribs[] works with cheerio 0.22.0 but not with DOM.
       if (n.attribs != null) return n.attribs[a];
       return null;
     },
+    // .innerHTML works with DOM but not with cheerio 0.22.0. Cheerio's Element-like objects have no
+    // equivalent. (Cheerio objects have an .html() method, but that isn't accessible here.)
     optNodeInnerHTML: (n) => n.innerHTML,
   };
 