contentcollector: Skip over non-Text, non-Element Nodes
This commit is contained in:
parent
075969aea0
commit
1cb5453aeb
1 changed file with 2 additions and 1 deletion
|
@@ -39,6 +39,7 @@ const makeContentCollector = (collectStyles, abrowser, apool, className2Author)
|
||||||
const dom = {
|
const dom = {
|
||||||
// .nodeType works with DOM and cheerio 0.22.0. Note: Cheerio 0.22.0 does not provide the
|
// .nodeType works with DOM and cheerio 0.22.0. Note: Cheerio 0.22.0 does not provide the
|
||||||
// Node.*_NODE constants, so they cannot be used here.
|
// Node.*_NODE constants, so they cannot be used here.
|
||||||
|
isElementNode: (n) => n.nodeType === 1, // Node.ELEMENT_NODE
|
||||||
isTextNode: (n) => n.nodeType === 3, // Node.TEXT_NODE
|
isTextNode: (n) => n.nodeType === 3, // Node.TEXT_NODE
|
||||||
// .tagName works with DOM and cheerio 0.22.0, but:
|
// .tagName works with DOM and cheerio 0.22.0, but:
|
||||||
// * With DOM, .tagName is an uppercase string.
|
// * With DOM, .tagName is an uppercase string.
|
||||||
|
@@ -395,7 +396,7 @@ const makeContentCollector = (collectStyles, abrowser, apool, className2Author)
|
||||||
cc.startNewLine(state);
|
cc.startNewLine(state);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else if (dom.isElementNode(node)) {
|
||||||
const tname = dom.tagName(node) || '';
|
const tname = dom.tagName(node) || '';
|
||||||
|
|
||||||
if (tname === 'img') {
|
if (tname === 'img') {
|
||||||
|
|
Loading…
Reference in a new issue