Index: third_party/readability/js/readability.js |
diff --git a/third_party/readability/js/readability.js b/third_party/readability/js/readability.js |
index 68a0286497ad9b1a06091004889b035720bb9896..4f648f864613b931b956bb3adb02d7ebe36b4416 100644 |
--- a/third_party/readability/js/readability.js |
+++ b/third_party/readability/js/readability.js |
@@ -4,11 +4,11 @@ var dbg = (typeof console !== 'undefined') ? function(s) { |
} : function() {}; |
/* |
- * Readability. An Arc90 Lab Experiment. |
+ * Readability. An Arc90 Lab Experiment. |
* Website: http://lab.arc90.com/experiments/readability |
* Source: http://code.google.com/p/arc90labs-readability |
* |
- * "Readability" is a trademark of Arc90 Inc and may not be used without explicit permission. |
+ * "Readability" is a trademark of Arc90 Inc and may not be used without explicit permission. |
* |
* Copyright (c) 2010 Arc90 Inc |
* Readability is licensed under the Apache License, Version 2.0. |
@@ -20,6 +20,7 @@ var readability = { |
distilledHTML: '', |
distilledArticleContent: null, |
+ nextPageLink: '', |
version: '1.7.1', |
iframeLoads: 0, |
@@ -41,7 +42,7 @@ var readability = { |
maxPages: 30, /* The maximum number of pages to loop through before we call it quits and just show a link. */ |
parsedPages: {}, /* The list of pages we've parsed in this call of readability, for autopaging. As a key store for easier searching. */ |
pageETags: {}, /* A list of the ETag headers of pages we've parsed, in case they happen to match, we'll know it's a duplicate. */ |
- |
+ |
/** |
* All of the regular expressions in use within readability. |
* Defined up here so we don't instantiate them repeatedly in loops. |
@@ -66,7 +67,7 @@ var readability = { |
/** |
* Runs readability. |
- * |
+ * |
* Workflow: |
* 1. Prep the document by removing script tags, css, etc. |
* 2. Build readability's DOM tree. |
@@ -86,8 +87,11 @@ var readability = { |
readability.parsedPages[window.location.href.replace(/\/$/, '')] = true; |
/* Pull out any possible next page link first */ |
- var nextPageLink = readability.findNextPageLink(document.body); |
- |
+ readability.nextPageLink = readability.findNextPageLink(document.body); |
+ |
+ /* Next-page processing is handled from C++, so the found link is kept in readability.nextPageLink and the local nextPageLink is set to null. */ |
+ var nextPageLink = null; |
+ |
readability.prepDocument(); |
/* Build readability's DOM tree */ |
@@ -157,7 +161,7 @@ var readability = { |
nextPageLink = null; |
if (nextPageLink) { |
- /** |
+ /** |
* Append any additional pages after a small timeout so that people |
* can start reading without having to wait for this to finish processing. |
**/ |
@@ -179,16 +183,16 @@ var readability = { |
var windowHeight = window.innerHeight ? window.innerHeight : (document.documentElement.clientHeight ? document.documentElement.clientHeight : document.body.clientHeight); |
if(readability.reversePageScroll) { |
- readability.scrollTo(readability.scrollTop(), readability.scrollTop() - (windowHeight - 50), 20, 10); |
+ readability.scrollTo(readability.scrollTop(), readability.scrollTop() - (windowHeight - 50), 20, 10); |
} |
else { |
- readability.scrollTo(readability.scrollTop(), readability.scrollTop() + (windowHeight - 50), 20, 10); |
+ readability.scrollTo(readability.scrollTop(), readability.scrollTop() + (windowHeight - 50), 20, 10); |
} |
- |
+ |
return false; |
} |
}; |
- |
+ |
document.onkeyup = function(e) { |
var code = (window.event) ? event.keyCode : e.keyCode; |
if (code === 16) { |
@@ -200,7 +204,7 @@ var readability = { |
/** |
* Run any post-process modifications to article content as necessary. |
- * |
+ * |
* @param Element |
* @return void |
**/ |
@@ -226,7 +230,7 @@ var readability = { |
for(var i=0, il = images.length; i < il; i+=1) { |
var image = images[i]; |
- |
+ |
if(image.offsetWidth > imageWidthThreshold) { |
image.className += " blockImage"; |
} |
@@ -242,7 +246,7 @@ var readability = { |
var articleTools = document.createElement("DIV"); |
articleTools.id = "readTools"; |
- articleTools.innerHTML = |
+ articleTools.innerHTML = |
"<a href='#' onclick='return window.location.reload()' title='Reload original page' id='reload-page'>Reload Original Page</a>" + |
"<a href='#' onclick='javascript:window.print();' title='Print page' id='print-page'>Print Page</a>" + |
"<a href='#' onclick='readability.emailBox(); return false;' title='Email page' id='email-page'>Email Page</a>"; |
@@ -259,13 +263,13 @@ var readability = { |
function sanitizeText() { |
return text.replace(/@\w+/, ""); |
} |
- |
+ |
function countMatches(match) { |
var matches = text.match(new RegExp(match, "g")); |
- return matches !== null ? matches.length : 0; |
+ return matches !== null ? matches.length : 0; |
} |
- |
- function isRTL() { |
+ |
+ function isRTL() { |
var count_heb = countMatches("[\\u05B0-\\u05F4\\uFB1D-\\uFBF4]"); |
var count_arb = countMatches("[\\u060C-\\u06FE\\uFB50-\\uFEFC]"); |
@@ -289,15 +293,15 @@ var readability = { |
try { |
curTitle = origTitle = document.title; |
if(typeof curTitle !== "string") { /* If they had an element with id "title" in their HTML */ |
- curTitle = origTitle = readability.getInnerText(document.getElementsByTagName('title')[0]); |
+ curTitle = origTitle = readability.getInnerText(document.getElementsByTagName('title')[0]); |
} |
} |
catch(e) {} |
- |
+ |
if(curTitle.match(/ [\|\-] /)) |
{ |
curTitle = origTitle.replace(/(.*)[\|\-] .*/gi,'$1'); |
- |
+ |
if(curTitle.split(' ').length < 3) { |
curTitle = origTitle.replace(/[^\|\-]*[\|\-](.*)/gi,'$1'); |
} |
@@ -330,7 +334,7 @@ var readability = { |
/** |
* Prepare the HTML document for readability to scrape it. |
* This includes things like stripping javascript, CSS, and handling terrible markup. |
- * |
+ * |
* @return void |
**/ |
prepDocument: function () { |
@@ -342,7 +346,7 @@ var readability = { |
{ |
var body = document.createElement("body"); |
try { |
- document.body = body; |
+ document.body = body; |
} |
catch(e) { |
document.documentElement.appendChild(body); |
@@ -374,11 +378,11 @@ var readability = { |
biggestFrameSize = frameSize; |
readability.biggestFrame = frames[frameIndex]; |
} |
- |
+ |
if(canAccessFrame && frameSize > bestFrameSize) |
{ |
readability.frameHack = true; |
- |
+ |
bestFrame = frames[frameIndex]; |
bestFrameSize = frameSize; |
} |
@@ -390,7 +394,7 @@ var readability = { |
readability.moveNodeInnards(bestFrame.contentWindow.document.body, newBody); |
newBody.style.overflow = 'scroll'; |
document.body = newBody; |
- |
+ |
var frameset = document.getElementsByTagName('frameset')[0]; |
if(frameset) { |
frameset.parentNode.removeChild(frameset); } |
@@ -455,7 +459,7 @@ var readability = { |
var imgCount = articleParagraphs[i].getElementsByTagName('img').length; |
var embedCount = articleParagraphs[i].getElementsByTagName('embed').length; |
var objectCount = articleParagraphs[i].getElementsByTagName('object').length; |
- |
+ |
if(imgCount === 0 && embedCount === 0 && objectCount === 0 && readability.getInnerText(articleParagraphs[i], false) === '') { |
articleParagraphs[i].parentNode.removeChild(articleParagraphs[i]); |
} |
@@ -468,7 +472,7 @@ var readability = { |
dbg("Cleaning innerHTML of breaks failed. This is an IE strict-block-elements bug. Ignoring.: " + e); |
} |
}, |
- |
+ |
/** |
* Initialize a node with the readability object. Also checks the |
* className/id for special names to add to its score. |
@@ -477,7 +481,7 @@ var readability = { |
* @return void |
**/ |
initializeNode: function (node) { |
- node.readability = {"contentScore": 0}; |
+ node.readability = {"contentScore": 0}; |
switch(node.tagName) { |
case 'DIV': |
@@ -489,7 +493,7 @@ var readability = { |
case 'BLOCKQUOTE': |
node.readability.contentScore += 3; |
break; |
- |
+ |
case 'ADDRESS': |
case 'OL': |
case 'UL': |
@@ -511,10 +515,10 @@ var readability = { |
node.readability.contentScore -= 5; |
break; |
} |
- |
+ |
node.readability.contentScore += readability.getClassWeight(node); |
}, |
- |
+ |
/*** |
* grabArticle - Using a variety of metrics (content score, classname, element types), find the content that is |
* most likely to be the stuff a user wants to read. Then return it wrapped up in a div. |
@@ -525,7 +529,7 @@ var readability = { |
grabArticle: function (pageToClone) { |
var stripUnlikelyCandidates = readability.flagIsActive(readability.FLAG_STRIP_UNLIKELYS), |
isPaging = (page !== null) ? true: false; |
- |
+ |
var page = null; |
// Never work on the actual page. |
if (isPaging) { |
@@ -533,7 +537,7 @@ var readability = { |
} else { |
page = pageToClone.cloneNode(true); |
} |
- |
+ |
var allElements = page.getElementsByTagName('*'); |
/** |
@@ -561,7 +565,7 @@ var readability = { |
node.parentNode.removeChild(node); |
nodeIndex-=1; |
continue; |
- } |
+ } |
} |
if (node.tagName === "P" || node.tagName === "TD" || node.tagName === "PRE") { |
@@ -598,7 +602,7 @@ var readability = { |
} |
} |
} |
- } |
+ } |
} |
/** |
@@ -640,15 +644,15 @@ var readability = { |
/* Add points for any commas within this paragraph */ |
contentScore += innerText.split(',').length; |
- |
+ |
/* For every 100 characters in this paragraph, add another point. Up to 3 points. */ |
contentScore += Math.min(Math.floor(innerText.length / 100), 3); |
- |
+ |
/* Add the score to the parent. The grandparent gets half. */ |
parentNode.readability.contentScore += contentScore; |
if(grandParentNode) { |
- grandParentNode.readability.contentScore += contentScore/2; |
+ grandParentNode.readability.contentScore += contentScore/2; |
} |
} |
@@ -725,12 +729,12 @@ var readability = { |
{ |
append = true; |
} |
- |
+ |
if(siblingNode.nodeName === "P") { |
var linkDensity = readability.getLinkDensity(siblingNode); |
var nodeContent = readability.getInnerText(siblingNode); |
var nodeLength = nodeContent.length; |
- |
+ |
if(nodeLength > 80 && linkDensity < 0.25) |
{ |
append = true; |
@@ -747,7 +751,7 @@ var readability = { |
var nodeToAppend = null; |
if(siblingNode.nodeName !== "DIV" && siblingNode.nodeName !== "P") { |
/* We have a node that isn't a common block level element, like a form or td tag. Turn it into a div so it doesn't get filtered out later by accident. */ |
- |
+ |
dbg("Altering siblingNode of " + siblingNode.nodeName + ' to div.'); |
nodeToAppend = document.createElement("DIV"); |
try { |
@@ -765,7 +769,7 @@ var readability = { |
s-=1; |
sl-=1; |
} |
- |
+ |
/* To ensure a node does not interfere with readability styles, remove its classnames */ |
nodeToAppend.className = ""; |
@@ -779,15 +783,15 @@ var readability = { |
**/ |
readability.distilledArticleContent = articleContent.cloneNode(true); |
//readability.prepArticle(articleContent); |
- |
+ |
if (readability.curPageNum === 1) { |
var newNode = document.createElement('div'); |
newNode.id = "readability-page-1"; |
newNode.setAttribute("class", "page"); |
readability.moveNodeInnards(articleContent, newNode); |
articleContent.appendChild(newNode); |
- } |
- |
+ } |
+ |
/** |
* Now that we've gone through the full algorithm, check to see if we got any meaningful content. |
* If we didn't, we may need to re-run grabArticle with different flags set. This gives us a higher |
@@ -813,7 +817,7 @@ var readability = { |
return articleContent; |
}, |
- |
+ |
/** |
* Removes script tags from the document. |
* |
@@ -828,12 +832,12 @@ var readability = { |
scripts[i].nodeValue=""; |
scripts[i].removeAttribute('src'); |
if (scripts[i].parentNode) { |
- scripts[i].parentNode.removeChild(scripts[i]); |
+ scripts[i].parentNode.removeChild(scripts[i]); |
} |
} |
} |
}, |
- |
+ |
/** |
* Get the inner text of a node - cross browser compatibly. |
* This also strips out any excess whitespace to be found. |
@@ -896,18 +900,18 @@ var readability = { |
if ( cur.nodeType === 1 ) { |
// Remove style attribute(s) : |
if(cur.className !== "readability-styled") { |
- cur.removeAttribute("style"); |
+ cur.removeAttribute("style"); |
} |
readability.cleanStyles( cur ); |
} |
cur = cur.nextSibling; |
- } |
+ } |
}, |
- |
+ |
/** |
* Get the density of links as a percentage of the content |
* This is the amount of text that is inside a link divided by the total text in the node. |
- * |
+ * |
* @param Element |
* @return number (float) |
**/ |
@@ -918,11 +922,11 @@ var readability = { |
for(var i=0, il=links.length; i<il;i+=1) |
{ |
linkLength += readability.getInnerText(links[i]).length; |
- } |
+ } |
return linkLength / textLength; |
}, |
- |
+ |
/** |
* Find a cleaned up version of the current URL, to use for comparing links for possible next-pageyness. |
* |
@@ -944,10 +948,10 @@ var readability = { |
/* If the type isn't alpha-only, it's probably not actually a file extension. */ |
if(!possibleType.match(/[^a-zA-Z]/)) { |
- segment = segment.split(".")[0]; |
+ segment = segment.split(".")[0]; |
} |
} |
- |
+ |
/** |
* EW-CMS specific segment replacement. Ugly. |
* Example: http://www.ew.com/ew/article/0,,20313460_20369436,00.html |
@@ -968,7 +972,7 @@ var readability = { |
if (i < 2 && segment.match(/^\d{1,2}$/)) { |
del = true; |
} |
- |
+ |
/* If this is the first segment and it's just "index", remove it. */ |
if(i === 0 && segment.toLowerCase() === "index") { |
del = true; |
@@ -992,7 +996,7 @@ var readability = { |
/** |
* Look for any paging links that may occur within the document. |
- * |
+ * |
* @param body |
* @return object (array) |
**/ |
@@ -1008,7 +1012,7 @@ var readability = { |
* |
* Also possible: levenshtein distance? longest common subsequence? |
* |
- * After we do that, assign each page a score, and |
+ * After we do that, assign each page a score, and |
**/ |
for(var i = 0, il = allLinks.length; i < il; i+=1) { |
var link = allLinks[i], |
@@ -1018,12 +1022,12 @@ var readability = { |
if(linkHref === "" || linkHref === articleBaseUrl || linkHref === window.location.href || linkHref in readability.parsedPages) { |
continue; |
} |
- |
+ |
/* If it's on a different domain, skip it. */ |
if(window.location.host !== linkHref.split(/\/+/g)[1]) { |
continue; |
} |
- |
+ |
var linkText = readability.getInnerText(link); |
/* If the linkText looks like it's not the next page, skip it. */ |
@@ -1036,9 +1040,9 @@ var readability = { |
if(!linkHrefLeftover.match(/\d/)) { |
continue; |
} |
- |
+ |
if(!(linkHref in possiblePages)) { |
- possiblePages[linkHref] = {"score": 0, "linkText": linkText, "href": linkHref}; |
+ possiblePages[linkHref] = {"score": 0, "linkText": linkText, "href": linkHref}; |
} else { |
possiblePages[linkHref].linkText += ' | ' + linkText; |
} |
@@ -1060,7 +1064,7 @@ var readability = { |
if(linkData.match(/pag(e|ing|inat)/i)) { |
linkObj.score += 25; |
} |
- if(linkData.match(/(first|last)/i)) { // -65 is enough to negate any bonuses gotten from a > or » in the text, |
+ if(linkData.match(/(first|last)/i)) { // -65 is enough to negate any bonuses gotten from a > or » in the text, |
/* If we already matched on "next", last is probably fine. If we didn't, then it's bad. Penalize. */ |
if(!linkObj.linkText.match(readability.regexps.nextLink)) { |
linkObj.score -= 65; |
@@ -1087,10 +1091,10 @@ var readability = { |
/* If this is just something like "footer", give it a negative. If it's something like "body-and-footer", leave it be. */ |
if(!parentNodeClassAndId.match(readability.regexps.positive)) { |
linkObj.score -= 25; |
- negativeNodeMatch = true; |
+ negativeNodeMatch = true; |
} |
} |
- |
+ |
parentNode = parentNode.parentNode; |
} |
@@ -1152,7 +1156,7 @@ var readability = { |
dbg('NEXT PAGE IS ' + nextHref); |
readability.parsedPages[nextHref] = true; |
- return nextHref; |
+ return nextHref; |
} |
else { |
return null; |
@@ -1204,7 +1208,7 @@ var readability = { |
if (typeof options === 'undefined') { options = {}; } |
request.onreadystatechange = respondToReadyState; |
- |
+ |
request.open('get', url, true); |
request.setRequestHeader('Accept', 'text/html'); |
@@ -1239,7 +1243,7 @@ var readability = { |
articlePage.appendChild(linkDiv); |
return; |
} |
- |
+ |
/** |
* Now that we've built the article page DOM element, get the page content |
* asynchronously and load the cleaned content into the div we created for it. |
@@ -1257,7 +1261,7 @@ var readability = { |
return; |
} else { |
readability.pageETags[eTag] = 1; |
- } |
+ } |
} |
// TODO: this ends up doubling up page numbers on NYTimes articles. Need to generically parse those away. |
@@ -1308,7 +1312,7 @@ var readability = { |
} |
} |
} |
- |
+ |
readability.removeScripts(content); |
readability.moveNodeInnards(content, thisPage); |
@@ -1330,9 +1334,9 @@ var readability = { |
}); |
}(nextPageLink, articlePage)); |
}, |
- |
+ |
/** |
- * Get an elements class/id weight. Uses regular expressions to tell if this |
+ * Get an elements class/id weight. Uses regular expressions to tell if this |
* element looks good or bad. |
* |
* @param Element |
@@ -1382,7 +1386,7 @@ var readability = { |
var allElements = e.getElementsByTagName('*'); |
while (i < allElements.length) { |
readability.deleteExtraBreaks(allElements[i]); |
- i++; |
+ i++; |
} |
}, |
@@ -1397,7 +1401,7 @@ var readability = { |
clean: function (e, tag) { |
var targetList = e.getElementsByTagName( tag ); |
var isEmbed = (tag === 'object' || tag === 'embed'); |
- |
+ |
for (var y=targetList.length-1; y >= 0; y-=1) { |
/* Allow youtube and vimeo videos through as people usually want to see those. */ |
if(isEmbed) { |
@@ -1405,7 +1409,7 @@ var readability = { |
for (var i=0, il=targetList[y].attributes.length; i < il; i+=1) { |
attributeValues += targetList[y].attributes[i].value + '|'; |
} |
- |
+ |
/* First, check the elements attributes to see if any of them contain youtube or vimeo */ |
if (attributeValues.search(readability.regexps.videos) !== -1) { |
continue; |
@@ -1415,13 +1419,13 @@ var readability = { |
if (targetList[y].innerHTML.search(readability.regexps.videos) !== -1) { |
continue; |
} |
- |
+ |
} |
targetList[y].parentNode.removeChild(targetList[y]); |
} |
}, |
- |
+ |
/** |
* Clean an element of all tags of type "tag" if they look fishy. |
* "Fishy" is an algorithm based on content length, classnames, link density, number of images & embeds, etc. |
@@ -1446,7 +1450,7 @@ var readability = { |
for (var i=curTagsLength-1; i >= 0; i-=1) { |
var weight = readability.getClassWeight(tagsList[i]); |
var contentScore = (typeof tagsList[i].readability !== 'undefined') ? tagsList[i].readability.contentScore : 0; |
- |
+ |
dbg("Cleaning Conditionally " + tagsList[i] + " (" + tagsList[i].className + ":" + tagsList[i].id + ")" + ((typeof tagsList[i].readability !== 'undefined') ? (" with score " + tagsList[i].readability.contentScore) : '')); |
if(weight+contentScore < 0) |
@@ -1467,7 +1471,7 @@ var readability = { |
var embeds = tagsList[i].getElementsByTagName("embed"); |
for(var ei=0,il=embeds.length; ei < il; ei+=1) { |
if (embeds[ei].src.search(readability.regexps.videos) === -1) { |
- embedCount+=1; |
+ embedCount+=1; |
} |
} |
@@ -1480,7 +1484,7 @@ var readability = { |
} else if(li > p && tag !== "ul" && tag !== "ol") { |
toRemove = true; |
} else if( input > Math.floor(p/3) ) { |
- toRemove = true; |
+ toRemove = true; |
} else if(contentLength < 25 && (img === 0 || img > 2) ) { |
toRemove = true; |
} else if(weight < 25 && linkDensity > 0.2) { |
@@ -1522,7 +1526,7 @@ var readability = { |
addFlag: function(flag) { |
readability.flags = readability.flags | flag; |
}, |
- |
+ |
removeFlag: function(flag) { |
readability.flags = readability.flags & ~flag; |
}, |
@@ -1591,7 +1595,7 @@ var readability = { |
} |
return ret; |
}, |
- |
+ |
// Replaces a pair of <BR> nodes (possibly separated by whitespace), with a |
// <P> node, and makes all next siblings of that pair children of <P>, up |
// until the next pair of <BR> nodes is reached. |
@@ -1600,7 +1604,7 @@ var readability = { |
var second = readability.isMultipleBr(node, true); |
if (!second) { |
return; |
- } |
+ } |
// Make all next siblings of the second BR into children of a P. |
var p = document.createElement('p'); |
var curr = second.nextSibling; |
@@ -1613,7 +1617,7 @@ var readability = { |
curr = next; |
} |
var ret = curr; |
- |
+ |
// Remove all nodes between the first and second BR. |
curr = node.nextSibling; |
while (curr && curr != second) { |
@@ -1625,10 +1629,10 @@ var readability = { |
second.parentNode.removeChild(second); |
// Replace the first BR with the P. |
node.parentNode.replaceChild(p, node); |
- |
+ |
return ret; |
}, |
- |
+ |
// Returns true if the NodeList contains a double <BR>. |
hasDoubleBr: function(nodeList) { |
for (var i = 0; i < nodeList.length; nodeList++) { |
@@ -1637,8 +1641,8 @@ var readability = { |
} |
} |
return false; |
- }, |
- |
+ }, |
+ |
// Replaces double <BR> tags with <P> tags. |
replaceDoubleBrsWithPs: function(node) { |
var allElements = node.getElementsByTagName('BR'); |
@@ -1652,8 +1656,8 @@ var readability = { |
allElements = document.body.getElementsByTagName('BR'); |
} |
}, |
- |
- |
+ |
+ |
// Replaces a BR and the whitespace that follows it with a P. |
replaceBrWithP: function(node) { |
if (!readability.isBrNode(node)) { |
@@ -1673,7 +1677,7 @@ var readability = { |
node.parentNode.replaceChild(p, node); |
return curr; |
}, |
- |
+ |
// Replaces all <BR> tags with <P> tags. Makes all next siblings of a <BR> tag |
// children of the <P>. |
replaceBrsWithPs: function(node) { |
@@ -1687,27 +1691,27 @@ var readability = { |
allElements = document.body.getElementsByTagName('BR'); |
} |
}, |
- |
+ |
// Replaces any tag with any other tag. |
replaceTagsWithTags: function(node, srcTag, destTag) { |
var allElements = node.getElementsByTagName(srcTag); |
for (var i = 0; i < allElements.length; i++) { |
var dest = document.createElement(destTag); |
readability.moveNodeInnards(allElements[i], dest); |
- node.replaceNode(dest, allElements[i]); |
+ allElements[i].parentNode.replaceChild(dest, allElements[i]); |
} |
}, |
- |
+ |
// Replaces all <noscript> tags with <p> tags. |
replaceNoscriptsWithPs: function(node) { |
readability.replaceTagsWithTags(node, 'noscript', 'p'); |
}, |
- |
+ |
// Replaces all <font> tags with <span> tags. |
replaceFontsWithSpans: function(node) { |
readability.replaceTagsWithTags(node, 'font', 'span'); |
}, |
- |
+ |
// Returns a list of image URLs in the distilled article. |
getImages : function() { |
var images = document.getElementsByTagName('img'); |
@@ -1719,10 +1723,15 @@ var readability = { |
} |
return result; |
}, |
- |
+ |
// Returns the distilled article HTML from the page(s). |
getDistilledArticleHTML : function() { |
return readability.distilledHTML; |
+ }, |
+ |
+ // Returns the link to the next page of this article, as stored in readability.nextPageLink. |
+ getNextPageLink : function() { |
+ return readability.nextPageLink; |
} |
}; |
@@ -1730,12 +1739,13 @@ var readability = { |
// element is the article title, the second element is HTML containing the |
// long-form content, and remaining elements are URLs for images referenced by |
// that HTML. Each <img> tag in the HTML has an id field set to k - 2, which |
-// corresponds to a URL listed at index k in the array returned. |
+// corresponds to a URL listed at index k in the array returned. |
(function () { |
readability.init(); |
- var result = new Array(2); |
+ var result = new Array(3); |
result[0] = readability.getArticleTitle(); |
result[1] = readability.getDistilledArticleHTML(); |
+ result[2] = readability.getNextPageLink(); |
return result.concat(readability.getImages()); |
}()) |