Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(366)

Unified Diff: javatests/org/chromium/distiller/ContentExtractorTest.java

Issue 1230583006: Fix for keeping lists structure (Closed) Base URL: https://github.com/chromium/dom-distiller.git@master
Patch Set: canBeNested move out of the switch. Created 5 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: javatests/org/chromium/distiller/ContentExtractorTest.java
diff --git a/javatests/org/chromium/distiller/ContentExtractorTest.java b/javatests/org/chromium/distiller/ContentExtractorTest.java
index 63d349c923a4c341670a5bd85a4632cab8903c8f..87f502fbe9033e62a3c2610b437fa568d2c21aad 100644
--- a/javatests/org/chromium/distiller/ContentExtractorTest.java
+++ b/javatests/org/chromium/distiller/ContentExtractorTest.java
@@ -143,6 +143,267 @@ public class ContentExtractorTest extends DomDistillerJsTestCase {
TestUtil.removeAllDirAttributes(extractedContent));
}
+ /** Checks that a flat ordered list of four items is extracted with its OL/LI markup intact. */
+ public void testPreserveOrderedList() {
+     Element list = Document.get().createElement("OL");
+     mBody.appendChild(list);
+     for (int i = 0; i < 4; i++) {
+         list.appendChild(TestUtil.createListItem(CONTENT_TEXT));
+     }
+
+     String item = "<LI>" + CONTENT_TEXT + "</LI>";
+     String expected = "<OL>" + item + item + item + item + "</OL>";
+
+     String extractedContent = new ContentExtractor(mRoot).extractContent();
+     assertEquals(expected, TestUtil.removeAllDirAttributes(extractedContent));
+ }
+
+ /**
+  * Checks that an ordered list nested inside a list item of another ordered list is
+  * extracted with both levels of OL/LI markup intact.
+  */
+ public void testPreserveNestedOrderedList() {
+     Element nested = Document.get().createElement("OL");
+     for (int i = 0; i < 4; i++) {
+         nested.appendChild(TestUtil.createListItem(CONTENT_TEXT));
+     }
+
+     Element wrapper = Document.get().createElement("LI");
+     wrapper.appendChild(nested);
+
+     Element outer = Document.get().createElement("OL");
+     outer.appendChild(wrapper);
+     outer.appendChild(TestUtil.createListItem(CONTENT_TEXT));
+     mBody.appendChild(outer);
+
+     String item = "<LI>" + CONTENT_TEXT + "</LI>";
+     String expected = "<OL>" +
+             "<LI>" +
+                 "<OL>" + item + item + item + item + "</OL>" +
+             "</LI>" +
+             item +
+             "</OL>";
+
+     String extractedContent = new ContentExtractor(mRoot).extractContent();
+     assertEquals(expected, TestUtil.removeAllDirAttributes(extractedContent));
+ }
+
+ /**
+  * Checks that nested ordered lists keep their structure when text nodes and paragraphs
+  * are mixed in with the list items, including a paragraph placed directly under the
+  * outer list.
+  */
+ public void testPreserveNestedOrderedListWithOtherElementsInside() {
+     Element nested = Document.get().createElement("OL");
+     for (int i = 0; i < 4; i++) {
+         nested.appendChild(TestUtil.createListItem(CONTENT_TEXT));
+     }
+     // Empty paragraph; the expected output below contains no trace of it.
+     nested.appendChild(TestUtil.createParagraph(""));
+
+     Element wrapper = Document.get().createElement("LI");
+     wrapper.appendChild(TestUtil.createText(CONTENT_TEXT));
+     wrapper.appendChild(TestUtil.createParagraph(CONTENT_TEXT));
+     wrapper.appendChild(nested);
+
+     Element outer = Document.get().createElement("OL");
+     outer.appendChild(wrapper);
+     outer.appendChild(TestUtil.createListItem(CONTENT_TEXT));
+     outer.appendChild(TestUtil.createParagraph(CONTENT_TEXT));
+     mBody.appendChild(outer);
+
+     String item = "<LI>" + CONTENT_TEXT + "</LI>";
+     String paragraph = "<p>" + CONTENT_TEXT + "</p>";
+     String expected = "<OL>" +
+             "<LI>" + CONTENT_TEXT +
+                 paragraph +
+                 "<OL>" + item + item + item + item + "</OL>" +
+             "</LI>" +
+             item +
+             paragraph +
+             "</OL>";
+
+     String extractedContent = new ContentExtractor(mRoot).extractContent();
+     assertEquals(expected, TestUtil.removeAllDirAttributes(extractedContent));
+ }
+
+ /** Checks that a flat unordered list of four items is extracted with its UL/LI markup intact. */
+ public void testPreserveUnorderedList() {
+     Element list = Document.get().createElement("UL");
+     mBody.appendChild(list);
+     for (int i = 0; i < 4; i++) {
+         list.appendChild(TestUtil.createListItem(CONTENT_TEXT));
+     }
+
+     String item = "<LI>" + CONTENT_TEXT + "</LI>";
+     String expected = "<UL>" + item + item + item + item + "</UL>";
+
+     String extractedContent = new ContentExtractor(mRoot).extractContent();
+     assertEquals(expected, TestUtil.removeAllDirAttributes(extractedContent));
+ }
+
+ /**
+  * Checks that an unordered list nested inside a list item of another unordered list is
+  * extracted with both levels of UL/LI markup intact.
+  */
+ public void testPreserveNestedUnorderedList() {
+     Element nested = Document.get().createElement("UL");
+     for (int i = 0; i < 4; i++) {
+         nested.appendChild(TestUtil.createListItem(CONTENT_TEXT));
+     }
+
+     Element wrapper = Document.get().createElement("LI");
+     wrapper.appendChild(nested);
+
+     Element outer = Document.get().createElement("UL");
+     outer.appendChild(wrapper);
+     outer.appendChild(TestUtil.createListItem(CONTENT_TEXT));
+     mBody.appendChild(outer);
+
+     String item = "<LI>" + CONTENT_TEXT + "</LI>";
+     String expected = "<UL>" +
+             "<LI>" +
+                 "<UL>" + item + item + item + item + "</UL>" +
+             "</LI>" +
+             item +
+             "</UL>";
+
+     String extractedContent = new ContentExtractor(mRoot).extractContent();
+     assertEquals(expected, TestUtil.removeAllDirAttributes(extractedContent));
+ }
+
+ /**
+  * Checks that nested unordered lists keep their structure when text nodes and paragraphs
+  * are mixed in with the list items, including a paragraph placed directly under the
+  * outer list.
+  */
+ public void testPreserveNestedUnorderedListWithOtherElementsInside() {
+     Element nested = Document.get().createElement("UL");
+     for (int i = 0; i < 4; i++) {
+         nested.appendChild(TestUtil.createListItem(CONTENT_TEXT));
+     }
+     // Empty paragraph; the expected output below contains no trace of it.
+     nested.appendChild(TestUtil.createParagraph(""));
+
+     Element wrapper = Document.get().createElement("LI");
+     wrapper.appendChild(TestUtil.createText(CONTENT_TEXT));
+     wrapper.appendChild(TestUtil.createParagraph(CONTENT_TEXT));
+     wrapper.appendChild(nested);
+
+     Element outer = Document.get().createElement("UL");
+     outer.appendChild(wrapper);
+     outer.appendChild(TestUtil.createListItem(CONTENT_TEXT));
+     outer.appendChild(TestUtil.createParagraph(CONTENT_TEXT));
+     mBody.appendChild(outer);
+
+     String item = "<LI>" + CONTENT_TEXT + "</LI>";
+     String paragraph = "<p>" + CONTENT_TEXT + "</p>";
+     String expected = "<UL>" +
+             "<LI>" + CONTENT_TEXT +
+                 paragraph +
+                 "<UL>" + item + item + item + item + "</UL>" +
+             "</LI>" +
+             item +
+             paragraph +
+             "</UL>";
+
+     String extractedContent = new ContentExtractor(mRoot).extractContent();
+     assertEquals(expected, TestUtil.removeAllDirAttributes(extractedContent));
+ }
+
+ /**
+  * Checks that an ordered list nested inside a list item of an unordered list is
+  * extracted with both list types intact.
+  */
+ public void testPreserveUnorderedListWithNestedOrderedList() {
+     Element nestedOrdered = Document.get().createElement("OL");
+     for (int i = 0; i < 2; i++) {
+         nestedOrdered.appendChild(TestUtil.createListItem(CONTENT_TEXT));
+     }
+
+     Element wrapper = Document.get().createElement("LI");
+     wrapper.appendChild(nestedOrdered);
+
+     Element outerUnordered = Document.get().createElement("UL");
+     outerUnordered.appendChild(wrapper);
+     outerUnordered.appendChild(TestUtil.createListItem(CONTENT_TEXT));
+     mBody.appendChild(outerUnordered);
+
+     String item = "<LI>" + CONTENT_TEXT + "</LI>";
+     String expected = "<UL>" +
+             "<LI>" +
+                 "<OL>" + item + item + "</OL>" +
+             "</LI>" +
+             item +
+             "</UL>";
+
+     String extractedContent = new ContentExtractor(mRoot).extractContent();
+     assertEquals(expected, TestUtil.removeAllDirAttributes(extractedContent));
+ }
+
+ /**
+  * Feeds a UL whose inner HTML has a stray closing LI tag between two items and checks
+  * that the extracted content contains exactly the two list items.
+  */
+ public void testMalformedListStructureWithExtraLITagEnd() {
+     String item = "<LI>" + CONTENT_TEXT + "</LI>";
+
+     Element list = Document.get().createElement("UL");
+     // The unmatched "</LI>" between the items is the malformation under test.
+     list.setInnerHTML(item + "</LI>" + item);
+     mBody.appendChild(list);
+
+     String extractedContent = new ContentExtractor(mRoot).extractContent();
+     assertEquals("<UL>" + item + item + "</UL>",
+             TestUtil.removeAllDirAttributes(extractedContent));
+ }
+
+ /**
+  * Feeds an OL whose inner HTML starts with an extra, unclosed opening LI tag and checks
+  * that the extracted content contains exactly the two list items.
+  */
+ public void testMalformedListStructureWithExtraLITagStart() {
+     // Named for what it holds: the element is an OL, not a UL.
+     Element orderedListTag = Document.get().createElement("OL");
+     String html = "<LI><LI>" + CONTENT_TEXT + "</LI><LI>" + CONTENT_TEXT + "</LI>";
+     orderedListTag.setInnerHTML(html);
+     mBody.appendChild(orderedListTag);
+     ContentExtractor extractor = new ContentExtractor(mRoot);
+     String extractedContent = extractor.extractContent();
+     assertEquals("<OL>" +
+                     "<LI>" + CONTENT_TEXT + "</LI>" +
+                     "<LI>" + CONTENT_TEXT + "</LI>" +
+                 "</OL>",
+             TestUtil.removeAllDirAttributes(extractedContent));
+ }
+
+ /**
+  * Feeds an OL whose inner HTML starts with an extra, unclosed opening OL tag and checks
+  * that the extracted content is an OL nested directly inside the outer OL, holding the
+  * two list items.
+  */
+ public void testMalformedListStructureWithExtraOLTagStart() {
+     // Named for what it holds: the element is an OL, not a UL.
+     Element orderedListTag = Document.get().createElement("OL");
+     String html = "<OL><LI>" + CONTENT_TEXT + "</LI><LI>" + CONTENT_TEXT + "</LI>";
+     orderedListTag.setInnerHTML(html);
+     mBody.appendChild(orderedListTag);
+     ContentExtractor extractor = new ContentExtractor(mRoot);
+     String extractedContent = extractor.extractContent();
+     assertEquals("<OL>" +
+                     "<OL>" +
+                         "<LI>" + CONTENT_TEXT + "</LI>" +
+                         "<LI>" + CONTENT_TEXT + "</LI>" +
+                     "</OL>" +
+                 "</OL>",
+             TestUtil.removeAllDirAttributes(extractedContent));
+ }
+
+ /**
+  * Feeds an OL whose inner HTML has bare text between two list items (not wrapped in an
+  * LI) and checks that the extracted content keeps the text in place between the items.
+  */
+ public void testMalformedListStructureWithoutLITag() {
+     Element orderedListTag = Document.get().createElement("OL");
+     String html = "<LI>" + CONTENT_TEXT + "</LI>" +
+             CONTENT_TEXT +
+             "<LI>" + CONTENT_TEXT + "</LI>";
+     orderedListTag.setInnerHTML(html);
+     mBody.appendChild(orderedListTag);
+     ContentExtractor extractor = new ContentExtractor(mRoot);
+     String extractedContent = extractor.extractContent();
+     assertEquals("<OL>" +
+                     "<LI>" + CONTENT_TEXT + "</LI>" +
+                     CONTENT_TEXT +
+                     "<LI>" + CONTENT_TEXT + "</LI>" +
+                 "</OL>",
+             TestUtil.removeAllDirAttributes(extractedContent));
+ }
+
private void assertExtractor(String expected, String html) {
mBody.setInnerHTML("");
Element div = TestUtil.createDiv(0);

Powered by Google App Engine
This is Rietveld 408576698