Index: javatests/org/chromium/distiller/PageParameterParserTest.java |
diff --git a/javatests/org/chromium/distiller/PageParameterParserTest.java b/javatests/org/chromium/distiller/PageParameterParserTest.java |
new file mode 100644 |
index 0000000000000000000000000000000000000000..9180a333f68da04836a3b722fab5e9242cf5a171 |
--- /dev/null |
+++ b/javatests/org/chromium/distiller/PageParameterParserTest.java |
@@ -0,0 +1,254 @@ |
+// Copyright 2015 The Chromium Authors. All rights reserved. |
+// Use of this source code is governed by a BSD-style license that can be |
+// found in the LICENSE file. |
+ |
+package org.chromium.distiller; |
+ |
+import com.google.gwt.dom.client.BaseElement; |
+import com.google.gwt.dom.client.Document; |
+ |
+public class PageParameterParserTest extends DomDistillerJsTestCase { |
+ // Root URL of the test site; installed as the <base> href by processDocument() and used as |
+ // the prefix of expected page URLs in the assertions. |
+ private static final String BASE_URL = "http://www.test.com/"; |
+ // URL handed to PageParameterParser.parse() as the document's own URL in most tests. |
+ private static final String TEST_URL = BASE_URL + "foo/bar"; |
+ |
+    /** |
+     * A plain-text "1" followed by one numeric link, and then by two numeric links, is |
+     * expected to produce two and three page entries respectively. |
+     */ |
+    public void testBasic() { |
+        final String twoPageHtml = |
+                "1<br>" + |
+                "<a href=\"/foo/bar/2\">2</a>"; |
+        PageParamInfo result = processDocument(twoPageHtml); |
+        assertEquals(2, result.mAllPageInfo.size()); |
+ |
+        final String threePageHtml = |
+                "1<br>" + |
+                "<a href=\"/foo/bar/2\">2</a>" + |
+                "<a href=\"/foo/bar/3\">3</a>"; |
+        result = processDocument(threePageHtml); |
+        assertEquals(3, result.mAllPageInfo.size()); |
+    } |
+ |
+    /** |
+     * A numeric link to page 2 that is not preceded by a plain-text "1" is expected to |
+     * yield an empty PageParamInfo. |
+     */ |
+    public void testRejectOnlyPage2LinkWithoutCurrentPageText() { |
+        // The fixture has a digit outlink to the 2nd page but no standalone plain-text "1" |
+        // in front of it, so no pagination should be reported. |
+        final String html = |
+                "If there were a '1', pagination should be detected. But there isn't." + |
+                "<a href=\"/foo/bar/2\">2</a>" + |
+                "Main content"; |
+        PageParamInfo result = processDocument(html); |
+        PageParameterDetectorTest.expectEmptyPageParamInfo(result); |
+    } |
+ |
+    /** |
+     * Numeric links that are separated from "1" and from each other by unrelated text are |
+     * expected to yield an empty PageParamInfo. |
+     */ |
+    public void testRejectNonAdjacentOutlinks() { |
+        final String html = |
+                "1<br>" + |
+                "Unrelated terms<br>" + |
+                "<a href=\"/foo/bar/2\">2</a>" + |
+                "Unrelated terms<br>" + |
+                "<a href=\"/foo/bar/3\">3</a>" + |
+                "<a href=\"/foo/bar/all\">All</a>"; |
+        PageParamInfo result = processDocument(html); |
+        PageParameterDetectorTest.expectEmptyPageParamInfo(result); |
+    } |
+ |
+    /** |
+     * With an unrelated "2" link to another path earlier in the document, the adjacent |
+     * "1 / 2 / 3" group at the end is expected to be reported as three pages, with page 2 |
+     * as the next paging URL. |
+     */ |
+    public void testAcceptAdjacentOutlinks() { |
+        final String html = |
+                "Unrelated link: <a href=\"http://www.test.com/other/2\">2</a>" + |
+                "<p>Main content</p>" + |
+                "1<br>" + |
+                "<a href=\"http://www.test.com/foo/bar/2\">2</a>" + |
+                "<a href=\"http://www.test.com/foo/bar/3\">3</a>"; |
+        PageParamInfo result = processDocumentWithoutBase(html, TEST_URL); |
+        assertEquals(3, result.mAllPageInfo.size()); |
+ |
+        // Page 1 is the document itself. |
+        PageParamInfo.PageInfo first = result.mAllPageInfo.get(0); |
+        assertEquals(1, first.mPageNum); |
+        assertEquals(BASE_URL + "foo/bar", first.mUrl); |
+ |
+        PageParamInfo.PageInfo second = result.mAllPageInfo.get(1); |
+        assertEquals(2, second.mPageNum); |
+        assertEquals(BASE_URL + "foo/bar/2", second.mUrl); |
+ |
+        PageParamInfo.PageInfo third = result.mAllPageInfo.get(2); |
+        assertEquals(3, third.mPageNum); |
+        assertEquals(BASE_URL + "foo/bar/3", third.mUrl); |
+ |
+        assertEquals(BASE_URL + "foo/bar/2", result.mNextPagingUrl); |
+    } |
+ |
+    /** |
+     * The same "1 / 2 / 3" navigation group appearing twice (before and after the main |
+     * content) is expected to be reported once, as three pages. |
+     */ |
+    public void testAcceptDuplicatePatterns() { |
+        final String html = |
+                "1<br>" + |
+                "<a href=\"http://www.test.com/foo/bar/2\">2</a>" + |
+                "<a href=\"http://www.test.com/foo/bar/3\">3</a>" + |
+                "<p>Main content</p>" + |
+                "1<br>" + |
+                "<a href=\"http://www.test.com/foo/bar/2\">2</a>" + |
+                "<a href=\"http://www.test.com/foo/bar/3\">3</a>"; |
+        PageParamInfo result = processDocument(html); |
+        assertEquals(3, result.mAllPageInfo.size()); |
+ |
+        PageParamInfo.PageInfo first = result.mAllPageInfo.get(0); |
+        assertEquals(1, first.mPageNum); |
+        assertEquals(BASE_URL + "foo/bar", first.mUrl); |
+ |
+        PageParamInfo.PageInfo second = result.mAllPageInfo.get(1); |
+        assertEquals(2, second.mPageNum); |
+        assertEquals(BASE_URL + "foo/bar/2", second.mUrl); |
+ |
+        PageParamInfo.PageInfo third = result.mAllPageInfo.get(2); |
+        assertEquals(3, third.mPageNum); |
+        assertEquals(BASE_URL + "foo/bar/3", third.mUrl); |
+ |
+        assertEquals(BASE_URL + "foo/bar/2", result.mNextPagingUrl); |
+    } |
+ |
+    /** |
+     * When the document contains both "size-25 / size-50 / size-100" links and a |
+     * "1 / 2 / 3" group, the result is expected to be of type PAGE_NUMBER and to describe |
+     * the "1 / 2 / 3" group. |
+     */ |
+    public void testPreferPageNumber() { |
+        final String html = |
+                "<a href=\"http://www.test.com/foo/bar/size-25\">25</a>" + |
+                "<a href=\"http://www.test.com/foo/bar/size-50\">50</a>" + |
+                "<a href=\"http://www.test.com/foo/bar/size-100\">100</a>" + |
+                "<p>Main content</p>" + |
+                "1<br>" + |
+                "<a href=\"http://www.test.com/foo/bar/2\">2</a>" + |
+                "<a href=\"http://www.test.com/foo/bar/3\">3</a>"; |
+        PageParamInfo result = processDocument(html); |
+        assertEquals(PageParamInfo.Type.PAGE_NUMBER, result.mType); |
+        assertEquals(3, result.mAllPageInfo.size()); |
+ |
+        PageParamInfo.PageInfo first = result.mAllPageInfo.get(0); |
+        assertEquals(1, first.mPageNum); |
+        assertEquals(BASE_URL + "foo/bar", first.mUrl); |
+ |
+        PageParamInfo.PageInfo second = result.mAllPageInfo.get(1); |
+        assertEquals(2, second.mPageNum); |
+        assertEquals(BASE_URL + "foo/bar/2", second.mUrl); |
+ |
+        PageParamInfo.PageInfo third = result.mAllPageInfo.get(2); |
+        assertEquals(3, third.mPageNum); |
+        assertEquals(BASE_URL + "foo/bar/3", third.mUrl); |
+ |
+        assertEquals(BASE_URL + "foo/bar/2", result.mNextPagingUrl); |
+    } |
+ |
+    /** |
+     * The document carries two numeric link groups, one using "?start=" and one using |
+     * "?offset=". The assertions expect a PAGE_NUMBER result of four pages built from the |
+     * "?start=" links only, with a formula of coefficient 10 and delta -10. |
+     */ |
+    public void testRejectMultiplePageNumberPatterns() { |
+        final String html = |
+                "<a href=\"http://www.google.com/test/list.php?start=10\">2</a>" + |
+                "<a href=\"http://www.google.com/test/list.php?start=20\">3</a>" + |
+                "<a href=\"http://www.google.com/test/list.php?start=30\">4</a>" + |
+                "<p>Main content</p>" + |
+                "<a href=\"http://www.google.com/test/list.php?offset=10\">2</a>" + |
+                "<a href=\"http://www.google.com/test/list.php?offset=20\">3</a>" + |
+                "<a href=\"http://www.google.com/test/list.php?offset=30\">4</a>" + |
+                "<a href=\"http://www.google.com/test/list.php?offset=all\">All</a>"; |
+        PageParamInfo result = |
+                processDocumentWithoutBase(html, "http://www.google.com/test/list.php"); |
+ |
+        assertEquals(PageParamInfo.Type.PAGE_NUMBER, result.mType); |
+        assertEquals(4, result.mAllPageInfo.size()); |
+ |
+        PageParamInfo.PageInfo first = result.mAllPageInfo.get(0); |
+        assertEquals(1, first.mPageNum); |
+        assertEquals("http://www.google.com/test/list.php", first.mUrl); |
+ |
+        PageParamInfo.PageInfo second = result.mAllPageInfo.get(1); |
+        assertEquals(2, second.mPageNum); |
+        assertEquals("http://www.google.com/test/list.php?start=10", second.mUrl); |
+ |
+        PageParamInfo.PageInfo third = result.mAllPageInfo.get(2); |
+        assertEquals(3, third.mPageNum); |
+        assertEquals("http://www.google.com/test/list.php?start=20", third.mUrl); |
+ |
+        PageParamInfo.PageInfo fourth = result.mAllPageInfo.get(3); |
+        assertEquals(4, fourth.mPageNum); |
+        assertEquals("http://www.google.com/test/list.php?start=30", fourth.mUrl); |
+ |
+        // A formula must have been derived before its fields are inspected. |
+        assertTrue(result.mFormula != null); |
+        assertEquals(10, result.mFormula.mCoefficient); |
+        assertEquals(-10, result.mFormula.mDelta); |
+        assertEquals("http://www.google.com/test/list.php?start=10", result.mNextPagingUrl); |
+    } |
+ |
+    /** |
+     * A numeric link whose href is "javascript:void(0)" is expected to yield an empty |
+     * PageParamInfo. |
+     */ |
+    public void testInvalidAndVoidLinks() { |
+        final String html = |
+                "1<br>" + |
+                "<a href=\"javascript:void(0)\">2</a>"; |
+        PageParamInfo result = processDocument(html); |
+        PageParameterDetectorTest.expectEmptyPageParamInfo(result); |
+    } |
+ |
+    /** |
+     * A numeric link pointing at www.foo.com while the document URL is on www.test.com is |
+     * expected to yield an empty PageParamInfo. |
+     */ |
+    public void testDifferentHostLinks() { |
+        final String html = |
+                "1<br>" + |
+                "<a href=\"http://www.foo.com/foo/bar/2\">2</a>"; |
+        PageParamInfo result = processDocumentWithoutBase(html, TEST_URL); |
+        PageParameterDetectorTest.expectEmptyPageParamInfo(result); |
+    } |
+ |
+    /** |
+     * A whitespace-only text node between the "1" line and the page-2 link is expected not |
+     * to prevent detection of two pages. |
+     */ |
+    public void testWhitespaceSibling() { |
+        final String html = |
+                "1<br>" + |
+                " " + |
+                "<a href=\"/foo/bar/2\">2</a>"; |
+        PageParamInfo result = processDocument(html); |
+        assertEquals(2, result.mAllPageInfo.size()); |
+    } |
+ |
+    /** |
+     * Two numeric links separated only by a comma are expected to be detected as two |
+     * pages. |
+     */ |
+    public void testPunctuationSibling() { |
+        final String html = |
+                "<a href=\"/foo/bar/1\">1</a>" + |
+                "," + |
+                "<a href=\"/foo/bar/2\">2</a>"; |
+        PageParamInfo result = processDocument(html); |
+        assertEquals(2, result.mAllPageInfo.size()); |
+    } |
+ |
+ public void testParentSibling0() { |
wychen
2015/09/21 23:08:03
Should we add tests for things like this to test s
kuan
2015/10/02 15:59:17
Done. FYI, I already had testPunctuationSibling() t
|
+ PageParamInfo info = processDocumentWithoutBase( |
+ "<div>begin" + |
+ "<strong>1</strong>" + |
+ "<div><a href=\"http://www.test.com/foo/bar/2\">2</a></div>" + |
+ "<div><a href=\"http://www.test.com/foo/bar/3\">3</a></div>" + |
+ "end</div>", |
+ TEST_URL); |
+ assertEquals(3, info.mAllPageInfo.size()); |
+ PageParamInfo.PageInfo page = info.mAllPageInfo.get(0); |
+ assertEquals(1, page.mPageNum); |
+ assertEquals(TEST_URL, page.mUrl); |
+ page = info.mAllPageInfo.get(1); |
+ assertEquals(2, page.mPageNum); |
+ assertEquals(TEST_URL + "/2", page.mUrl); |
+ page = info.mAllPageInfo.get(2); |
+ assertEquals(3, page.mPageNum); |
+ assertEquals(TEST_URL + "/3", page.mUrl); |
+ assertEquals("http://www.test.com/foo/bar/2", info.mNextPagingUrl); |
+ } |
+ |
+    /** |
+     * Current page "2" is a {@code <strong>} element sitting between div-wrapped links to |
+     * pages 1 and 3; the document URL is page 2. The assertions expect two entries (pages |
+     * 1 and 3) and page 3 as the next paging URL. |
+     */ |
+    public void testParentSibling1() { |
+        final String html = |
+                "<div>begin" + |
+                    "<div><a href=\"http://www.test.com/foo/bar\">1</a></div>" + |
+                    "<strong>2</strong>" + |
+                    "<div><a href=\"http://www.test.com/foo/bar/3\">3</a></div>" + |
+                "end</div>"; |
+        PageParamInfo result = |
+                processDocumentWithoutBase(html, "http://www.test.com/foo/bar/2"); |
+        assertEquals(2, result.mAllPageInfo.size()); |
+ |
+        PageParamInfo.PageInfo first = result.mAllPageInfo.get(0); |
+        assertEquals(1, first.mPageNum); |
+        assertEquals(TEST_URL, first.mUrl); |
+ |
+        PageParamInfo.PageInfo last = result.mAllPageInfo.get(1); |
+        assertEquals(3, last.mPageNum); |
+        assertEquals(TEST_URL + "/3", last.mUrl); |
+ |
+        assertEquals("http://www.test.com/foo/bar/3", result.mNextPagingUrl); |
+    } |
+ |
+    /** |
+     * Current page "3" is a {@code <strong>} element following div-wrapped links to pages |
+     * 1 and 2; the document URL is page 3. The assertions expect two entries (pages 1 and |
+     * 2) and an empty next paging URL. |
+     */ |
+    public void testParentSibling2() { |
+        final String html = |
+                "<div>begin" + |
+                    "<div><a href=\"http://www.test.com/foo/bar\">1</a></div>" + |
+                    "<div><a href=\"http://www.test.com/foo/bar/2\">2</a></div>" + |
+                    "<strong>3</strong>" + |
+                "end</div>"; |
+        PageParamInfo result = |
+                processDocumentWithoutBase(html, "http://www.test.com/foo/bar/3"); |
+        assertEquals(2, result.mAllPageInfo.size()); |
+ |
+        PageParamInfo.PageInfo first = result.mAllPageInfo.get(0); |
+        assertEquals(1, first.mPageNum); |
+        assertEquals(TEST_URL, first.mUrl); |
+ |
+        PageParamInfo.PageInfo second = result.mAllPageInfo.get(1); |
+        assertEquals(2, second.mPageNum); |
+        assertEquals(TEST_URL + "/2", second.mUrl); |
+ |
+        assertTrue(result.mNextPagingUrl.isEmpty()); |
+    } |
+ |
+    /** |
+     * Installs {@code content} as the test document's body and parses it with |
+     * {@link #TEST_URL} as the document URL, while a temporary {@code <base>} element |
+     * pointing at {@link #BASE_URL} is attached to the head so that relative anchors in |
+     * {@code content} resolve against it. |
+     * |
+     * @param content HTML markup to set as the body's inner HTML. |
+     * @return the result of {@code PageParameterParser.parse(TEST_URL, null)}. |
+     */ |
+    private PageParamInfo processDocument(String content) { |
+        // Create and add a <base> element so that all anchors are based off it. |
+        BaseElement baseTag = Document.get().createBaseElement(); |
+        baseTag.setHref(BASE_URL); |
+        mHead.appendChild(baseTag); |
+        try { |
+            // Replace the body's contents with the fixture markup. |
+            mBody.setInnerHTML(content); |
+ |
+            return PageParameterParser.parse(TEST_URL, null); |
+        } finally { |
+            // Always detach the <base> element, even if parsing throws, so that it cannot |
+            // stay attached to the head after a failure. |
+            mHead.removeChild(baseTag); |
+        } |
+    } |
+ |
+    /** |
+     * Installs {@code content} as the test document's body and parses it without adding |
+     * any {@code <base>} element. |
+     * |
+     * @param content HTML markup to set as the body's inner HTML. |
+     * @param originalUrl URL passed to the parser as the document's own URL. |
+     * @return the result of {@code PageParameterParser.parse(originalUrl, null)}. |
+     */ |
+    private PageParamInfo processDocumentWithoutBase(String content, String originalUrl) { |
+        mBody.setInnerHTML(content); |
+        PageParamInfo parsed = PageParameterParser.parse(originalUrl, null); |
+        return parsed; |
+    } |
+ |
+} |