| OLD | NEW | 
|---|
| (Empty) |  | 
|  | 1 // Copyright 2015 The Chromium Authors. All rights reserved. | 
|  | 2 // Use of this source code is governed by a BSD-style license that can be | 
|  | 3 // found in the LICENSE file. | 
|  | 4 | 
|  | 5 package org.chromium.distiller; | 
|  | 6 | 
|  | 7 import com.google.gwt.dom.client.BaseElement; | 
|  | 8 import com.google.gwt.dom.client.Document; | 
|  | 9 | 
|  | 10 public class PageParameterParserTest extends DomDistillerJsTestCase { | 
|  | 11     private static final String BASE_URL = "http://www.test.com/"; | 
|  | 12     private static final String TEST_URL = BASE_URL + "foo/bar"; | 
|  | 13 | 
|  | 14     public void testBasic() { | 
|  | 15         PageParamInfo info = processDocument( | 
|  | 16             "1<br>" + | 
|  | 17             "<a href=\"/foo/bar/2\">2</a>"); | 
|  | 18         assertEquals(2, info.mAllPageInfo.size()); | 
|  | 19 | 
|  | 20         info = processDocument( | 
|  | 21             "1<br>" + | 
|  | 22             "<a href=\"/foo/bar/2\">2</a>" + | 
|  | 23             "<a href=\"/foo/bar/3\">3</a>"); | 
|  | 24         assertEquals(3, info.mAllPageInfo.size()); | 
|  | 25     } | 
|  | 26 | 
|  | 27     public void testRejectOnlyPage2LinkWithoutCurrentPageText() { | 
|  | 28         // Although there is a digital outlink to 2nd page, there is no plain te
     xt "1" | 
|  | 29         // before it, so there is no pagination. | 
|  | 30         PageParamInfo info = processDocument( | 
|  | 31             "If there were a '1', pagination should be detected. But there isn't
     ." + | 
|  | 32             "<a href=\"/foo/bar/2\">2</a>" + | 
|  | 33             "Main content"); | 
|  | 34         PageParameterDetectorTest.expectEmptyPageParamInfo(info); | 
|  | 35     } | 
|  | 36 | 
|  | 37     public void testRejectNonAdjacentOutlinks() { | 
|  | 38         PageParamInfo info = processDocument( | 
|  | 39             "1<br>" + | 
|  | 40             "Unrelated terms<br>" + | 
|  | 41             "<a href=\"/foo/bar/2\">2</a>" + | 
|  | 42             "Unrelated terms<br>" + | 
|  | 43             "<a href=\"/foo/bar/3\">3</a>" + | 
|  | 44             "<a href=\"/foo/bar/all\">All</a>"); | 
|  | 45         PageParameterDetectorTest.expectEmptyPageParamInfo(info); | 
|  | 46     } | 
|  | 47 | 
|  | 48     public void testAcceptAdjacentOutlinks() { | 
|  | 49         PageParamInfo info = processDocumentWithoutBase( | 
|  | 50             "Unrelated link: <a href=\"http://www.test.com/other/2\">2</a>" + | 
|  | 51             "<p>Main content</p>" + | 
|  | 52             "1<br>" + | 
|  | 53             "<a href=\"http://www.test.com/foo/bar/2\">2</a>" + | 
|  | 54             "<a href=\"http://www.test.com/foo/bar/3\">3</a>", | 
|  | 55             TEST_URL); | 
|  | 56         assertEquals(3, info.mAllPageInfo.size()); | 
|  | 57         PageParamInfo.PageInfo page = info.mAllPageInfo.get(0); | 
|  | 58         assertEquals(1, page.mPageNum); | 
|  | 59         assertEquals(BASE_URL + "foo/bar", page.mUrl); | 
|  | 60         page = info.mAllPageInfo.get(1); | 
|  | 61         assertEquals(2, page.mPageNum); | 
|  | 62         assertEquals(BASE_URL + "foo/bar/2", page.mUrl); | 
|  | 63         page = info.mAllPageInfo.get(2); | 
|  | 64         assertEquals(3, page.mPageNum); | 
|  | 65         assertEquals(BASE_URL + "foo/bar/3", page.mUrl); | 
|  | 66         assertEquals(BASE_URL + "foo/bar/2", info.mNextPagingUrl); | 
|  | 67     } | 
|  | 68 | 
|  | 69     public void testAcceptDuplicatePatterns() { | 
|  | 70         PageParamInfo info = processDocument( | 
|  | 71             "1<br>" + | 
|  | 72             "<a href=\"http://www.test.com/foo/bar/2\">2</a>" + | 
|  | 73             "<a href=\"http://www.test.com/foo/bar/3\">3</a>" + | 
|  | 74             "<p>Main content</p>" + | 
|  | 75             "1<br>" + | 
|  | 76             "<a href=\"http://www.test.com/foo/bar/2\">2</a>" + | 
|  | 77             "<a href=\"http://www.test.com/foo/bar/3\">3</a>"); | 
|  | 78         assertEquals(3, info.mAllPageInfo.size()); | 
|  | 79         PageParamInfo.PageInfo page = info.mAllPageInfo.get(0); | 
|  | 80         assertEquals(1, page.mPageNum); | 
|  | 81         assertEquals(BASE_URL + "foo/bar", page.mUrl); | 
|  | 82         page = info.mAllPageInfo.get(1); | 
|  | 83         assertEquals(2, page.mPageNum); | 
|  | 84         assertEquals(BASE_URL + "foo/bar/2", page.mUrl); | 
|  | 85         page = info.mAllPageInfo.get(2); | 
|  | 86         assertEquals(3, page.mPageNum); | 
|  | 87         assertEquals(BASE_URL + "foo/bar/3", page.mUrl); | 
|  | 88         assertEquals(BASE_URL + "foo/bar/2", info.mNextPagingUrl); | 
|  | 89     } | 
|  | 90 | 
|  | 91     public void testPreferPageNumber() { | 
|  | 92         PageParamInfo info = processDocument( | 
|  | 93             "<a href=\"http://www.test.com/foo/bar/size-25\">25</a>" + | 
|  | 94             "<a href=\"http://www.test.com/foo/bar/size-50\">50</a>" + | 
|  | 95             "<a href=\"http://www.test.com/foo/bar/size-100\">100</a>" + | 
|  | 96             "<p>Main content</p>" + | 
|  | 97             "1<br>" + | 
|  | 98             "<a href=\"http://www.test.com/foo/bar/2\">2</a>" + | 
|  | 99             "<a href=\"http://www.test.com/foo/bar/3\">3</a>"); | 
|  | 100         assertEquals(PageParamInfo.Type.PAGE_NUMBER, info.mType); | 
|  | 101         assertEquals(3, info.mAllPageInfo.size()); | 
|  | 102         PageParamInfo.PageInfo page = info.mAllPageInfo.get(0); | 
|  | 103         assertEquals(1, page.mPageNum); | 
|  | 104         assertEquals(BASE_URL + "foo/bar", page.mUrl); | 
|  | 105         page = info.mAllPageInfo.get(1); | 
|  | 106         assertEquals(2, page.mPageNum); | 
|  | 107         assertEquals(BASE_URL + "foo/bar/2", page.mUrl); | 
|  | 108         page = info.mAllPageInfo.get(2); | 
|  | 109         assertEquals(3, page.mPageNum); | 
|  | 110         assertEquals(BASE_URL + "foo/bar/3", page.mUrl); | 
|  | 111         assertEquals(BASE_URL + "foo/bar/2", info.mNextPagingUrl); | 
|  | 112     } | 
|  | 113 | 
|  | 114     public void testRejectMultiplePageNumberPatterns() { | 
|  | 115         PageParamInfo info = processDocumentWithoutBase( | 
|  | 116             "<a href=\"http://www.google.com/test/list.php?start=10\">2</a>" + | 
|  | 117             "<a href=\"http://www.google.com/test/list.php?start=20\">3</a>" + | 
|  | 118             "<a href=\"http://www.google.com/test/list.php?start=30\">4</a>" + | 
|  | 119             "<p>Main content</p>" + | 
|  | 120             "<a href=\"http://www.google.com/test/list.php?offset=10\">2</a>" + | 
|  | 121             "<a href=\"http://www.google.com/test/list.php?offset=20\">3</a>" + | 
|  | 122             "<a href=\"http://www.google.com/test/list.php?offset=30\">4</a>" + | 
|  | 123             "<a href=\"http://www.google.com/test/list.php?offset=all\">All</a>"
     , | 
|  | 124             "http://www.google.com/test/list.php"); | 
|  | 125 | 
|  | 126         assertEquals(PageParamInfo.Type.PAGE_NUMBER, info.mType); | 
|  | 127         assertEquals(4, info.mAllPageInfo.size()); | 
|  | 128         PageParamInfo.PageInfo page = info.mAllPageInfo.get(0); | 
|  | 129         assertEquals(1, page.mPageNum); | 
|  | 130         assertEquals("http://www.google.com/test/list.php", page.mUrl); | 
|  | 131         page = info.mAllPageInfo.get(1); | 
|  | 132         assertEquals(2, page.mPageNum); | 
|  | 133         assertEquals("http://www.google.com/test/list.php?start=10", page.mUrl); | 
|  | 134         page = info.mAllPageInfo.get(2); | 
|  | 135         assertEquals(3, page.mPageNum); | 
|  | 136         assertEquals("http://www.google.com/test/list.php?start=20", page.mUrl); | 
|  | 137         page = info.mAllPageInfo.get(3); | 
|  | 138         assertEquals(4, page.mPageNum); | 
|  | 139         assertEquals("http://www.google.com/test/list.php?start=30", page.mUrl); | 
|  | 140         assertTrue(info.mFormula != null); | 
|  | 141         assertEquals(10, info.mFormula.mCoefficient); | 
|  | 142         assertEquals(-10, info.mFormula.mDelta); | 
|  | 143         assertEquals("http://www.google.com/test/list.php?start=10", info.mNextP
     agingUrl); | 
|  | 144     } | 
|  | 145 | 
|  | 146     public void testInvalidAndVoidLinks() { | 
|  | 147         PageParamInfo info = processDocument( | 
|  | 148             "1<br>" + | 
|  | 149             "<a href=\"javascript:void(0)\">2</a>"); | 
|  | 150         PageParameterDetectorTest.expectEmptyPageParamInfo(info); | 
|  | 151     } | 
|  | 152 | 
|  | 153     public void testDifferentHostLinks() { | 
|  | 154         PageParamInfo info = processDocumentWithoutBase( | 
|  | 155             "1<br>" + | 
|  | 156             "<a href=\"http://www.foo.com/foo/bar/2\">2</a>", | 
|  | 157             TEST_URL); | 
|  | 158         PageParameterDetectorTest.expectEmptyPageParamInfo(info); | 
|  | 159     } | 
|  | 160 | 
|  | 161     public void testWhitespaceSibling() { | 
|  | 162         PageParamInfo info = processDocument( | 
|  | 163             "1<br>" + | 
|  | 164             "       " + | 
|  | 165             "<a href=\"/foo/bar/2\">2</a>"); | 
|  | 166         assertEquals(2, info.mAllPageInfo.size()); | 
|  | 167     } | 
|  | 168 | 
|  | 169     public void testPunctuationSibling() { | 
|  | 170         PageParamInfo info = processDocument( | 
|  | 171             "<a href=\"/foo/bar/1\">1</a>" + | 
|  | 172             "," + | 
|  | 173             "<a href=\"/foo/bar/2\">2</a>"); | 
|  | 174         assertEquals(2, info.mAllPageInfo.size()); | 
|  | 175     } | 
|  | 176 | 
|  | 177     public void testSeparatorSibling() { | 
|  | 178         PageParamInfo info = processDocument( | 
|  | 179             "<div>" + | 
|  | 180             "1 | " + | 
|  | 181             "<a href=\"/foo/bar/2\">2</a>" + | 
|  | 182             " | " + | 
|  | 183             "<a href=\"/foo/bar/3\">3</a>" + | 
|  | 184             "</div>"); | 
|  | 185         assertEquals(3, info.mAllPageInfo.size()); | 
|  | 186     } | 
|  | 187 | 
|  | 188     public void testParentSibling0() { | 
|  | 189         PageParamInfo info = processDocumentWithoutBase( | 
|  | 190             "<div>begin" + | 
|  | 191             "<strong>1</strong>" + | 
|  | 192             "<div><a href=\"http://www.test.com/foo/bar/2\">2</a></div>" + | 
|  | 193             "<div><a href=\"http://www.test.com/foo/bar/3\">3</a></div>" + | 
|  | 194             "end</div>", | 
|  | 195             TEST_URL); | 
|  | 196         assertEquals(3, info.mAllPageInfo.size()); | 
|  | 197         PageParamInfo.PageInfo page = info.mAllPageInfo.get(0); | 
|  | 198         assertEquals(1, page.mPageNum); | 
|  | 199         assertEquals(TEST_URL, page.mUrl); | 
|  | 200         page = info.mAllPageInfo.get(1); | 
|  | 201         assertEquals(2, page.mPageNum); | 
|  | 202         assertEquals(TEST_URL + "/2", page.mUrl); | 
|  | 203         page = info.mAllPageInfo.get(2); | 
|  | 204         assertEquals(3, page.mPageNum); | 
|  | 205         assertEquals(TEST_URL + "/3", page.mUrl); | 
|  | 206         assertEquals("http://www.test.com/foo/bar/2", info.mNextPagingUrl); | 
|  | 207     } | 
|  | 208 | 
|  | 209     public void testParentSibling1() { | 
|  | 210         PageParamInfo info = processDocumentWithoutBase( | 
|  | 211             "<div>begin" + | 
|  | 212             "<div><a href=\"http://www.test.com/foo/bar\">1</a></div>" + | 
|  | 213             "<strong>2</strong>" + | 
|  | 214             "<div><a href=\"http://www.test.com/foo/bar/3\">3</a></div>" + | 
|  | 215             "end</div>", | 
|  | 216             "http://www.test.com/foo/bar/2"); | 
|  | 217         assertEquals(2, info.mAllPageInfo.size()); | 
|  | 218         PageParamInfo.PageInfo page = info.mAllPageInfo.get(0); | 
|  | 219         assertEquals(1, page.mPageNum); | 
|  | 220         assertEquals(TEST_URL, page.mUrl); | 
|  | 221         page = info.mAllPageInfo.get(1); | 
|  | 222         assertEquals(3, page.mPageNum); | 
|  | 223         assertEquals(TEST_URL + "/3", page.mUrl); | 
|  | 224         assertEquals("http://www.test.com/foo/bar/3", info.mNextPagingUrl); | 
|  | 225     } | 
|  | 226 | 
|  | 227     public void testParentSibling2() { | 
|  | 228         PageParamInfo info = processDocumentWithoutBase( | 
|  | 229             "<div>begin" + | 
|  | 230             "<div><a href=\"http://www.test.com/foo/bar\">1</a></div>" + | 
|  | 231             "<div><a href=\"http://www.test.com/foo/bar/2\">2</a></div>" + | 
|  | 232             "<strong>3</strong>" + | 
|  | 233             "end</div>", | 
|  | 234             "http://www.test.com/foo/bar/3"); | 
|  | 235         assertEquals(2, info.mAllPageInfo.size()); | 
|  | 236         PageParamInfo.PageInfo page = info.mAllPageInfo.get(0); | 
|  | 237         assertEquals(1, page.mPageNum); | 
|  | 238         assertEquals(TEST_URL, page.mUrl); | 
|  | 239         page = info.mAllPageInfo.get(1); | 
|  | 240         assertEquals(2, page.mPageNum); | 
|  | 241         assertEquals(TEST_URL + "/2", page.mUrl); | 
|  | 242         assertTrue(info.mNextPagingUrl.isEmpty()); | 
|  | 243     } | 
|  | 244 | 
|  | 245     public void testNestedStructure() { | 
|  | 246         PageParamInfo info = processDocumentWithoutBase( | 
|  | 247             "<div>begin" + | 
|  | 248             "<span><a href=\"http://www.test.com/foo?page=2\">‹‹ P
     rev</a></span>" + | 
|  | 249             "<span><a href=\"http://www.test.com/foo?page=1\">1</a></span>" + | 
|  | 250             "<span><a href=\"http://www.test.com/foo?page=2\">2</a></span>" + | 
|  | 251             "<span>3</span>" + | 
|  | 252             "<span><a href=\"http://www.test.com/foo?page=4\">4</a></span>" + | 
|  | 253             "<span><a href=\"http://www.test.com/foo?page=5\">5</a></span>" + | 
|  | 254             "<span>...</span>" + | 
|  | 255             "<span><a href=\"http://www.test.com/foo?page=48\">48</a></span>" + | 
|  | 256             "<span><a href=\"http://www.test.com/foo?page=4\">Next ›&rsaq
     uo;</a></span>" + | 
|  | 257             "</div>", | 
|  | 258             "http://www.test.com/foo?page=3"); | 
|  | 259         assertEquals(5, info.mAllPageInfo.size()); | 
|  | 260         final String urlPrefix = "http://www.test.com/foo?page="; | 
|  | 261         PageParamInfo.PageInfo page = info.mAllPageInfo.get(0); | 
|  | 262         assertEquals(1, page.mPageNum); | 
|  | 263         assertEquals(urlPrefix + "1", page.mUrl); | 
|  | 264         page = info.mAllPageInfo.get(1); | 
|  | 265         assertEquals(2, page.mPageNum); | 
|  | 266         assertEquals(urlPrefix + "2", page.mUrl); | 
|  | 267         page = info.mAllPageInfo.get(2); | 
|  | 268         assertEquals(4, page.mPageNum); | 
|  | 269         assertEquals(urlPrefix + "4", page.mUrl); | 
|  | 270         page = info.mAllPageInfo.get(3); | 
|  | 271         assertEquals(5, page.mPageNum); | 
|  | 272         assertEquals(urlPrefix + "5", page.mUrl); | 
|  | 273         page = info.mAllPageInfo.get(4); | 
|  | 274         assertEquals(48, page.mPageNum); | 
|  | 275         assertEquals(urlPrefix + "48", page.mUrl); | 
|  | 276         assertEquals(urlPrefix + "4", info.mNextPagingUrl); | 
|  | 277     } | 
|  | 278 | 
|  | 279     private PageParamInfo processDocument(String content) { | 
|  | 280         // Create and add a <base> element so that all anchors are based off it. | 
|  | 281         BaseElement baseTag = Document.get().createBaseElement(); | 
|  | 282         baseTag.setHref(BASE_URL); | 
|  | 283         mHead.appendChild(baseTag); | 
|  | 284 | 
|  | 285         // Append content to body. | 
|  | 286         mBody.setInnerHTML(content); | 
|  | 287 | 
|  | 288         PageParamInfo info = PageParameterParser.parse(TEST_URL, null); | 
|  | 289         mHead.removeChild(baseTag); | 
|  | 290         return info; | 
|  | 291     } | 
|  | 292 | 
|  | 293     private PageParamInfo processDocumentWithoutBase(String content, String orig
     inalUrl) { | 
|  | 294         // Append content to body. | 
|  | 295         mBody.setInnerHTML(content); | 
|  | 296         return PageParameterParser.parse(originalUrl, null); | 
|  | 297     } | 
|  | 298 | 
|  | 299 } | 
| OLD | NEW | 
|---|