OLD | NEW |
(Empty) | |
| 1 // Copyright 2015 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. |
| 4 |
| 5 package org.chromium.distiller; |
| 6 |
| 7 import com.google.gwt.dom.client.BaseElement; |
| 8 import com.google.gwt.dom.client.Document; |
| 9 |
| 10 public class PageParameterParserTest extends DomDistillerJsTestCase { |
| 11 private static final String BASE_URL = "http://www.test.com/"; |
| 12 private static final String TEST_URL = BASE_URL + "foo/bar"; |
| 13 |
| 14 public void testBasic() { |
| 15 PageParamInfo info = processDocument( |
| 16 "1<br>" + |
| 17 "<a href=\"/foo/bar/2\">2</a>"); |
| 18 assertEquals(2, info.mAllPageInfo.size()); |
| 19 |
| 20 info = processDocument( |
| 21 "1<br>" + |
| 22 "<a href=\"/foo/bar/2\">2</a>" + |
| 23 "<a href=\"/foo/bar/3\">3</a>"); |
| 24 assertEquals(3, info.mAllPageInfo.size()); |
| 25 } |
| 26 |
| 27 public void testRejectOnlyPage2LinkWithoutCurrentPageText() { |
| 28 // Although there is a digital outlink to 2nd page, there is no plain te
xt "1" |
| 29 // before it, so there is no pagination. |
| 30 PageParamInfo info = processDocument( |
| 31 "If there were a '1', pagination should be detected. But there isn't
." + |
| 32 "<a href=\"/foo/bar/2\">2</a>" + |
| 33 "Main content"); |
| 34 PageParameterDetectorTest.expectEmptyPageParamInfo(info); |
| 35 } |
| 36 |
| 37 public void testRejectNonAdjacentOutlinks() { |
| 38 PageParamInfo info = processDocument( |
| 39 "1<br>" + |
| 40 "Unrelated terms<br>" + |
| 41 "<a href=\"/foo/bar/2\">2</a>" + |
| 42 "Unrelated terms<br>" + |
| 43 "<a href=\"/foo/bar/3\">3</a>" + |
| 44 "<a href=\"/foo/bar/all\">All</a>"); |
| 45 PageParameterDetectorTest.expectEmptyPageParamInfo(info); |
| 46 } |
| 47 |
| 48 public void testAcceptAdjacentOutlinks() { |
| 49 PageParamInfo info = processDocumentWithoutBase( |
| 50 "Unrelated link: <a href=\"http://www.test.com/other/2\">2</a>" + |
| 51 "<p>Main content</p>" + |
| 52 "1<br>" + |
| 53 "<a href=\"http://www.test.com/foo/bar/2\">2</a>" + |
| 54 "<a href=\"http://www.test.com/foo/bar/3\">3</a>", |
| 55 TEST_URL); |
| 56 assertEquals(3, info.mAllPageInfo.size()); |
| 57 PageParamInfo.PageInfo page = info.mAllPageInfo.get(0); |
| 58 assertEquals(1, page.mPageNum); |
| 59 assertEquals(BASE_URL + "foo/bar", page.mUrl); |
| 60 page = info.mAllPageInfo.get(1); |
| 61 assertEquals(2, page.mPageNum); |
| 62 assertEquals(BASE_URL + "foo/bar/2", page.mUrl); |
| 63 page = info.mAllPageInfo.get(2); |
| 64 assertEquals(3, page.mPageNum); |
| 65 assertEquals(BASE_URL + "foo/bar/3", page.mUrl); |
| 66 assertEquals(BASE_URL + "foo/bar/2", info.mNextPagingUrl); |
| 67 } |
| 68 |
| 69 public void testAcceptDuplicatePatterns() { |
| 70 PageParamInfo info = processDocument( |
| 71 "1<br>" + |
| 72 "<a href=\"http://www.test.com/foo/bar/2\">2</a>" + |
| 73 "<a href=\"http://www.test.com/foo/bar/3\">3</a>" + |
| 74 "<p>Main content</p>" + |
| 75 "1<br>" + |
| 76 "<a href=\"http://www.test.com/foo/bar/2\">2</a>" + |
| 77 "<a href=\"http://www.test.com/foo/bar/3\">3</a>"); |
| 78 assertEquals(3, info.mAllPageInfo.size()); |
| 79 PageParamInfo.PageInfo page = info.mAllPageInfo.get(0); |
| 80 assertEquals(1, page.mPageNum); |
| 81 assertEquals(BASE_URL + "foo/bar", page.mUrl); |
| 82 page = info.mAllPageInfo.get(1); |
| 83 assertEquals(2, page.mPageNum); |
| 84 assertEquals(BASE_URL + "foo/bar/2", page.mUrl); |
| 85 page = info.mAllPageInfo.get(2); |
| 86 assertEquals(3, page.mPageNum); |
| 87 assertEquals(BASE_URL + "foo/bar/3", page.mUrl); |
| 88 assertEquals(BASE_URL + "foo/bar/2", info.mNextPagingUrl); |
| 89 } |
| 90 |
| 91 public void testPreferPageNumber() { |
| 92 PageParamInfo info = processDocument( |
| 93 "<a href=\"http://www.test.com/foo/bar/size-25\">25</a>" + |
| 94 "<a href=\"http://www.test.com/foo/bar/size-50\">50</a>" + |
| 95 "<a href=\"http://www.test.com/foo/bar/size-100\">100</a>" + |
| 96 "<p>Main content</p>" + |
| 97 "1<br>" + |
| 98 "<a href=\"http://www.test.com/foo/bar/2\">2</a>" + |
| 99 "<a href=\"http://www.test.com/foo/bar/3\">3</a>"); |
| 100 assertEquals(PageParamInfo.Type.PAGE_NUMBER, info.mType); |
| 101 assertEquals(3, info.mAllPageInfo.size()); |
| 102 PageParamInfo.PageInfo page = info.mAllPageInfo.get(0); |
| 103 assertEquals(1, page.mPageNum); |
| 104 assertEquals(BASE_URL + "foo/bar", page.mUrl); |
| 105 page = info.mAllPageInfo.get(1); |
| 106 assertEquals(2, page.mPageNum); |
| 107 assertEquals(BASE_URL + "foo/bar/2", page.mUrl); |
| 108 page = info.mAllPageInfo.get(2); |
| 109 assertEquals(3, page.mPageNum); |
| 110 assertEquals(BASE_URL + "foo/bar/3", page.mUrl); |
| 111 assertEquals(BASE_URL + "foo/bar/2", info.mNextPagingUrl); |
| 112 } |
| 113 |
| 114 public void testRejectMultiplePageNumberPatterns() { |
| 115 PageParamInfo info = processDocumentWithoutBase( |
| 116 "<a href=\"http://www.google.com/test/list.php?start=10\">2</a>" + |
| 117 "<a href=\"http://www.google.com/test/list.php?start=20\">3</a>" + |
| 118 "<a href=\"http://www.google.com/test/list.php?start=30\">4</a>" + |
| 119 "<p>Main content</p>" + |
| 120 "<a href=\"http://www.google.com/test/list.php?offset=10\">2</a>" + |
| 121 "<a href=\"http://www.google.com/test/list.php?offset=20\">3</a>" + |
| 122 "<a href=\"http://www.google.com/test/list.php?offset=30\">4</a>" + |
| 123 "<a href=\"http://www.google.com/test/list.php?offset=all\">All</a>"
, |
| 124 "http://www.google.com/test/list.php"); |
| 125 |
| 126 assertEquals(PageParamInfo.Type.PAGE_NUMBER, info.mType); |
| 127 assertEquals(4, info.mAllPageInfo.size()); |
| 128 PageParamInfo.PageInfo page = info.mAllPageInfo.get(0); |
| 129 assertEquals(1, page.mPageNum); |
| 130 assertEquals("http://www.google.com/test/list.php", page.mUrl); |
| 131 page = info.mAllPageInfo.get(1); |
| 132 assertEquals(2, page.mPageNum); |
| 133 assertEquals("http://www.google.com/test/list.php?start=10", page.mUrl); |
| 134 page = info.mAllPageInfo.get(2); |
| 135 assertEquals(3, page.mPageNum); |
| 136 assertEquals("http://www.google.com/test/list.php?start=20", page.mUrl); |
| 137 page = info.mAllPageInfo.get(3); |
| 138 assertEquals(4, page.mPageNum); |
| 139 assertEquals("http://www.google.com/test/list.php?start=30", page.mUrl); |
| 140 assertTrue(info.mFormula != null); |
| 141 assertEquals(10, info.mFormula.mCoefficient); |
| 142 assertEquals(-10, info.mFormula.mDelta); |
| 143 assertEquals("http://www.google.com/test/list.php?start=10", info.mNextP
agingUrl); |
| 144 } |
| 145 |
| 146 public void testInvalidAndVoidLinks() { |
| 147 PageParamInfo info = processDocument( |
| 148 "1<br>" + |
| 149 "<a href=\"javascript:void(0)\">2</a>"); |
| 150 PageParameterDetectorTest.expectEmptyPageParamInfo(info); |
| 151 } |
| 152 |
| 153 public void testDifferentHostLinks() { |
| 154 PageParamInfo info = processDocumentWithoutBase( |
| 155 "1<br>" + |
| 156 "<a href=\"http://www.foo.com/foo/bar/2\">2</a>", |
| 157 TEST_URL); |
| 158 PageParameterDetectorTest.expectEmptyPageParamInfo(info); |
| 159 } |
| 160 |
| 161 public void testWhitespaceSibling() { |
| 162 PageParamInfo info = processDocument( |
| 163 "1<br>" + |
| 164 " " + |
| 165 "<a href=\"/foo/bar/2\">2</a>"); |
| 166 assertEquals(2, info.mAllPageInfo.size()); |
| 167 } |
| 168 |
| 169 public void testPunctuationSibling() { |
| 170 PageParamInfo info = processDocument( |
| 171 "<a href=\"/foo/bar/1\">1</a>" + |
| 172 "," + |
| 173 "<a href=\"/foo/bar/2\">2</a>"); |
| 174 assertEquals(2, info.mAllPageInfo.size()); |
| 175 } |
| 176 |
| 177 public void testSeparatorSibling() { |
| 178 PageParamInfo info = processDocument( |
| 179 "<div>" + |
| 180 "1 | " + |
| 181 "<a href=\"/foo/bar/2\">2</a>" + |
| 182 " | " + |
| 183 "<a href=\"/foo/bar/3\">3</a>" + |
| 184 "</div>"); |
| 185 assertEquals(3, info.mAllPageInfo.size()); |
| 186 } |
| 187 |
| 188 public void testParentSibling0() { |
| 189 PageParamInfo info = processDocumentWithoutBase( |
| 190 "<div>begin" + |
| 191 "<strong>1</strong>" + |
| 192 "<div><a href=\"http://www.test.com/foo/bar/2\">2</a></div>" + |
| 193 "<div><a href=\"http://www.test.com/foo/bar/3\">3</a></div>" + |
| 194 "end</div>", |
| 195 TEST_URL); |
| 196 assertEquals(3, info.mAllPageInfo.size()); |
| 197 PageParamInfo.PageInfo page = info.mAllPageInfo.get(0); |
| 198 assertEquals(1, page.mPageNum); |
| 199 assertEquals(TEST_URL, page.mUrl); |
| 200 page = info.mAllPageInfo.get(1); |
| 201 assertEquals(2, page.mPageNum); |
| 202 assertEquals(TEST_URL + "/2", page.mUrl); |
| 203 page = info.mAllPageInfo.get(2); |
| 204 assertEquals(3, page.mPageNum); |
| 205 assertEquals(TEST_URL + "/3", page.mUrl); |
| 206 assertEquals("http://www.test.com/foo/bar/2", info.mNextPagingUrl); |
| 207 } |
| 208 |
| 209 public void testParentSibling1() { |
| 210 PageParamInfo info = processDocumentWithoutBase( |
| 211 "<div>begin" + |
| 212 "<div><a href=\"http://www.test.com/foo/bar\">1</a></div>" + |
| 213 "<strong>2</strong>" + |
| 214 "<div><a href=\"http://www.test.com/foo/bar/3\">3</a></div>" + |
| 215 "end</div>", |
| 216 "http://www.test.com/foo/bar/2"); |
| 217 assertEquals(2, info.mAllPageInfo.size()); |
| 218 PageParamInfo.PageInfo page = info.mAllPageInfo.get(0); |
| 219 assertEquals(1, page.mPageNum); |
| 220 assertEquals(TEST_URL, page.mUrl); |
| 221 page = info.mAllPageInfo.get(1); |
| 222 assertEquals(3, page.mPageNum); |
| 223 assertEquals(TEST_URL + "/3", page.mUrl); |
| 224 assertEquals("http://www.test.com/foo/bar/3", info.mNextPagingUrl); |
| 225 } |
| 226 |
| 227 public void testParentSibling2() { |
| 228 PageParamInfo info = processDocumentWithoutBase( |
| 229 "<div>begin" + |
| 230 "<div><a href=\"http://www.test.com/foo/bar\">1</a></div>" + |
| 231 "<div><a href=\"http://www.test.com/foo/bar/2\">2</a></div>" + |
| 232 "<strong>3</strong>" + |
| 233 "end</div>", |
| 234 "http://www.test.com/foo/bar/3"); |
| 235 assertEquals(2, info.mAllPageInfo.size()); |
| 236 PageParamInfo.PageInfo page = info.mAllPageInfo.get(0); |
| 237 assertEquals(1, page.mPageNum); |
| 238 assertEquals(TEST_URL, page.mUrl); |
| 239 page = info.mAllPageInfo.get(1); |
| 240 assertEquals(2, page.mPageNum); |
| 241 assertEquals(TEST_URL + "/2", page.mUrl); |
| 242 assertTrue(info.mNextPagingUrl.isEmpty()); |
| 243 } |
| 244 |
| 245 public void testNestedStructure() { |
| 246 PageParamInfo info = processDocumentWithoutBase( |
| 247 "<div>begin" + |
| 248 "<span><a href=\"http://www.test.com/foo?page=2\">‹‹ P
rev</a></span>" + |
| 249 "<span><a href=\"http://www.test.com/foo?page=1\">1</a></span>" + |
| 250 "<span><a href=\"http://www.test.com/foo?page=2\">2</a></span>" + |
| 251 "<span>3</span>" + |
| 252 "<span><a href=\"http://www.test.com/foo?page=4\">4</a></span>" + |
| 253 "<span><a href=\"http://www.test.com/foo?page=5\">5</a></span>" + |
| 254 "<span>...</span>" + |
| 255 "<span><a href=\"http://www.test.com/foo?page=48\">48</a></span>" + |
| 256 "<span><a href=\"http://www.test.com/foo?page=4\">Next ›&rsaq
uo;</a></span>" + |
| 257 "</div>", |
| 258 "http://www.test.com/foo?page=3"); |
| 259 assertEquals(5, info.mAllPageInfo.size()); |
| 260 final String urlPrefix = "http://www.test.com/foo?page="; |
| 261 PageParamInfo.PageInfo page = info.mAllPageInfo.get(0); |
| 262 assertEquals(1, page.mPageNum); |
| 263 assertEquals(urlPrefix + "1", page.mUrl); |
| 264 page = info.mAllPageInfo.get(1); |
| 265 assertEquals(2, page.mPageNum); |
| 266 assertEquals(urlPrefix + "2", page.mUrl); |
| 267 page = info.mAllPageInfo.get(2); |
| 268 assertEquals(4, page.mPageNum); |
| 269 assertEquals(urlPrefix + "4", page.mUrl); |
| 270 page = info.mAllPageInfo.get(3); |
| 271 assertEquals(5, page.mPageNum); |
| 272 assertEquals(urlPrefix + "5", page.mUrl); |
| 273 page = info.mAllPageInfo.get(4); |
| 274 assertEquals(48, page.mPageNum); |
| 275 assertEquals(urlPrefix + "48", page.mUrl); |
| 276 assertEquals(urlPrefix + "4", info.mNextPagingUrl); |
| 277 } |
| 278 |
| 279 private PageParamInfo processDocument(String content) { |
| 280 // Create and add a <base> element so that all anchors are based off it. |
| 281 BaseElement baseTag = Document.get().createBaseElement(); |
| 282 baseTag.setHref(BASE_URL); |
| 283 mHead.appendChild(baseTag); |
| 284 |
| 285 // Append content to body. |
| 286 mBody.setInnerHTML(content); |
| 287 |
| 288 PageParamInfo info = PageParameterParser.parse(TEST_URL, null); |
| 289 mHead.removeChild(baseTag); |
| 290 return info; |
| 291 } |
| 292 |
| 293 private PageParamInfo processDocumentWithoutBase(String content, String orig
inalUrl) { |
| 294 // Append content to body. |
| 295 mBody.setInnerHTML(content); |
| 296 return PageParameterParser.parse(originalUrl, null); |
| 297 } |
| 298 |
| 299 } |
OLD | NEW |