OLD | NEW |
(Empty) | |
| 1 package org.chromium.distiller.webdocument.filters; |
| 2 |
| 3 import org.chromium.distiller.webdocument.WebDocument; |
| 4 import org.chromium.distiller.webdocument.WebElement; |
| 5 import org.chromium.distiller.webdocument.WebTag; |
| 6 |
| 7 import java.util.Stack; |
| 8 |
| 9 /** |
| 10 * This class is used to identify what WebTag should be |
| 11 * marked as <i>isContent</i> based on its {@link WebElement}s inside. |
| 12 * A {@link WebTag} is content when: |
| 13 * <ul> |
| 14 * <li>Has any {@link WebElement} which is content.</li> |
| 15 * <li>Has at least one nested {@link WebTag} which is content.</li> |
| 16 * </ul> |
| 17 */ |
| 18 public class NestedElementRetainer { |
| 19 public static void process(WebDocument document) { |
| 20 boolean isContent = false; |
| 21 int stackMark = -1; |
| 22 Stack<WebTag> stack = new Stack<>(); |
| 23 |
| 24 for (WebElement e : document.getElements()) { |
| 25 if (!(e instanceof WebTag)) { |
| 26 if (!isContent) { |
| 27 isContent = e.getIsContent(); |
| 28 } |
| 29 } else { |
| 30 WebTag webTag = (WebTag) e; |
| 31 if (webTag.isStartTag()) { |
| 32 webTag.setIsContent(isContent); |
| 33 stack.push(webTag); |
| 34 isContent = false; |
| 35 } else { |
| 36 WebTag startWebTag = stack.pop(); |
| 37 isContent |= stackMark >= stack.size(); |
| 38 if (isContent) { |
| 39 stackMark = stack.size() - 1; |
| 40 } |
| 41 boolean wasContent = startWebTag.getIsContent(); |
| 42 startWebTag.setIsContent(isContent); |
| 43 webTag.setIsContent(isContent); |
| 44 isContent = wasContent; |
| 45 } |
| 46 } |
| 47 } |
| 48 } |
| 49 } |
OLD | NEW |