OLD | NEW |
---|---|
(Empty) | |
1 #import ("dart:html"); | |
nweiz
2012/02/01 00:10:39
This whole file could use considerably more docume
Jacob
2012/02/01 07:48:26
I agree that this script needs a larger comment de
nweiz
2012/02/01 21:23:02
Some of the methods are certainly self-explanatory
Jacob
2012/02/02 22:03:14
Done.
| |
2 #import ("dart:htmlimpl"); | |
3 #import ("dart:dom", prefix:"dom"); | |
4 #import ("dart:json"); | |
5 | |
6 // Workaround for HTML lib missing feature: creates a Range by calling | |
// createRange() on the raw dart:dom document and wrapping the result with | |
// LevelDom.wrapRange. | |
7 Range newRange() { | |
8 return LevelDom.wrapRange(dom.document.createRange()); | |
9 } | |
10 | |
11 // Temporary range object to optimize performance computing client rects | |
12 // from text nodes. | |
// Created on first use by getClientRect and re-targeted at a different node | |
// on each later call for a non-Element node. | |
13 Range _tempRange; | |
14 // Hacks because ASYNC measurement is annoying when just writing a script. | |
// Returns the bounding client rect of [n], measured synchronously. | |
15 ClientRect getClientRect(Node n) { | |
16 if (n is Element) { | |
17 Element e = n; | |
nweiz
2012/02/01 00:10:39
You're just calling e.dynamic on the next line :-p
Jacob
2012/02/01 07:48:26
Notice that this method is documented as a hack to
| |
// Unwrap to the raw dart:dom element and measure that directly. | |
18 dom.Element raw = unwrapDomObject(e.dynamic); | |
19 return LevelDom.wrapClientRect(raw.getBoundingClientRect()); | |
20 } else { | |
21 // Crazy hacks that works for nodes.... create a range and measure it. | |
22 if (_tempRange == null) { | |
23 _tempRange = newRange(); | |
24 } | |
// Make the shared range span exactly [n] before measuring it. | |
25 _tempRange.setStartBefore(n); | |
26 _tempRange.setEndAfter(n); | |
27 return _tempRange.getBoundingClientRect(); | |
28 } | |
29 } | |
30 | |
// CSS class name. In this file scrapeSection adds it to the parent of each | |
// matched section element, filteredHtml treats elements carrying it as a stop | |
// marker, and genCleanHtml strips it from the output again. | |
31 final DART_REMOVED = "dart_removed"; | |
nweiz
2012/02/01 00:10:39
What are the semantics of this class?
Jacob
2012/02/01 07:48:26
documented
| |
32 | |
// Style block that gives elements with the dart_removed class a | |
// half-transparent red background. | |
33 final DEBUG_CSS = """ | |
34 <style type="text/css"> | |
35 .dart_removed { | |
36 background-color: rgba(255, 0, 0, 0.5); | |
37 } | |
38 </style>"""; | |
39 | |
// Vertical slack, in pixels, used by findFirstLine: a node whose bottom edge | |
// is more than this far below the tracked bottom ends the first line. | |
40 final MIN_PIXELS_DIFFERENT_LINES = 10; | |
41 | |
// Selector used by filteredHtml to find elements whose outer HTML is moved | |
// into the IDL part of the result. | |
42 final IDL_SELECTOR = "pre.eval, pre.idl"; | |
43 | |
// Input map; the getters in this file read its 'dartIdl', 'type' and | |
// 'searchResult' entries. | |
// NOTE(review): the code that assigns it is not in this part of the file. | |
44 Map data; | |
45 | |
46 // TODO(rnystrom): Hack! The list below was copied from domTypes.json rather than loaded from it. | |
Jacob
2012/01/30 21:36:21
Rather than hacking this here, use the same trick
Bob Nystrom
2012/01/31 20:56:29
For now added a comment explaining we should be do
| |
47 var domTypes; | |
48 final domTypesRaw = const [ | |
49 "AbstractWorker", "ArrayBuffer", "ArrayBufferView", "Attr", | |
50 "AudioBuffer", "AudioBufferSourceNode", "AudioChannelMerger", | |
51 "AudioChannelSplitter", "AudioContext", "AudioDestinationNode", | |
52 "AudioGain", "AudioGainNode", "AudioListener", "AudioNode", | |
53 "AudioPannerNode", "AudioParam", "AudioProcessingEvent", | |
54 "AudioSourceNode", "BarInfo", "BeforeLoadEvent", "BiquadFilterNode", | |
55 "Blob", "CDATASection", "CSSCharsetRule", "CSSFontFaceRule", | |
56 "CSSImportRule", "CSSMediaRule", "CSSPageRule", "CSSPrimitiveValue", | |
57 "CSSRule", "CSSRuleList", "CSSStyleDeclaration", "CSSStyleRule", | |
58 "CSSStyleSheet", "CSSUnknownRule", "CSSValue", "CSSValueList", | |
59 "CanvasGradient", "CanvasPattern", "CanvasPixelArray", | |
60 "CanvasRenderingContext", "CanvasRenderingContext2D", | |
61 "CharacterData", "ClientRect", "ClientRectList", "Clipboard", | |
62 "CloseEvent", "Comment", "CompositionEvent", "Console", | |
63 "ConvolverNode", "Coordinates", "Counter", "Crypto", "CustomEvent", | |
64 "DOMApplicationCache", "DOMException", "DOMFileSystem", | |
65 "DOMFileSystemSync", "DOMFormData", "DOMImplementation", | |
66 "DOMMimeType", "DOMMimeTypeArray", "DOMParser", "DOMPlugin", | |
67 "DOMPluginArray", "DOMSelection", "DOMSettableTokenList", | |
68 "DOMTokenList", "DOMURL", "DOMWindow", "DataTransferItem", | |
69 "DataTransferItemList", "DataView", "Database", "DatabaseSync", | |
70 "DedicatedWorkerContext", "DelayNode", "DeviceMotionEvent", | |
71 "DeviceOrientationEvent", "DirectoryEntry", "DirectoryEntrySync", | |
72 "DirectoryReader", "DirectoryReaderSync", "Document", | |
73 "DocumentFragment", "DocumentType", "DynamicsCompressorNode", | |
74 "Element", "ElementTimeControl", "ElementTraversal", "Entity", | |
75 "EntityReference", "Entry", "EntryArray", "EntryArraySync", | |
76 "EntrySync", "ErrorEvent", "Event", "EventException", "EventSource", | |
77 "EventTarget", "File", "FileEntry", "FileEntrySync", "FileError", | |
78 "FileException", "FileList", "FileReader", "FileReaderSync", | |
79 "FileWriter", "FileWriterSync", "Float32Array", "Float64Array", | |
80 "Geolocation", "Geoposition", "HTMLAllCollection", | |
81 "HTMLAnchorElement", "HTMLAppletElement", "HTMLAreaElement", | |
82 "HTMLAudioElement", "HTMLBRElement", "HTMLBaseElement", | |
83 "HTMLBaseFontElement", "HTMLBodyElement", "HTMLButtonElement", | |
84 "HTMLCanvasElement", "HTMLCollection", "HTMLDListElement", | |
85 "HTMLDataListElement", "HTMLDetailsElement", "HTMLDirectoryElement", | |
86 "HTMLDivElement", "HTMLDocument", "HTMLElement", "HTMLEmbedElement", | |
87 "HTMLFieldSetElement", "HTMLFontElement", "HTMLFormElement", | |
88 "HTMLFrameElement", "HTMLFrameSetElement", "HTMLHRElement", | |
89 "HTMLHeadElement", "HTMLHeadingElement", "HTMLHtmlElement", | |
90 "HTMLIFrameElement", "HTMLImageElement", "HTMLInputElement", | |
91 "HTMLIsIndexElement", "HTMLKeygenElement", "HTMLLIElement", | |
92 "HTMLLabelElement", "HTMLLegendElement", "HTMLLinkElement", | |
93 "HTMLMapElement", "HTMLMarqueeElement", "HTMLMediaElement", | |
94 "HTMLMenuElement", "HTMLMetaElement", "HTMLMeterElement", | |
95 "HTMLModElement", "HTMLOListElement", "HTMLObjectElement", | |
96 "HTMLOptGroupElement", "HTMLOptionElement", "HTMLOptionsCollection", | |
97 "HTMLOutputElement", "HTMLParagraphElement", "HTMLParamElement", | |
98 "HTMLPreElement", "HTMLProgressElement", "HTMLQuoteElement", | |
99 "HTMLScriptElement", "HTMLSelectElement", "HTMLSourceElement", | |
100 "HTMLSpanElement", "HTMLStyleElement", "HTMLTableCaptionElement", | |
101 "HTMLTableCellElement", "HTMLTableColElement", "HTMLTableElement", | |
102 "HTMLTableRowElement", "HTMLTableSectionElement", | |
103 "HTMLTextAreaElement", "HTMLTitleElement", "HTMLTrackElement", | |
104 "HTMLUListElement", "HTMLUnknownElement", "HTMLVideoElement", | |
105 "HashChangeEvent", "HighPass2FilterNode", "History", "IDBAny", | |
106 "IDBCursor", "IDBCursorWithValue", "IDBDatabase", | |
107 "IDBDatabaseError", "IDBDatabaseException", "IDBFactory", | |
108 "IDBIndex", "IDBKey", "IDBKeyRange", "IDBObjectStore", "IDBRequest", | |
109 "IDBTransaction", "IDBVersionChangeEvent", | |
110 "IDBVersionChangeRequest", "ImageData", "InjectedScriptHost", | |
111 "InspectorFrontendHost", "Int16Array", "Int32Array", "Int8Array", | |
112 "JavaScriptAudioNode", "JavaScriptCallFrame", "KeyboardEvent", | |
113 "Location", "LowPass2FilterNode", "MediaElementAudioSourceNode", | |
114 "MediaError", "MediaList", "MediaQueryList", | |
115 "MediaQueryListListener", "MemoryInfo", "MessageChannel", | |
116 "MessageEvent", "MessagePort", "Metadata", "MouseEvent", | |
117 "MutationCallback", "MutationEvent", "MutationRecord", | |
118 "NamedNodeMap", "Navigator", "NavigatorUserMediaError", | |
119 "NavigatorUserMediaSuccessCallback", "Node", "NodeFilter", | |
120 "NodeIterator", "NodeList", "NodeSelector", "Notation", | |
121 "Notification", "NotificationCenter", "OESStandardDerivatives", | |
122 "OESTextureFloat", "OESVertexArrayObject", | |
123 "OfflineAudioCompletionEvent", "OperationNotAllowedException", | |
124 "OverflowEvent", "PageTransitionEvent", "Performance", | |
125 "PerformanceNavigation", "PerformanceTiming", "PopStateEvent", | |
126 "PositionError", "ProcessingInstruction", "ProgressEvent", | |
127 "RGBColor", "Range", "RangeException", "RealtimeAnalyserNode", | |
128 "Rect", "SQLError", "SQLException", "SQLResultSet", | |
129 "SQLResultSetRowList", "SQLTransaction", "SQLTransactionSync", | |
130 "SVGAElement", "SVGAltGlyphDefElement", "SVGAltGlyphElement", | |
131 "SVGAltGlyphItemElement", "SVGAngle", "SVGAnimateColorElement", | |
132 "SVGAnimateElement", "SVGAnimateMotionElement", | |
133 "SVGAnimateTransformElement", "SVGAnimatedAngle", | |
134 "SVGAnimatedBoolean", "SVGAnimatedEnumeration", | |
135 "SVGAnimatedInteger", "SVGAnimatedLength", "SVGAnimatedLengthList", | |
136 "SVGAnimatedNumber", "SVGAnimatedNumberList", | |
137 "SVGAnimatedPreserveAspectRatio", "SVGAnimatedRect", | |
138 "SVGAnimatedString", "SVGAnimatedTransformList", | |
139 "SVGAnimationElement", "SVGCircleElement", "SVGClipPathElement", | |
140 "SVGColor", "SVGComponentTransferFunctionElement", | |
141 "SVGCursorElement", "SVGDefsElement", "SVGDescElement", | |
142 "SVGDocument", "SVGElement", "SVGElementInstance", | |
143 "SVGElementInstanceList", "SVGEllipseElement", "SVGException", | |
144 "SVGExternalResourcesRequired", "SVGFEBlendElement", | |
145 "SVGFEColorMatrixElement", "SVGFEComponentTransferElement", | |
146 "SVGFECompositeElement", "SVGFEConvolveMatrixElement", | |
147 "SVGFEDiffuseLightingElement", "SVGFEDisplacementMapElement", | |
148 "SVGFEDistantLightElement", "SVGFEDropShadowElement", | |
149 "SVGFEFloodElement", "SVGFEFuncAElement", "SVGFEFuncBElement", | |
150 "SVGFEFuncGElement", "SVGFEFuncRElement", | |
151 "SVGFEGaussianBlurElement", "SVGFEImageElement", | |
152 "SVGFEMergeElement", "SVGFEMergeNodeElement", | |
153 "SVGFEMorphologyElement", "SVGFEOffsetElement", | |
154 "SVGFEPointLightElement", "SVGFESpecularLightingElement", | |
155 "SVGFESpotLightElement", "SVGFETileElement", | |
156 "SVGFETurbulenceElement", "SVGFilterElement", | |
157 "SVGFilterPrimitiveStandardAttributes", "SVGFitToViewBox", | |
158 "SVGFontElement", "SVGFontFaceElement", "SVGFontFaceFormatElement", | |
159 "SVGFontFaceNameElement", "SVGFontFaceSrcElement", | |
160 "SVGFontFaceUriElement", "SVGForeignObjectElement", "SVGGElement", | |
161 "SVGGlyphElement", "SVGGlyphRefElement", "SVGGradientElement", | |
162 "SVGHKernElement", "SVGImageElement", "SVGLangSpace", "SVGLength", | |
163 "SVGLengthList", "SVGLineElement", "SVGLinearGradientElement", | |
164 "SVGLocatable", "SVGMPathElement", "SVGMarkerElement", | |
165 "SVGMaskElement", "SVGMatrix", "SVGMetadataElement", | |
166 "SVGMissingGlyphElement", "SVGNumber", "SVGNumberList", "SVGPaint", | |
167 "SVGPathElement", "SVGPathSeg", "SVGPathSegArcAbs", | |
168 "SVGPathSegArcRel", "SVGPathSegClosePath", | |
169 "SVGPathSegCurvetoCubicAbs", "SVGPathSegCurvetoCubicRel", | |
170 "SVGPathSegCurvetoCubicSmoothAbs", | |
171 "SVGPathSegCurvetoCubicSmoothRel", "SVGPathSegCurvetoQuadraticAbs", | |
172 "SVGPathSegCurvetoQuadraticRel", | |
173 "SVGPathSegCurvetoQuadraticSmoothAbs", | |
174 "SVGPathSegCurvetoQuadraticSmoothRel", "SVGPathSegLinetoAbs", | |
175 "SVGPathSegLinetoHorizontalAbs", "SVGPathSegLinetoHorizontalRel", | |
176 "SVGPathSegLinetoRel", "SVGPathSegLinetoVerticalAbs", | |
177 "SVGPathSegLinetoVerticalRel", "SVGPathSegList", | |
178 "SVGPathSegMovetoAbs", "SVGPathSegMovetoRel", "SVGPatternElement", | |
179 "SVGPoint", "SVGPointList", "SVGPolygonElement", | |
180 "SVGPolylineElement", "SVGPreserveAspectRatio", | |
181 "SVGRadialGradientElement", "SVGRect", "SVGRectElement", | |
182 "SVGRenderingIntent", "SVGSVGElement", "SVGScriptElement", | |
183 "SVGSetElement", "SVGStopElement", "SVGStringList", "SVGStylable", | |
184 "SVGStyleElement", "SVGSwitchElement", "SVGSymbolElement", | |
185 "SVGTRefElement", "SVGTSpanElement", "SVGTests", | |
186 "SVGTextContentElement", "SVGTextElement", "SVGTextPathElement", | |
187 "SVGTextPositioningElement", "SVGTitleElement", "SVGTransform", | |
188 "SVGTransformList", "SVGTransformable", "SVGURIReference", | |
189 "SVGUnitTypes", "SVGUseElement", "SVGVKernElement", | |
190 "SVGViewElement", "SVGViewSpec", "SVGZoomAndPan", "SVGZoomEvent", | |
191 "Screen", "ScriptProfile", "ScriptProfileNode", "SharedWorker", | |
192 "SharedWorkercontext", "SpeechInputEvent", "SpeechInputResult", | |
193 "SpeechInputResultList", "Storage", "StorageEvent", "StorageInfo", | |
194 "StyleMedia", "StyleSheet", "StyleSheetList", "Text", "TextEvent", | |
195 "TextMetrics", "TextTrack", "TextTrackCue", "TextTrackCueList", | |
196 "TimeRanges", "Touch", "TouchEvent", "TouchList", "TreeWalker", | |
197 "UIEvent", "Uint16Array", "Uint32Array", "Uint8Array", | |
198 "ValidityState", "VoidCallback", "WaveShaperNode", | |
199 "WebGLActiveInfo", "WebGLBuffer", "WebGLContextAttributes", | |
200 "WebGLContextEvent", "WebGLDebugRendererInfo", "WebGLDebugShaders", | |
201 "WebGLFramebuffer", "WebGLProgram", "WebGLRenderbuffer", | |
202 "WebGLRenderingContext", "WebGLShader", "WebGLTexture", | |
203 "WebGLUniformLocation", "WebGLVertexArrayObjectOES", | |
204 "WebKitAnimation", "WebKitAnimationEvent", "WebKitAnimationList", | |
205 "WebKitBlobBuilder", "WebKitCSSFilterValue", | |
206 "WebKitCSSKeyframeRule", "WebKitCSSKeyframesRule", | |
207 "WebKitCSSMatrix", "WebKitCSSTransformValue", "WebKitFlags", | |
208 "WebKitLoseContext", "WebKitMutationObserver", "WebKitPoint", | |
209 "WebKitTransitionEvent", "WebSocket", "WheelEvent", "Worker", | |
210 "WorkerContext", "WorkerLocation", "WorkerNavigator", | |
211 "XMLHttpRequest", "XMLHttpRequestException", | |
212 "XMLHttpRequestProgressEvent", "XMLHttpRequestUpload", | |
213 "XMLSerializer", "XPathEvaluator", "XPathException", | |
214 "XPathExpression", "XPathNSResolver", "XPathResult", | |
215 "XSLTProcessor", "AudioBufferCallback", "DatabaseCallback", | |
216 "EntriesCallback", "EntryCallback", "ErrorCallback", "FileCallback", | |
217 "FileSystemCallback", "FileWriterCallback", "MetadataCallback", | |
218 "NavigatorUserMediaErrorCallback", "PositionCallback", | |
219 "PositionErrorCallback", "SQLStatementCallback", | |
220 "SQLStatementErrorCallback", "SQLTransactionCallback", | |
221 "SQLTransactionErrorCallback", "SQLTransactionSyncCallback", | |
222 "StorageInfoErrorCallback", "StorageInfoQuotaCallback", | |
223 "StorageInfoUsageCallback", "StringCallback" | |
224 ]; | |
225 | |
// Map that onEnd serializes to JSON and posts to the window. | |
226 Map dbEntry; | |
227 | |
// The 'dartIdl' entry of [data]; scrapeSection indexes it by property type. | |
228 Map get dartIdl() => data['dartIdl']; | |
// The 'type' entry of [data]. | |
229 String get currentType() => data['type']; | |
230 | |
// Cache for currentTypeShort; null until the getter is first read. | |
231 String _currentTypeShort; | |
// [currentType] passed through trimPrefix for "HTML", "SVG", "DOM", "WebKit" | |
// and "Webkit", in that order. Computed on first read and cached. | |
// NOTE(review): trimPrefix is defined elsewhere; presumably it drops the | |
// prefix when present and returns the string unchanged otherwise - confirm. | |
232 String get currentTypeShort() { | |
233 if (_currentTypeShort == null) { | |
234 _currentTypeShort = currentType; | |
235 _currentTypeShort = trimPrefix(_currentTypeShort, "HTML"); | |
236 _currentTypeShort = trimPrefix(_currentTypeShort, "SVG"); | |
237 _currentTypeShort = trimPrefix(_currentTypeShort, "DOM"); | |
238 _currentTypeShort = trimPrefix(_currentTypeShort, "WebKit"); | |
239 _currentTypeShort = trimPrefix(_currentTypeShort, "Webkit"); | |
240 } | |
241 return _currentTypeShort; | |
242 } | |
243 | |
// Cache for currentTypeTiny; null until the getter is first read. | |
244 String _currentTypeTiny; | |
// [currentTypeShort] passed through trimEnd with "Element". Computed on first | |
// read and cached. | |
// NOTE(review): trimEnd is defined elsewhere; presumably it removes a | |
// trailing "Element" when present - confirm. | |
245 String get currentTypeTiny() { | |
246 if (_currentTypeTiny == null) { | |
247 _currentTypeTiny = currentTypeShort; | |
248 _currentTypeTiny = trimEnd(_currentTypeTiny, "Element"); | |
249 } | |
250 return _currentTypeTiny; | |
251 } | |
252 | |
// The 'searchResult' entry of [data]. | |
253 Map get searchResult() => data['searchResult']; | |
// The 'link' entry of [searchResult]; pageDomain, pageDir and getAbsoluteUrl | |
// use it as the URL against which links are resolved. | |
254 String get pageUrl() => searchResult['link']; | |
255 | |
// Cache for pageDomain; null until the getter is first read. | |
256 String _pageDomain; | |
// Prefix of [pageUrl] that ends just before the first "/" found at or after | |
// index "https://".length (for "https://host/path" this is "https://host"). | |
// Computed on first read and cached. | |
// NOTE(review): the search offset is the length of "https://" even when the | |
// URL starts with "http://", and a URL with no "/" after the host would | |
// presumably make indexOf return -1 and substring fail - confirm that | |
// pageUrl always contains a path. | |
257 String get pageDomain() { | |
258 if (_pageDomain == null) { | |
259 _pageDomain = pageUrl.substring(0, pageUrl.indexOf("/", "https://".length)); | |
nweiz
2012/02/01 00:10:39
Seems like this would be easier to understand with
| |
260 } | |
261 return _pageDomain; | |
262 } | |
263 | |
// [pageUrl] up to and including its last '/'. | |
264 String get pageDir() { | |
265 return pageUrl.substring(0, pageUrl.lastIndexOf('/') + 1); | |
266 } | |
267 | |
// Turns the href of [anchor] into an absolute URL relative to the page. | |
// Returns '' for a null anchor or an empty href, and the href unchanged when | |
// it already starts with http:// or https://. Otherwise the href is appended | |
// to pageDomain (href starts with "/"), to pageUrl (href starts with "#") or | |
// to pageDir (anything else). | |
268 String getAbsoluteUrl(AnchorElement anchor) { | |
269 if (anchor == null || anchor.href.length == 0) return ''; | |
270 String path = anchor.href; | |
271 RegExp fullUrlRegExp = new RegExp("^https?://"); | |
nweiz
2012/02/01 00:10:39
Style nit: no need to assign this to a variable.
| |
272 if (fullUrlRegExp.hasMatch(path)) return path; | |
273 if (path.startsWith('/')) { | |
274 return "$pageDomain$path"; | |
275 } else if (path.startsWith("#")) { | |
276 return "$pageUrl$path"; | |
277 } else { | |
278 return "$pageDir$path"; | |
279 } | |
280 } | |
281 | |
// True when [n] itself or any node reached by following parent links from it | |
// is a TableElement; false when the chain ends without finding one. | |
282 bool inTable(Node n) { | |
283 while(n != null) { | |
284 if (n is TableElement) return true; | |
285 n = n.parent; | |
286 } | |
287 return false; | |
288 } | |
289 | |
// Escapes [str] for use in HTML by assigning it as the text of a scratch div | |
// and reading the div's innerHTML back. | |
290 String escapeHTML(str) { | |
291 Element e = new Element.tag("div"); | |
nweiz
2012/02/01 00:10:39
Style nit: redundant type declaration here (and qu
Jacob
2012/02/01 07:48:26
I wrote this code using the editor and for now tha
nweiz
2012/02/01 21:23:02
I'm generally not too keen on making code harder t
| |
292 e.text = str; | |
293 return e.innerHTML; | |
294 } | |
295 | |
// Returns every Text node found under [elem], collected by a depth-first | |
// walk that visits the children of each node in order. | |
296 List<Text> getAllTextNodes(Element elem) { | |
297 List<Text> nodes = <Text>[]; | |
// Recursive worker: records Text nodes, descends into everything else. | |
298 helper(Node n) { | |
299 if (n is Text) { | |
300 nodes.add(n); | |
301 } else { | |
302 for (Node child in n.nodes) { | |
nweiz
2012/02/01 00:10:39
Style nit: redundant type declaration here and els
Jacob
2012/02/01 07:48:26
Done.
| |
303 helper(child); | |
304 } | |
305 } | |
306 }; | |
307 | |
308 helper(elem); | |
309 return nodes; | |
310 } | |
311 | |
312 /** | |
313 * Whether a node and its children are all types that are safe to skip if the | |
314 * nodes have no text content. | |
315 */ | |
// The check is structural only; isSkippable adds the empty-text test. | |
316 bool isSkippableType(Node n) { | |
317 // TODO(jacobr): are there any types we don't want to skip even if they | |
318 // have no text content? | |
// Images, canvases, inputs and objects are never skippable. | |
319 if (n is ImageElement || n is CanvasElement || n is InputElement | |
320 || n is ObjectElement) { | |
321 return false; | |
322 } | |
323 if (n is Text) return true; | |
324 | |
// Any other node is skippable only if every child is, recursively. | |
325 for (Node child in n.nodes) { | |
326 if (isSkippableType(child) == false) { | |
nweiz
2012/02/01 00:10:39
!isSkippableType(child)
Jacob
2012/02/01 07:48:26
Done.
| |
327 return false; | |
328 } | |
329 } | |
330 return true; | |
331 } | |
332 | |
// True when [n] passes isSkippableType and its text is empty once trimmed. | |
333 bool isSkippable(Node n) { | |
334 if (!isSkippableType(n)) return false; | |
335 return n.text.trim().length == 0; | |
336 } | |
337 | |
// Serializes [dbEntry] to JSON and posts it to the window as one string | |
// prefixed with START_DART_MESSAGE_UNIQUE_IDENTIFIER, using "*" as the | |
// target origin. | |
338 void onEnd() { | |
339 // Hideous hack to send JSON back to JS. | |
340 String dbJson = JSON.stringify(dbEntry); | |
341 // workaround bug in JSON parser. | |
// Every occurrence of the placeholder token is replaced by a backslash | |
// followed by "n" in the JSON text. | |
// NOTE(review): the placeholder is presumably inserted by JSONFIXUPHACK, | |
// which is defined elsewhere - confirm. | |
342 dbJson = dbJson.replaceAll("ZDARTIUMDOESNTESCAPESLASHNJXXXX", "\\n"); | |
343 | |
344 window.postMessage("START_DART_MESSAGE_UNIQUE_IDENTIFIER$dbJson", "*"); | |
nweiz
2012/02/01 00:10:39
I'm pretty confused about why this is happening. I
Jacob
2012/02/01 07:48:26
See the comment above... this is a hack to work ar
nweiz
2012/02/01 21:23:02
The comment made me think that only the "\n" stuff
Jacob
2012/02/02 05:26:38
postMessage is just a natural way you communicate
nweiz
2012/02/02 19:54:34
What does that have to do with JSON parser bugs?
Jacob
2012/02/02 22:03:14
The postMessage line isn't related to the JSON bug
| |
345 } | |
346 | |
// Immutable value returned by filteredHtml. | |
347 class SectionParseResult { | |
// Cleaned HTML of the section, as produced by genPrettyHtml. | |
348 final String html; | |
// Absolute URL found on the section's first line; filteredHtml leaves it | |
// null when no first line or no matching anchor was found. | |
349 final String url; | |
// Concatenated outer HTML of the elements filteredHtml classified as IDL; | |
// the empty string when there were none. | |
350 final String idl; | |
351 SectionParseResult(this.html, this.url, this.idl); | |
352 } | |
353 | |
// Cleans the tree under [root] in place and returns the resulting inner HTML | |
// passed through JSONFIXUPHACK (defined elsewhere). | |
// Because [root] is re-pointed at a sole element child while unwrapping, the | |
// returned HTML is that of the innermost wrapper reached, not necessarily of | |
// the node passed in. | |
354 String genCleanHtml(Element root) { | |
// Drop the marker class from every element that carries it. | |
355 for (Element e in root.queryAll(".$DART_REMOVED")) { | |
356 e.classes.remove(DART_REMOVED); | |
357 } | |
358 | |
359 // These elements are just tags that we should suppress. | |
360 for (Element e in root.queryAll(".lang.lang-en")) { | |
361 e.remove(); | |
362 } | |
363 | |
// Alternate between unwrapping and trimming until a pass changes nothing. | |
364 bool changed = true; | |
365 while (changed) { | |
366 changed = false; | |
// Descend while the current root has exactly one child and it is an Element. | |
367 while (root.nodes.length == 1) { | |
nweiz
2012/02/01 00:10:39
"while (root.nodes.length == 1 && root.nodes.first
Jacob
2012/02/01 07:48:26
Done.
| |
368 Node child = root.nodes.first; | |
369 if (child is Element) { | |
370 root = child; | |
371 changed = true; | |
372 } else { | |
373 // Just calling innerHTML on the parent will be sufficient... | |
374 // and insures the output is properly escaped. | |
375 break; | |
376 } | |
377 } | |
378 | |
379 // Trim useless nodes from the front. | |
380 while(root.nodes.length > 0 && | |
381 isSkippable(root.nodes.first)) { | |
382 root.nodes.first.remove(); | |
383 changed = true; | |
384 } | |
385 | |
386 // Trim useless nodes from the back. | |
387 while(root.nodes.length > 0 && | |
388 isSkippable(root.nodes.last())) { | |
389 root.nodes.last().remove(); | |
390 changed = true; | |
391 } | |
392 } | |
393 return JSONFIXUPHACK(root.innerHTML); | |
394 } | |
395 | |
// Thin wrapper: passes [fragment] straight to genCleanHtml, which cleans it | |
// in place and returns its HTML. | |
396 String genPrettyHtml(DocumentFragment fragment) { | |
nweiz
2012/02/01 00:10:39
Redundant method
Jacob
2012/02/01 07:48:26
removed
| |
397 return genCleanHtml(fragment); | |
398 } | |
399 | |
// Runs genCleanHtml on a copy of [e] made with clone(true), so the cleaning | |
// is applied to the copy rather than to the element passed in. | |
400 String genPrettyHtmlFromElement(Element e) { | |
401 e = e.clone(true); | |
402 return genCleanHtml(e); | |
403 } | |
404 | |
// Iterator that yields nodes children-first: it starts at the left-most | |
// descendant of the start node and, after each node, moves to the left-most | |
// descendant of that node's nextNode, or to its parent when nextNode is null. | |
// NOTE(review): the climb through parent is not bounded by the start node, so | |
// iteration continues into the start node's ancestors until a null parent is | |
// reached; findFirstLine relies on breaking out of the loop itself. | |
405 class PostOrderTraversalIterator implements Iterator<Node> { | |
406 | |
// The node the next call to next() will return; null once exhausted. | |
407 Node _next; | |
408 | |
409 PostOrderTraversalIterator(Node start) { | |
410 _next = _leftMostDescendent(start); | |
411 } | |
412 | |
413 bool hasNext() => _next != null; | |
414 | |
// Returns the current node and advances; returns null when exhausted. | |
415 Node next() { | |
416 if (_next == null) return null; | |
417 Node ret = _next; | |
418 if (_next.nextNode != null) { | |
419 _next = _leftMostDescendent(_next.nextNode); | |
420 } else { | |
421 _next = _next.parent; | |
422 } | |
423 return ret; | |
424 } | |
425 | |
// Follows first-child links from [n] until a node without children. | |
426 static Node _leftMostDescendent(Node n) { | |
427 while (n.nodes.length > 0) { | |
428 n = n.nodes.first; | |
429 } | |
430 return n; | |
431 } | |
432 } | |
433 | |
// Iterable wrapper around PostOrderTraversalIterator so that a walk starting | |
// at a given node can be used in a for-in loop. | |
434 class PostOrderTraversal implements Iterable<Node> { | |
// Node handed to each new iterator as its start node. | |
435 final Node _node; | |
436 PostOrderTraversal(this._node); | |
437 | |
438 Iterator<Node> iterator() => new PostOrderTraversalIterator(_node); | |
439 } | |
440 | |
// Builds a Range that starts where [section] starts and is extended node by | |
// node until a node is found whose rendered bottom edge lies clearly lower | |
// than the nodes seen so far, i.e. what looks like the first rendered line. | |
// Returns null when the text of that range does not contain | |
// stripWebkit(prop); otherwise returns the range. | |
441 Range findFirstLine(Range section, String prop) { | |
442 Range firstLine = newRange(); | |
443 firstLine.setStart(section.startContainer, section.startOffset); | |
444 | |
// NOTE(review): despite its name this value is only ever lowered below, so | |
// it tracks the smallest bottom seen so far - confirm that is intended. | |
445 num maxBottom = null; | |
446 for (Node n in new PostOrderTraversal(section.startContainer)) { | |
// Position of the start of [n] relative to the section range. | |
447 int compareResult = section.comparePoint(n, 0); | |
448 if (compareResult == -1) { | |
449 // before range so skip. | |
450 continue; | |
451 } else if (compareResult > 0) { | |
452 // After range so exit. | |
453 break; | |
454 } | |
455 | |
456 final rect = getClientRect(n); | |
457 num bottom = rect.bottom; | |
// Only nodes with a non-empty rect take part in the line detection. | |
458 if (rect.height > 0 && rect.width > 0) { | |
// More than MIN_PIXELS_DIFFERENT_LINES below the tracked bottom: stop here | |
// without adding this node to the range. | |
459 if (maxBottom != null && ( | |
460 maxBottom + MIN_PIXELS_DIFFERENT_LINES < bottom | |
461 )) { | |
462 break; | |
463 } else if (maxBottom == null || maxBottom > bottom) { | |
464 maxBottom = bottom; | |
465 } | |
466 } | |
467 | |
468 firstLine.setEndAfter(n); | |
469 } | |
470 | |
471 if (firstLine.toString().indexOf(stripWebkit(prop)) == -1) { | |
nweiz
2012/02/01 00:10:39
This seems conceptually distinct from what the res
Jacob
2012/02/01 07:48:26
Added a comment explaining why this is the way it
| |
472 return null; | |
473 } | |
474 return firstLine; | |
475 } | |
476 | |
// Returns the first "a" element under [root] whose text contains [prop], or | |
// null when there is none. | |
477 AnchorElement findAnchorElement(Element root, String prop) { | |
478 for (AnchorElement a in root.queryAll("a")) { | |
479 if (a.text.indexOf(prop) != -1) { | |
nweiz
2012/02/01 00:10:39
Style nit: "a.text.contains(prop)"
Jacob
2012/02/01 07:48:26
Done.
| |
480 return a; | |
481 } | |
482 } | |
483 return null; | |
484 } | |
485 | |
486 // First surrounding element with an ID is safe enough. | |
// Walks up from [elem] and returns the first ancestor that has a non-empty | |
// id not starting with "section_", or the node at which the walk reaches | |
// [root]. [elem] itself is returned only when it already equals [root]. | |
// NOTE(review): if [root] is not an ancestor of [elem] the walk runs off the | |
// top of the tree and candidate.id is read on a null parent - confirm that | |
// callers always pass an ancestor. | |
487 Element findTigherRoot(Element elem, Element root) { | |
nweiz
2012/02/01 00:10:39
s/Tigher/Tighter/
Jacob
2012/02/01 07:48:26
Done.
| |
488 Element candidate = elem; | |
489 while(root != candidate) { | |
nweiz
2012/02/01 00:10:39
Style nit: space after "while"
Jacob
2012/02/01 07:48:26
Done.
| |
490 candidate = candidate.parent; | |
491 if (candidate.id.length > 0 && candidate.id.indexOf("section_") != 0) { | |
492 break; | |
493 } | |
494 } | |
495 return candidate; | |
496 } | |
497 | |
498 // this is very slow and ugly.. consider rewriting. | |
// Extracts the documentation that follows [elem]. | |
// A range is grown from just before [elem] through the following elements | |
// until a dart_removed element is met after some text has been collected, or | |
// the (tightened) [root] has been passed. When [prop] is non-null the first | |
// rendered line is split off and searched for a link. The remaining content | |
// is cloned, handed to [fragmentGeneratedCallback] if one is given, stripped | |
// of script/object/style elements and of anything that looks like IDL, and | |
// returned as a SectionParseResult (html, url, idl). | |
499 SectionParseResult filteredHtml(Element elem, Element root, String prop, | |
nweiz
2012/02/01 00:10:39
This is pretty hard to follow. If you don't end up
Jacob
2012/02/01 07:48:26
Clarified the above comment as a TODO... this code
| |
500 Function fragmentGeneratedCallback) { | |
501 // Using a tighter root avoids false positives at the risk of trimming | |
502 // text we shouldn't. | |
503 root = findTigherRoot(elem, root); | |
504 Range range = newRange(); | |
505 range.setStartBefore(elem); | |
506 | |
// Element-only walk: first child if any, else next sibling, else climb. | |
507 Element current = elem; | |
508 while (current != null) { | |
509 range.setEndBefore(current); | |
// Stop at a marked element once the range holds non-whitespace text. | |
510 if (current.classes.contains(DART_REMOVED)) { | |
nweiz
2012/02/01 00:10:39
Style nit: use && rather than nested ifs
Jacob
2012/02/01 07:48:26
Done.
| |
511 if (range.toString().trim().length > 0) { | |
512 break; | |
513 } | |
514 } | |
515 if (current.firstElementChild != null) { | |
516 current = current.firstElementChild; | |
517 } else { | |
// No children: include this element, then move to the next sibling of it or | |
// of the nearest ancestor that has one; reaching root ends the whole walk. | |
518 while (current != null) { | |
519 range.setEndAfter(current); | |
520 if (current == root) { | |
521 current = null; | |
522 break; | |
523 } | |
524 if (current.nextElementSibling != null) { | |
525 current = current.nextElementSibling; | |
526 break; | |
527 } | |
528 current = current.parent; | |
529 } | |
530 } | |
531 } | |
532 String url = null; | |
533 if (prop != null) { | |
534 Range firstLine = findFirstLine(range, prop); | |
535 if (firstLine != null) { | |
// Exclude the first line from the body and look for a link inside a clone | |
// of it. | |
536 range.setStart(firstLine.endContainer, firstLine.endOffset); | |
537 DocumentFragment firstLineClone = firstLine.cloneContents(); | |
538 AnchorElement anchor = findAnchorElement(firstLineClone, prop); | |
539 if (anchor != null) { | |
540 url = getAbsoluteUrl(anchor); | |
541 } | |
542 } | |
543 } | |
// Everything below works on a clone of the range contents. | |
544 DocumentFragment fragment = range.cloneContents(); | |
545 if (fragmentGeneratedCallback != null) { | |
546 fragmentGeneratedCallback(fragment); | |
547 } | |
548 // Strip tags we don't want | |
549 for (Element e in fragment.queryAll("script, object, style")) { | |
550 e.remove(); | |
551 } | |
552 | |
553 // Extract idl | |
554 StringBuffer idl = new StringBuffer(); | |
555 if (prop != null && prop.length > 0) { | |
556 // Only expect properties to have HTML. | |
557 for(Element e in fragment.queryAll(IDL_SELECTOR)) { | |
558 idl.add(e.outerHTML); | |
559 e.remove(); | |
560 } | |
561 // TODO(jacobr) this is a very basic regex to see if text looks like IDL | |
// NOTE(review): prop is interpolated into the pattern unescaped, so regex | |
// metacharacters in a property name would change the match - confirm that | |
// names are plain identifiers. | |
562 RegExp likelyIdl = new RegExp(" $prop\\w*\\("); | |
563 | |
564 for (Element e in fragment.queryAll("pre")) { | |
565 // Check if it looks like idl... | |
566 String txt = e.text.trim(); | |
567 if (likelyIdl.hasMatch(txt) && txt.indexOf("\n") != -1 | |
568 && txt.indexOf(")") != -1) { | |
nweiz
2012/02/01 00:10:39
Style nit: #contains rather than #indexOf
Jacob
2012/02/01 07:48:26
When making comments like that just note that all
| |
569 idl.add(e.outerHTML); | |
570 e.remove(); | |
571 } | |
572 } | |
573 } | |
574 return new SectionParseResult(genPrettyHtml(fragment), url, idl.toString()); | |
575 } | |
576 | |
// Picks the element under [root] most likely to document [prop]. | |
// First choice is the element whose id is [prop] (or "prop()" when | |
// [propType] is "methods"), skipping forward over siblings with no text. | |
// Otherwise the parents of the text nodes in [allText] whose cleaned-up text | |
// equals [prop] are considered, and the one with the smallest left edge | |
// below 70 pixels wins. Returns null when nothing qualifies. | |
577 Element findBest(Element root, List<Text> allText, String prop, String propType) { | |
nweiz
2012/02/01 00:10:39
Line length
Jacob
2012/02/01 07:48:26
Done.
| |
578 // Best bet: match an id | |
579 Element cand; | |
nweiz
2012/02/01 00:10:39
Style nit: var cand = root.query("#" + prop);
Jacob
2012/02/01 07:48:26
I prefer
Element cand = root.query("#$prop")
nweiz
2012/02/01 21:23:02
Agreed about "#$prop", but I'd prefer "final" sinc
Jacob
2012/02/02 05:26:38
Fixed.
| |
580 cand = root.query("#" + prop); | |
581 | |
// Methods may be bookmarked with an id that includes the parentheses. | |
582 if (cand == null && propType == "methods") { | |
583 cand = root.query("[id=" + prop + "\\(\\)]"); | |
nweiz
2012/02/01 00:10:39
Style nit: use interpolation
Jacob
2012/02/01 07:48:26
Done. This code started its life as JS.
| |
584 } | |
585 if (cand != null) { | |
nweiz
2012/02/01 00:10:39
Redundant if statement; everything in here will be
Jacob
2012/02/01 07:48:26
Done.
| |
586 while (cand != null && cand.text.trim().length == 0) { | |
587 // We found the bookmark for the element but sadly it is just an empty | |
588 // placeholder. Find the first real element. | |
589 cand = cand.nextElementSibling; | |
590 } | |
591 if (cand != null) { | |
592 return cand; | |
593 } | |
594 } | |
595 | |
// From here on cand is null: either no id matched or every following sibling | |
// was empty. | |
596 // If you are at least 70 pixels from the left, something is definitely fishy and we shouldn't even consider this candidate. | |
nweiz
2012/02/01 00:10:39
Line length
Jacob
2012/02/01 07:48:26
Done.
| |
597 num candLeft = 70; | |
598 | |
599 for (Text text in allText) { | |
600 Element proposed = null; | |
601 | |
602 // var t = safeNameCleanup(text.text); | |
603 // TODO(jacobr): does it hurt precision to use the full cleanup? | |
604 String t = fullNameCleanup(text.text); | |
605 if (t == prop) { | |
606 proposed = text.parent; | |
607 ClientRect candRect = getClientRect(proposed); | |
608 | |
609 // TODO(jacobr): this is a good heuristic | |
610 // if (selObj.selector.indexOf(" > DD ") == -1 | |
// Keep the left-most match seen so far. | |
611 if (candRect.left < candLeft) { | |
612 cand = proposed; | |
613 candLeft = candRect.left; | |
614 } | |
615 } | |
616 } | |
617 return cand; | |
618 } | |
619 | |
// True when [e] looks like it is marked obsolete or deprecated: either some | |
// span under it has lower-cased text starting with "obsolete" or | |
// "deprecated", or the lower-cased text of [e] contains one of those words | |
// preceded by start-of-text or whitespace and followed by whitespace or | |
// end-of-text. | |
620 bool isObsolete(Element e) { | |
621 RegExp obsoleteRegExp = new RegExp(@"(^|\s)obsolete(?=\s|$)"); | |
622 RegExp deprecatedRegExp = new RegExp(@"(^|\s)deprecated(?=\s|$)"); | |
623 for (Element child in e.queryAll("span")) { | |
624 String t = child.text.toLowerCase(); | |
625 if (t.startsWith("obsolete") || t.startsWith("deprecated")) return true; | |
626 } | |
627 | |
628 String text = e.text.toLowerCase(); | |
629 return obsoleteRegExp.hasMatch(text) || deprecatedRegExp.hasMatch(text); | |
630 } | |
631 | |
// True when [str] begins with one of the ASCII letters a-z. | |
632 bool isFirstCharLowerCase(String str) { | |
633 RegExp firstLower = new RegExp("^[a-z]"); | |
nweiz
2012/02/01 00:10:39
Doesn't need a variable.
Jacob
2012/02/01 07:48:26
Done.
| |
634 return firstLower.hasMatch(str); | |
635 } | |
636 | |
637 void scrapeSection(Element root, String sectionSelector, | |
nweiz
2012/02/01 00:10:39
This is another method that could really use some
Jacob
2012/02/01 07:48:26
Added todo. cleanup indentation
| |
638 String currentType, | |
639 List members, | |
640 String propType) { | |
641 Map expectedProps = dartIdl[propType]; | |
642 | |
643 Set<String> alreadyMatchedProperties = new Set<String>(); | |
644 bool onlyConsiderTables = false; | |
645 ElementList allMatches = root.queryAll(sectionSelector); | |
646 if (allMatches.length == 0) { | |
647 allMatches = root.queryAll(".fullwidth-table"); | |
648 onlyConsiderTables = true; | |
649 } | |
650 for (Element matchElement in allMatches) { | |
651 DivElement match = matchElement.parent; | |
652 if (!match.id.startsWith("section") && !(match.id == "pageText")) { | |
nweiz
2012/02/01 00:10:39
Style nit: !=
Jacob
2012/02/01 07:48:26
Done.
| |
653 throw "Enexpected element $match"; | |
nweiz
2012/02/01 00:10:39
"Unexpected"
Jacob
2012/02/01 07:48:26
Done.
| |
654 } | |
655 match.classes.add(DART_REMOVED); | |
656 | |
657 bool foundProps = false; | |
658 | |
659 // TODO(jacobr): we should really look for the table tag instead | |
660 // add an assert if we are missing something that is a table... | |
661 // TODO(jacobr) ignore tables in tables.... | |
662 for (Element t in match.queryAll('.standard-table, .fullwidth-table')) { | |
663 int helpIndex = -1; | |
664 num i = 0; | |
665 for (Element r in t.queryAll("th, td.header")) { | |
666 var txt = r.text.trim().split(" ")[0].toLowerCase(); | |
nweiz
2012/02/01 00:10:39
Style nit: final
Jacob
2012/02/01 07:48:26
Done.
| |
667 if (txt == "description") { | |
668 helpIndex = i; | |
669 break; | |
670 } | |
671 i++; | |
672 } | |
673 | |
674 List<int> numMatches = new List<int>(i); | |
675 for (int j = 0; j < i; j++) { | |
676 numMatches[j] = 0; | |
677 } | |
678 | |
679 // Find the row that seems to have the most names that look like | |
680 // expected properties. | |
681 for (Element r in t.queryAll("tbody tr")) { | |
682 ElementList $row = r.elements; | |
nweiz
2012/02/01 00:10:39
What's with the "$"?
Jacob
2012/02/01 07:48:26
This was jquery code to start with.... using $ is
| |
683 if ($row.length == 0 || $row.first.classes.contains(".header")) { | |
nweiz
2012/02/01 00:10:39
No need to manually continue if length is 0... the
Jacob
2012/02/01 07:48:26
That is needed. otherwise you will throw an excep
| |
684 continue; | |
685 } | |
686 | |
687 for (int k = 0; k < numMatches.length && k < $row.length; k++) { | |
688 Element e = $row[k]; | |
nweiz
2012/02/01 00:10:39
Unnecessary variable.
Jacob
2012/02/01 07:48:26
Done.
| |
689 if (expectedProps.containsKey(fullNameCleanup(e.text))) { | |
690 numMatches[k]++; | |
691 break; | |
692 } | |
693 } | |
694 } | |
695 | |
696 int propNameIndex = 0; | |
697 { | |
698 int bestCount = numMatches[0]; | |
699 for (int k = 1; k < numMatches.length; k++) { | |
700 if (numMatches[k] > bestCount) { | |
701 bestCount = numMatches[k]; | |
702 propNameIndex = k; | |
703 } | |
704 } | |
705 } | |
706 | |
707 for (Element r in t.queryAll("tbody tr")) { | |
708 ElementList $row = r.elements; | |
709 if ($row.length > propNameIndex && $row.length > helpIndex ) { | |
710 if ($row.first.classes.contains(".header")) { | |
711 continue; | |
712 } | |
713 // TODO(jacobr): this code for determining the namestr is needlessly | |
714 // messy. | |
715 Element nameRow = $row[propNameIndex]; | |
716 AnchorElement a = nameRow.query("a"); | |
717 String goodName = ''; | |
718 if (a != null) { | |
719 goodName = a.text.trim(); | |
720 } | |
721 String nameStr = nameRow.text; | |
722 | |
723 Map entry = new Map<String, String>(); | |
724 | |
725 // "currentType": $($row[1]).text().trim(), // find("code") ? | |
nweiz
2012/02/01 00:10:39
These comments should be cleaned up.
Jacob
2012/02/01 07:48:26
removed
| |
726 entry["name"] = fullNameCleanup(nameStr.length > 0 ? nameStr : goodNam e); | |
nweiz
2012/02/01 00:10:39
Long line
Jacob
2012/02/01 07:48:26
Done.
| |
727 | |
728 final parse = filteredHtml(nameRow, nameRow, entry["name"], null); | |
729 String altHelp = parse.html; | |
730 | |
731 // "jsSignature": nameStr, | |
732 entry["help"] = (helpIndex == -1 || $row[helpIndex] == null) ? altHelp : genPrettyHtmlFromElement($row[helpIndex]); | |
nweiz
2012/02/01 00:10:39
Long line
Jacob
2012/02/01 07:48:26
Done.
| |
733 // "altHelp" : altHelp, | |
734 if (parse.url != null) { | |
735 entry["url"] = parse.url; | |
736 } | |
737 | |
738 if (parse.idl.length > 0) { | |
739 entry["idl"] = parse.idl; | |
740 } | |
741 | |
742 entry["obsolete"] = isObsolete(r); | |
743 | |
744 if (entry["name"].length > 0) { | |
745 cleanupEntry(members, entry); | |
746 alreadyMatchedProperties.add(entry['name']); | |
747 foundProps = true; | |
748 } | |
749 } | |
750 } | |
751 } | |
752 | |
753 if (onlyConsiderTables) { | |
754 continue; | |
755 } | |
756 // After this point we have higher risk tests that attempt to perform | |
757 // rudimentary page segmentation. | |
758 | |
759 // Search for expected matching names. | |
760 List<Text> allText = getAllTextNodes(match); | |
761 | |
762 Map<String, Element> pmap = new Map<String, Element>(); | |
763 for (String prop in expectedProps.getKeys()) { | |
764 if (alreadyMatchedProperties.contains(prop)) { | |
765 continue; | |
766 } | |
767 Element e = findBest(match, allText, prop, propType); | |
768 if (e != null && !inTable(e)) { | |
769 pmap[prop] = e; | |
770 } | |
771 } | |
772 | |
773 for (String prop in pmap.getKeys()) { | |
774 Element e = pmap[prop]; | |
775 e.classes.add(DART_REMOVED); | |
776 } | |
777 | |
778 for (String prop in pmap.getKeys()) { | |
779 Element e = pmap[prop]; | |
780 ClientRect r = getClientRect(e); | |
781 // TODO(jacobr): a lot of these queries are identical. | |
782 for (Element cand in match.queryAll(e.tagName)) { | |
783 if (!cand.classes.contains(DART_REMOVED) && !inTable(cand) ) { // XXX us e a neg selector. | |
nweiz
2012/02/01 00:10:39
Long line
Jacob
2012/02/01 07:48:26
Done.
| |
784 ClientRect candRect = getClientRect(cand); | |
785 // TODO(jacobr): this is somewhat loose. | |
786 if (candRect.left == r.left && | |
787 (candRect.height - r.height).abs() < 5) { | |
788 String propName = fullNameCleanup(cand.text); | |
789 if (isFirstCharLowerCase(propName) && pmap.containsKey(propName) == false && alreadyMatchedProperties.contains(propName) == false) { | |
790 // Don't set here to avoid layouts... cand.classes.add(DART_REMOVE D); | |
nweiz
2012/02/01 00:10:39
Long lines
Jacob
2012/02/01 07:48:26
please add one comment to the top of a file with l
| |
791 pmap[propName] = cand; | |
792 } | |
793 } | |
794 } | |
795 } | |
796 } | |
797 | |
798 for (String prop in pmap.getKeys()) { | |
799 Element e = pmap[prop]; | |
nweiz
2012/02/01 00:10:39
Style nit: unnecessary variable
Jacob
2012/02/01 07:48:26
Done.
| |
800 e.classes.add(DART_REMOVED); | |
801 } | |
802 | |
803 // Find likely "subsections" of the main section and mark them with | |
804 // DART_REMOVED so we don't include them in member descriptions... which | |
805 // would suck. | |
806 for (Element e in match.queryAll("[id]")) { | |
807 if (e.id.indexOf(matchElement.id) != -1) { | |
nweiz
2012/02/01 00:10:39
Style nit: #contains rather than #indexOf
Jacob
2012/02/01 07:48:26
Done.
| |
808 e.classes.add(DART_REMOVED); | |
809 } | |
810 } | |
811 | |
812 for (String prop in pmap.getKeys()) { | |
813 Element elem = pmap[prop]; | |
814 bool obsolete = false; | |
815 final parse = filteredHtml( | |
816 elem, match, prop, | |
817 (Element e) { | |
818 obsolete = isObsolete(e); | |
819 }); | |
820 Map entry = { | |
821 "url" : parse.url, | |
822 "name" : prop, | |
823 "help" : parse.html, | |
824 "obsolete" : obsolete | |
825 //"jsSignature" : nameStr | |
nweiz
2012/02/01 00:10:39
Remove
Jacob
2012/02/01 07:48:26
Done.
| |
826 }; | |
827 if (parse.idl.length > 0) { | |
828 entry["idl"] = parse.idl; | |
829 } | |
830 cleanupEntry(members, entry); | |
831 } | |
832 } | |
833 } | |
834 | |
// Placeholder hook for trimming scraped HTML. Not implemented yet: it | |
// currently returns [html] unchanged. | |
835 String trimHtml(String html) { | |
836 // TODO(jacobr): impl. | |
837 return html; | |
838 } | |
839 | |
// Returns true if [name] has the shape of a member name: either a | |
// lowercase letter followed by one or more letters/digits, or an | |
// uppercase letter followed by any number of uppercase letters and | |
// underscores. Returns false for every other string. | |
840 bool maybeName(String name) { | |
841 RegExp nameRegExp = new RegExp("^[a-z][a-z0-9A-Z]+\$"); | |
842 if (nameRegExp.hasMatch(name)) return true; | |
843 RegExp constRegExp = new RegExp("^[A-Z][A-Z_]*\$"); | |
844 if (constRegExp.hasMatch(name)) return true; | |
nweiz
2012/02/01 00:10:39
Style nit: Unnecessary variables
Jacob
2012/02/01 07:48:26
Done.
| |
// Neither pattern matched. Return false explicitly so callers never see | |
// null from a function declared to return bool. | |
return false; | |
845 } | |
846 | |
// Adds the DART_REMOVED class to [e]. [e] may be null (nothing happens), | |
// a single Element, or something iterable that yields Elements. | |
847 void markRemoved(var e) { | |
nweiz
2012/02/01 00:10:39
Why not just have this take an Element and use lis
Jacob
2012/02/01 07:48:26
I don't follow
nweiz
2012/02/01 21:23:02
Type this as "void markRemoved(Element e)" and rem
Jacob
2012/02/02 05:26:38
Added a todo clarifying that the way the code is c
| |
848 if (e != null) { | |
849 // TODO( remove) | |
850 if (e is Element) { | |
851 e.classes.add(DART_REMOVED); | |
852 } else { | |
// Anything that is not an Element is iterated as a collection of them. | |
853 for (Element el in e) { | |
854 el.classes.add(DART_REMOVED); | |
855 } | |
856 } | |
857 } | |
858 } | |
859 | |
// Replaces every newline in [value] with a fixed sentinel token and | |
// returns the result. | |
// NOTE(review): presumably the consumer of the JSON turns the sentinel | |
// back into a newline -- confirm where that happens. | |
860 String JSONFIXUPHACK(String value) { | |
861 return value.replaceAll("\n", "ZDARTIUMDOESNTESCAPESLASHNJXXXX"); | |
862 } | |
863 | |
// Rewrites a leading "moz" in [name] to "webkit". Names that do not | |
// start with "moz" are returned unchanged. | |
864 String mozToWebkit(String name) { | |
865 RegExp regExp = new RegExp("^moz"); | |
866 name = name.replaceFirst(regExp, "webkit"); | |
nweiz
2012/02/01 00:10:39
Style nit: Both of these variable assignments are
Jacob
2012/02/01 07:48:26
Done.
| |
867 return name; | |
868 } | |
869 | |
// Removes a leading "webkit" from [name] if present, using trimPrefix. | |
870 String stripWebkit(String name) { | |
871 return trimPrefix(name, "webkit"); | |
872 } | |
873 | |
// Aggressively reduces [name] to a bare identifier: drops everything from | |
// the first '(' onward, keeps only the text before the first space, | |
// newline, tab or '*', trims it, and finally applies safeNameCleanup. | |
874 String fullNameCleanup(String name) { | |
875 int parenIndex = name.indexOf('('); | |
876 if (parenIndex != -1) { | |
877 // TODO(jacobr): workaround bug in: | |
nweiz
2012/02/01 00:10:39
Reference bug ID
Jacob
2012/02/02 05:26:38
removed todo. I forget what this was about.
| |
878 // name = name.split("(")[0]; | |
879 name = name.substring(0, parenIndex); | |
880 } | |
// Keep only the first token, cutting at each of these delimiters in turn. | |
881 name = name.split(" ")[0]; | |
882 name = name.split("\n")[0]; | |
883 name = name.split("\t")[0]; | |
884 name = name.split("*")[0]; | |
885 name = name.trim(); | |
886 name = safeNameCleanup(name); | |
887 return name; | |
888 } | |
889 | |
890 // Less aggressive than the full cleanup to avoid overeager matching of | |
891 // everything. | |
// Cuts [name] at the first '(' only when a ')' is also present, trims it, | |
// strips a leading "<type>." qualifier (currentType, currentTypeShort and | |
// currentTypeTiny, each tried as written and lowercased), and rewrites a | |
// leading "moz" to "webkit". | |
892 String safeNameCleanup(String name) { | |
893 int parenIndex = name.indexOf('('); | |
894 if (parenIndex != -1 && name.indexOf(")") != -1) { | |
895 // TODO(jacobr): workaround bug in: | |
896 // name = name.split("(")[0]; | |
897 name = name.substring(0, parenIndex); | |
898 } | |
899 name = name.trim(); | |
// Each trimPrefix call is a no-op unless the name starts with that prefix. | |
900 name = trimPrefix(name, currentType + "."); | |
901 name = trimPrefix(name, currentType.toLowerCase() + "."); | |
902 name = trimPrefix(name, currentTypeShort + "."); | |
903 name = trimPrefix(name, currentTypeShort.toLowerCase() + "."); | |
904 name = trimPrefix(name, currentTypeTiny + "."); | |
905 name = trimPrefix(name, currentTypeTiny.toLowerCase() + "."); | |
906 name = name.trim(); | |
907 name = mozToWebkit(name); | |
908 return name; | |
909 } | |
910 | |
// Removes every h1, h2 and h3 element found in [fragment]. | |
911 void removeHeaders(DocumentFragment fragment) { | |
912 for (Element e in fragment.queryAll("h1, h2, h3")) { | |
913 e.remove(); | |
914 } | |
915 } | |
916 | |
// Normalizes [entry] and appends it to [members]. The 'help' value (if | |
// any) goes through trimHtml and the 'name' through fullNameCleanup. The | |
// entry is only added when the cleaned name passes maybeName; otherwise | |
// it is dropped. Before adding, keys with null values are removed and | |
// String values are passed through JSONFIXUPHACK. | |
917 void cleanupEntry(List members, Map entry) { | |
918 if (entry.containsKey('help')) { | |
919 entry['help'] = trimHtml(entry['help']); | |
920 } | |
921 String name = fullNameCleanup(entry['name']); | |
922 entry['name'] = name; | |
923 if (maybeName(name)) { | |
// NOTE(review): keys are removed from entry while iterating getKeys(); | |
// this presumably relies on getKeys() returning a snapshot -- confirm. | |
924 for (String key in entry.getKeys()) { | |
925 var value = entry[key]; | |
926 if (value == null) { | |
927 entry.remove(key); | |
928 continue; | |
929 } | |
930 if (value is String) { | |
931 entry[key] = JSONFIXUPHACK(value); | |
932 } | |
933 } | |
934 members.add(entry); | |
935 } | |
936 } | |
937 | |
938 // TODO(jacobr) dup with trim start.... | |
// Returns [str] without its leading [prefix], or [str] unchanged when it | |
// does not start with [prefix]. Unlike trimStart, this returns the empty | |
// string when [str] equals [prefix]. | |
939 String trimPrefix(String str, String prefix) { | |
940 if (str.indexOf(prefix) == 0) { | |
941 return str.substring(prefix.length); | |
942 } else { | |
943 return str; | |
944 } | |
945 } | |
946 | |
// Starts the scrape by calling run() once the global `data` has been | |
// populated; does nothing while `data` is still null. | |
947 void resourceLoaded() { | |
nweiz
2012/02/01 00:10:39
Why is this in its own function?
Jacob
2012/02/01 07:48:26
obsolete. removed.
| |
948 if (data != null) run(); | |
949 } | |
950 | |
// Returns [str] without its leading [start]. [str] is returned unchanged | |
// when it does not start with [start], and also when it is not strictly | |
// longer than [start] (so a string equal to [start] is kept as is). | |
951 String trimStart(String str, String start) { | |
952 if (str.startsWith(start) && str.length > start.length) { | |
953 return str.substring(start.length); | |
954 } | |
955 return str; | |
956 } | |
957 | |
// Returns [str] without its trailing [end]. [str] is returned unchanged | |
// when it does not end with [end], and also when it is not strictly | |
// longer than [end] (so a string equal to [end] is kept as is). | |
958 String trimEnd(String str, String end) { | |
959 if (str.endsWith(end) && str.length > end.length) { | |
960 return str.substring(0, str.length - end.length); | |
961 } | |
962 return str; | |
963 } | |
964 | |
// For each element in the document matching [selector], takes the | |
// element's parent as the section, removes its h1/h2 elements and IDL | |
// blocks, and stores the filtered HTML under dbEntry[key] (appending when | |
// the key already exists). The section is then marked DART_REMOVED. | |
965 void extractSection(String selector, String key) { | |
966 for (Element e in document.queryAll(selector)) { | |
// NOTE(review): presumably the selector matches a heading/anchor inside | |
// the section container, hence the step up to the parent -- confirm. | |
967 e = e.parent; | |
968 for (Element skip in e.queryAll("h1, h2, $IDL_SELECTOR")) { | |
969 skip.remove(); | |
970 } | |
971 String html = filteredHtml(e, e, null, removeHeaders).html; | |
// Sections that produce no HTML are not recorded, but are still marked. | |
972 if (html.length > 0) { | |
973 if (dbEntry.containsKey(key)) { | |
974 dbEntry[key] += html; | |
975 } else { | |
976 dbEntry[key] = html; | |
977 } | |
978 } | |
979 e.classes.add(DART_REMOVED); | |
980 } | |
981 } | |
982 | |
// Scrapes the current page into dbEntry. In order: widens the body so | |
// lines do not wrap, records the title, strips inline styles and version | |
// boxes, rewrites links, validates the title against the current type, | |
// marks navigation chrome as removed, scrapes methods / constants / | |
// properties with scrapeSection, pulls out named sections with | |
// extractSection, collects examples and a summary, injects the debug CSS | |
// and finally calls onEnd(). The two early-exit paths also call onEnd(). | |
983 void run() { | |
984 // Inject CSS to insure lines don't wrap unless it was intentional. | |
nweiz
2012/02/01 00:10:39
s/insure/ensure/
Jacob
2012/02/01 07:48:26
Done.
| |
985 document.head.nodes.add(new Element.html(""" | |
986 <style type="text/css"> | |
987 body { | |
988 width: 10000px; | |
989 } | |
990 </style>""")); | |
991 | |
992 String title = trimEnd(window.document.title.trim(), " - MDN"); | |
993 dbEntry['title'] = title; | |
994 | |
995 // TODO(rnystrom): Clean up the page a bunch. Not sure if this is the best | |
996 // place to do this... | |
997 | |
998 // Ditch inline styles. | |
999 for (Element e in document.queryAll('[style]')) { | |
Jacob
2012/01/30 21:36:21
this is the wrong place to do this. Only do this
Bob Nystrom
2012/01/31 20:56:29
Done.
| |
1000 e.attributes.remove('style'); | |
1001 } | |
1002 | |
1003 // Remove the "Introduced in HTML <version>" boxes. | |
1004 for (Element e in document.queryAll('.htmlVersionHeaderTemplate')) { | |
1005 e.remove(); | |
1006 } | |
1007 | |
1008 // Flatten the list of known DOM types into a faster and case-insensitive map. | |
1009 domTypes = {}; | |
1010 for (final domType in domTypesRaw) { | |
1011 domTypes[domType.toLowerCase()] = domType; | |
1012 } | |
1013 | |
1014 // Fix up links. | |
1015 final SHORT_LINK = const RegExp(@'^[\w/]+$'); | |
1016 final INNER_LINK = const RegExp(@'[Ee]n/(?:[\w/]+/|)([\w#.]+)(?:\(\))?$'); | |
1017 final MEMBER_LINK = const RegExp(@'(\w+)[.#](\w+)'); | |
1018 final RELATIVE_LINK = const RegExp(@'^(?:../)*/?[Ee][Nn]/(.+)'); | |
1019 | |
1020 // - Make relative links absolute. | |
1021 // - If we can, take links that point to other MDN pages and retarget them | |
1022 // to appropriate pages in our docs. | |
1023 // TODO(rnystrom): Add rel external to links we didn't fix. | |
1024 for (AnchorElement a in document.queryAll('a')) { | |
1025 // Get the raw attribute because we *don't* want the browser to fully- | |
1026 // qualify the name for us since it has the wrong base address for the page. | |
1027 var href = a.attributes['href']; | |
Jacob
2012/01/30 21:36:21
use
a.href instead of a.attributes['href']
Bob Nystrom
2012/01/31 20:56:29
See preceding comment.
Jacob
2012/01/31 21:08:19
See which preceding comment? This seems like a tri
Bob Nystrom
2012/01/31 21:26:38
This one:
// Get the raw attribute because we *do
| |
1028 | |
1029 // Ignore busted links. | |
1030 if (href == null) continue; | |
1031 | |
1032 // If we can recognize what it's pointing to, point it to our page instead. | |
// The closure rewrites the enclosing `href` as a side effect; its boolean | |
// result is not used by the calls below. | |
1033 lookUpType(maybeType) { | |
Jacob
2012/01/30 21:36:21
return value not used. Name is also misleading.
Bob Nystrom
2012/01/31 20:56:29
Renamed and removed return.
nweiz
2012/02/01 00:10:39
It seems unnecessary to me that this function modi
| |
1034 // See if we know a type with that name. | |
1035 final realType = domTypes[maybeType.toLowerCase()]; | |
1036 if (realType != null) { | |
1037 href = '../html/$realType.html'; | |
1038 return true; | |
1039 } | |
1040 | |
1041 return false; | |
1042 } | |
1043 | |
1044 // If it's a word link like "foo" find a type or make it absolute. | |
1045 var match = SHORT_LINK.firstMatch(href); | |
1046 if (match != null) { | |
1047 href = 'https://developer.mozilla.org/en/DOM/${match[0]}'; | |
1048 } | |
1049 | |
1050 // If it's a relative link (that we know how to root), make it absolute. | |
1051 match = RELATIVE_LINK.firstMatch(href); | |
1052 if (match != null) { | |
1053 href = 'https://developer.mozilla.org/en/${match[1]}'; | |
1054 } | |
1055 | |
1056 match = INNER_LINK.firstMatch(href); | |
Jacob
2012/01/30 21:36:21
add a TODO that this is the wrong way to be doing
Bob Nystrom
2012/01/31 20:56:29
Added a comment up by the giant list of type names
| |
1057 if (match != null) { | |
1058 // See if we're linking to a member ("type.name" or "type#name") or just | |
1059 // a type ("type"). | |
1060 final member = MEMBER_LINK.firstMatch(match[1]); | |
1061 if (member != null) { | |
1062 lookUpType(member[1]); | |
1063 } else { | |
1064 lookUpType(match[1]); | |
1065 } | |
1066 } | |
1067 | |
1068 // Put it back into the element. | |
1069 a.attributes['href'] = href; | |
1070 } | |
1071 | |
// Skip the page when the title does not contain currentTypeTiny | |
// (case-insensitive), unless a tag name taken from the title creates an | |
// element whose typeName equals currentType. | |
1072 if (title.toLowerCase().indexOf(currentTypeTiny.toLowerCase()) == -1) { | |
nweiz
2012/02/01 00:10:39
Style nit: !#contains rather than #indexOf == -1
Jacob
2012/02/01 07:48:26
done. see above comments about only marking one in
| |
1073 bool foundMatch = false; | |
1074 // Test out if the title is really an HTML tag that matches the | |
1075 // current class name. | |
1076 for (String tag in [title.split(" ")[0], title.split(".").last()]) { | |
// Candidates for which the body below throws are silently ignored. | |
1077 try { | |
1078 dom.Element element = dom.document.createElement(tag); | |
1079 if (element.typeName == currentType) { | |
1080 foundMatch = true; | |
1081 break; | |
1082 } | |
1083 } catch(e) {} | |
1084 } | |
1085 if (foundMatch == false) { | |
nweiz
2012/02/01 00:10:39
!foundMatch
Jacob
2012/02/01 07:48:26
Done.
| |
1086 dbEntry['skipped'] = true; | |
1087 dbEntry['cause'] = "Suspect title"; | |
1088 onEnd(); | |
1089 return; | |
1090 } | |
1091 } | |
1092 | |
1093 Element root = document.query(".pageText"); | |
1094 if (root == null) { | |
// NOTE(review): unlike the suspect-title path above, 'skipped' is not set | |
// here -- confirm that is intended. | |
1095 dbEntry['cause'] = '.pageText not found'; | |
1096 onEnd(); | |
1097 return; | |
1098 } | |
1099 | |
1100 markRemoved(root.query("#Notes")); | |
1101 List members = dbEntry['members']; | |
nweiz
2012/02/01 00:10:39
Style nit: declare this closer to where it's used
Jacob
2012/02/01 07:48:26
It is 6 lines above where it is used. I think thi
| |
1102 | |
1103 markRemoved(document.queryAll(".pageToc, footer, header, #nav-toolbar")); | |
1104 markRemoved(document.queryAll("#article-nav")); | |
1105 markRemoved(document.queryAll(".hideforedit")); | |
1106 markRemoved(document.queryAll(".navbox")); | |
1107 markRemoved(document.query("#Method_overview")); | |
1108 markRemoved(document.queryAll("h1, h2")); | |
1109 | |
1110 scrapeSection(root, "#Methods", currentType, members, 'methods'); | |
1111 scrapeSection(root, "#Constants, #Error_codes, #State_constants", currentType, members, 'constants'); | |
nweiz
2012/02/01 00:10:39
Line length
Jacob
2012/02/01 07:48:26
Done.
| |
1112 // TODO(jacobr): infer tables based on multiple matches rather than | |
1113 // using a hard coded list of section ids. | |
1114 scrapeSection(root, | |
1115 "[id^=Properties], #Notes, [id^=Other_properties], #Attributes, #DOM_prope rties, #Event_handlers, #Event_Handlers", | |
nweiz
2012/02/01 00:10:39
Line length
Jacob
2012/02/01 07:48:26
Done.
| |
1116 currentType, members, 'properties'); | |
1117 | |
1118 // Avoid doing this till now to avoid messing up the section scrape. | |
1119 markRemoved(document.queryAll("h3")); | |
1120 | |
1121 ElementList $examples = root.queryAll("span[id^=example], span[id^=Example]"); | |
1122 | |
1123 extractSection("#See_also", 'seeAlso'); | |
1124 extractSection("#Specification, #Specifications", "specification"); | |
1125 // $("#Methods").parent().remove(); // not safe (e.g. Document) | |
1126 | |
1127 // TODO(jacobr): actually extract the constructor(s) | |
1128 extractSection("#Constructor, #Constructors", 'constructor'); | |
1129 extractSection("#Browser_compatibility, #Compatibility", 'compatibility'); | |
1130 | |
1131 List<String> exampleHtml = []; | |
// All examples are marked removed first, before any of them is filtered. | |
1132 for (Element e in $examples) { | |
1133 e.classes.add(DART_REMOVED); | |
1134 } | |
1135 for (Element e in $examples) { | |
1136 String html = filteredHtml(e, root, null, | |
1137 (DocumentFragment fragment) { | |
1138 removeHeaders(fragment); | |
1139 if (fragment.text.trim().toLowerCase() == "example") { | |
1140 // Degenerate example. | |
1141 fragment.nodes.clear(); | |
1142 } | |
1143 }).html; | |
1144 if (html.length > 0) { | |
1145 exampleHtml.add(html); | |
1146 } | |
1147 } | |
1148 if (exampleHtml.length > 0) { | |
1149 dbEntry['examples'] = exampleHtml; | |
1150 } | |
1151 | |
1152 StringBuffer summary = new StringBuffer(); | |
1153 | |
1154 for (Element e in root.queryAll("#Summary, #Description")) { | |
// NOTE(review): the first two arguments are (root, e) here, whereas the | |
// examples loop above passes (e, root) -- confirm the order is intended. | |
1155 summary.add(filteredHtml(root, e, null, removeHeaders).html); | |
1156 } | |
1157 | |
1158 if (summary.length == 0) { | |
1159 // Remove the "Gecko DOM Reference text" | |
1160 Element ref = root.query(".lang.lang-en"); | |
1161 if (ref != null) { | |
1162 ref = ref.parent; | |
1163 String refText = ref.text.trim(); | |
1164 if (refText == "Gecko DOM Reference" || | |
1165 refText == "« Gecko DOM Reference") { | |
1166 ref.remove(); | |
1167 } | |
1168 } | |
1169 // Risky... this might add stuff we shouldn't. | |
1170 summary.add(filteredHtml(root, root, null, removeHeaders).html); | |
1171 } | |
1172 | |
1173 if (summary.length > 0) { | |
1174 dbEntry['summary'] = summary.toString(); | |
1175 } | |
1176 | |
1177 // Inject CSS to aid debugging in the browser. | |
1178 document.head.nodes.add(new Element.html(DEBUG_CSS)); | |
1179 | |
1180 onEnd(); | |
1181 } | |
1182 | |
// Entry point. Calls documentLoaded directly with a null event instead of | |
// registering it as a window load handler (see the commented-out line). | |
1183 void main() { | |
1184 //window.on.load.add(documentLoaded); | |
1185 documentLoaded(null); | |
Jacob
2012/01/30 21:36:21
Looks like this was a hack I forgot to remove.
Swi
Bob Nystrom
2012/01/31 20:56:29
Done.
| |
1186 } | |
1187 | |
// Requests "<window.location>.json". In the request callback the response | |
// text is parsed into the global `data`, dbEntry is reset to an empty | |
// members list plus the page's srcUrl, and resourceLoaded() is called. | |
// [event] is not used. | |
1188 void documentLoaded(event) { | |
1189 new XMLHttpRequest.getTEMPNAME('${window.location}.json', (req) { | |
1190 data = JSON.parse(req.responseText); | |
1191 dbEntry = {'members': [], 'srcUrl': pageUrl}; | |
1192 resourceLoaded(); | |
1193 }); | |
1194 } | |
OLD | NEW |