utils/apidoc/mdn/crawl.js - Issue 9225039: Integrate MDN content into API documentation. - Code Review

Chromium Code Reviews

chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out

(40)

My Issues | Starred Open | Closed | All

Side by Side Diff: utils/apidoc/mdn/crawl.js

Issue 9225039: Integrate MDN content into API documentation. (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart

Patch Set: Remove temp code. Created 8 years, 10 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

« utils/apidoc/mdn/README.txt ('K') | « utils/apidoc/mdn/README.txt ('k') | utils/apidoc/mdn/data/dartIdl.json » ('j') | utils/apidoc/mdn/extract.dart » ('J')
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
(Empty)
	1 var http = require('http');
	nweiz 2012/02/01 00:10:39: Why is this file in JS?
	Jacob 2012/02/01 07:48:26: Because the Dart server libraries currently lack the functionality we need. When they do, this script should be ported.
	nweiz 2012/02/01 21:23:02: It would be good to document exactly what's keeping all the JS scripts as JS.
	Jacob 2012/02/02 05:26:38: That seems low value. Just look at each call to a Node method (e.g. http. and fs.) and you have something that is missing in Dart currently. However, I have added a TODO to convert to Dart when all these methods are available.
	nweiz 2012/02/02 19:54:34: It was non-obvious to me. It's important that someone coming across this in three months is able to understand whether or not it should be converted to Dart.
	Jacob 2012/02/02 22:03:14: The trouble is that if I made a list of which methods still need to be implemented by the Dart server-side support, that list would be obsolete in three months, so there isn't value in explicitly listing that here.
	2 var fs = require('fs');

	3

	// Make sure the directory that receives the crawled pages exists.
	try {
	  fs.mkdirSync('output/crawl');
	} catch (e) {
	  // It doesn't matter if the directory already exists. Any other failure
	  // (for example a missing parent directory or a permissions problem) would
	  // make every later write fail, so surface it right away.
	  if (e.code !== 'EEXIST') {
	    throw e;
	  }
	}

	// The DOM types to crawl, as listed in data/domTypes.json.
	var domTypes = JSON.parse(fs.readFileSync('data/domTypes.json', 'utf8'));

	// Maps each DOM type to the list of {index, link, title} entries that were
	// queued for crawling; written out as output/crawl/cache.json at the end.
	var cacheData = {};

	13

	// Fetches Google's cached copy of the MDN page at `link` and saves it as
	// output/crawl/<filename>.html.
	//
	// The request is asynchronous: this function returns once the request has
	// been sent, and the file is written when the response has finished.
	//
	// filename: base name (without extension) of the file to write.
	// link: absolute url of the page; must start with
	//     https://developer.mozilla.org/.
	//
	// Throws an Error if `link` does not point at developer.mozilla.org.
	function scrape(filename, link) {
	  console.log(link);
	  var prefix = 'https://developer.mozilla.org/';
	  if (link.indexOf(prefix) !== 0) {
	    throw new Error("Unexpected url: " + link);
	  }
	  var scrapePath = "/search?q=cache:" + link;
	  // We crawl content from googleusercontent.com so we don't have to worry
	  // about crawler politeness like we would have to if scraping
	  // developer.mozilla.org directly.
	  var options = {
	    host: 'webcache.googleusercontent.com',
	    path: scrapePath,
	    port: 80,
	    method: 'GET'
	  };

	  var req = http.request(options, function(res) {
	    res.setEncoding('utf8');
	    var data = '';
	    var written = false;

	    res.on('data', function(d) {
	      data += d;
	    });

	    // Both 'end' and 'close' can fire for a single response, so guard
	    // against writing the same file twice.
	    var onClose = function() {
	      if (written) {
	        return;
	      }
	      written = true;
	      console.log("Writing crawl result for " + link);
	      fs.writeFileSync("output/crawl/" + filename + ".html", data, 'utf8');
	    };
	    res.on('close', onClose);
	    res.on('end', onClose);
	  });

	  // A failed request is fatal for the crawl: throwing from the event handler
	  // is deliberately left uncaught so the script stops.
	  req.on('error', function(e) {
	    throw new Error("Error " + e + " scraping " + link);
	  });
	  req.end();
	}

	53

	// Hardcoded MDN urls for the few types where the search engine gets the
	// wrong answer. Keyed by DOM type name.
	var linkOverrides = {
	  'Screen': 'https://developer.mozilla.org/en/DOM/window.screen',
	  'Text': 'https://developer.mozilla.org/en/DOM/Text',
	  'Touch': 'https://developer.mozilla.org/en/DOM/Touch',
	  'TouchEvent': 'https://developer.mozilla.org/en/DOM/TouchEvent',
	  'webkitTouchEvent': 'https://developer.mozilla.org/en/DOM/TouchEvent',
	  'WebkitTouchEvent': 'https://developer.mozilla.org/en/DOM/TouchEvent',
	  'WebKitTouchEvent': 'https://developer.mozilla.org/en/DOM/TouchEvent',
	  'HTMLSpanElement': 'https://developer.mozilla.org/en/HTML/Element/span',
	  'HTMLPreElement': 'https://developer.mozilla.org/en/HTML/Element/pre',
	  'HTMLFrameElement': 'https://developer.mozilla.org/en/HTML/Element/frame',
	  'HTMLFrameSetElement':
	      'https://developer.mozilla.org/en/HTML/Element/frameset',
	  'Geolocation': 'https://developer.mozilla.org/en/nsIDOMGeolocation',
	  'Notification': 'https://developer.mozilla.org/en/DOM/notification',
	  'IDBDatabase': 'https://developer.mozilla.org/en/IndexedDB/IDBDatabase'
	};

	var prefix = 'https://developer.mozilla.org/';
	var notFoundPrefix = 'https://developer.mozilla.org/Article_not_found?uri=';

	// For every DOM type, queue a crawl of either its hardcoded url or of each
	// of its search results, recording what was queued in cacheData.
	for (var i = 0; i < domTypes.length; i++) {
	  var type = domTypes[i];

	  // Json containing the search results for the current type.
	  var data = fs.readFileSync("output/search/" + type + ".json", 'utf8');
	  var json = JSON.parse(data);
	  if (!('items' in json)) {
	    console.warn("No search results for " + type);
	    continue;
	  }
	  var items = json['items'];

	  var entry = [];
	  cacheData[type] = entry;

	  // hasOwnProperty keeps type names such as 'constructor' from matching
	  // properties inherited from Object.prototype.
	  if (Object.prototype.hasOwnProperty.call(linkOverrides, type)) {
	    var override = linkOverrides[type];
	    entry.push({index: 0, link: override, title: type});
	    scrape(type + 0, override);
	    continue;
	  }

	  for (var j = 0; j < items.length; j++) {
	    var item = items[j];
	    // Be optimistic and replace article not found links with links to where
	    // the article should be.
	    var link = item['link'];
	    if (link.indexOf(notFoundPrefix) === 0) {
	      link = prefix + link.substr(notFoundPrefix.length);
	    }

	    entry.push({index: j, link: link, title: item['title']});
	    scrape(type + j, link);
	  }
	}

	// cacheData is filled synchronously above, so it is complete here even
	// though the scrape requests themselves are still in flight.
	fs.writeFileSync('output/crawl/cache.json',
	    JSON.stringify(cacheData, null, ' '), 'utf8');

OLD	NEW

« utils/apidoc/mdn/README.txt ('K') | « utils/apidoc/mdn/README.txt ('k') | utils/apidoc/mdn/data/dartIdl.json » ('j') | utils/apidoc/mdn/extract.dart » ('J')

Powered by Google App Engine

This is Rietveld 408576698