Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(224)

Side by Side Diff: third_party/WebKit/Source/devtools/scripts/convert-3pas-product-registry.js

Issue 2772493002: [Devtools] Product registry to support prefix & import of data (Closed)
Patch Set: Merge branch 'ADD_SHA1' into NEW_PRODUCT_REGISTRY_STRUCTURE Created 3 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « third_party/WebKit/Source/devtools/front_end/product_registry/ProductRegistryData.js ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 const fs = require('fs');
2
/*
How to use:
1) Get a dump of the data in CSV format and save it as 3pas.csv in the same directory as this script.
2) Header fields in the CSV are used as keys when each row is turned into a JSON object [ie: the top row should not contain spaces or special characters].
3) The two important column names are: 'name_legal_product' and 'domain'.
4) The CSV must not have a header named 'prefix'; this script stores its own value under that key.
5) 'name_legal_product' will have its data cleaned up a bit, so be prepared for it to change.
6) This script tries to de-duplicate the data, so be prepared for many entries to go away if it finds a shorter one.
7) This script prints a JavaScript file in the product_registry's data format to standard output.
*/
13
/*
 * Settings read by the SHA-1 helpers at the bottom of this file. Adjust them
 * only if the consumer of the hashes expects a different format; the defaults
 * work in most cases.
 */

// Hex output format: 0 selects lowercase digits, 1 selects uppercase digits.
const hexcase = 0;

// Base-64 pad character; "=" gives strict RFC compliance.
const b64pad = '=';

// Bits per input character: 8 for ASCII, 16 for Unicode.
const chrsz = 8;
21
// Load the CSV dump. The first line holds the column names; every following
// line becomes one object keyed by those names.
var data = fs.readFileSync('3pas.csv', 'utf8');
var headerLine = data.split('\n', 1)[0];
// Drop the header together with the newline that terminates it, so the first
// parsed field belongs to the first data row (and parse-error line numbers,
// which start at 2 below, line up with the file).
data = data.substr(headerLine.length + 1);

var columnNames = Array.from(csvUnmarshaller(headerLine)).map(v => v[0]);
var lineObjs = [];

var marshaller = csvUnmarshaller(data, 2);
var lineObj = {};
var colIndex = 0;
for (var [colData, isEnding] of marshaller) {
  // When two columns share a header name, the first column's value wins.
  if (!(columnNames[colIndex] in lineObj))
    lineObj[columnNames[colIndex]] = colData;
  colIndex++;
  if (isEnding) {
    // The row is complete: store it, then start collecting the next one.
    lineObjs.push(lineObj);
    lineObj = {};
    colIndex = 0;
  }
}
// The last row is not newline-terminated when the file lacks a trailing
// newline; it still has to be kept.
if (colIndex > 0)
  lineObjs.push(lineObj);
43
// Keep only rows with a usable domain, product name and status, normalize the
// domain and product name, and retain the first row seen for each domain.
var map = new Map();
for (var lineObj of lineObjs) {
  if (lineObj.domain === null || lineObj.domain === undefined ||
      (lineObj.status_allowed !== 'allowed' && lineObj.status_allowed !== 'disallowed'))
    continue;
  // The CSV parser turns a bare NULL into null; such a row has no name to
  // register, so report it on stderr and move on instead of crashing on trim().
  if (lineObj.name_legal_product === null || lineObj.name_legal_product === undefined) {
    console.error('Skipping row without name_legal_product: ', lineObj.domain);
    continue;
  }
  // Lowercase, drop characters outside [a-z0-9_-*.], and strip a leading
  // "www." when exactly two dot-separated labels follow it.
  lineObj.domain =
      lineObj.domain.trim().toLowerCase().replace(/[^a-z0-9_\-*.]/g, '').replace(/^www\.(?=[^.]+\.[^.]+$)/, '');

  lineObj.name_legal_product = lineObj.name_legal_product.trim()
                                   .replace(/\s\s/g, ' ')
                                   .replace(/[\x00-\x1F]/g, '')
                                   // Decode HTML-escaped double quotes.
                                   .replace(/&quot;/g, '"')
                                   // The following two lines are to keep input data from corrupting output data.
                                   .replace(/","/g, '')
                                   .replace(/},{/g, '')
                                   .replace(/“|”/g, '"')
                                   .replace(/,$/g, '')
                                   // Decode HTML-escaped ampersands.
                                   .replace(/&amp;/g, '&')
                                   // This is how csv escapes double quotes.
                                   .replace(/""/g, '"');
  if (!map.has(lineObj.domain))
    map.set(lineObj.domain, lineObj);
}

lineObjs = Array.from(map.values());
69
// Index the cleaned rows as: base domain (last two labels) -> full domain ->
// prefix -> row. A domain may contain at most one '*': the text before it is
// the prefix (a bare leading '*' is stored as the prefix '*'), the text after
// it is the domain the prefix applies to.
var map = new Map();
for (var lineObj of lineObjs) {
  if (!lineObj)
    continue;
  var domain = lineObj.domain.trim();
  if (!domain.length)
    continue;
  var prefixSuffix = domain.split('*');
  if (prefixSuffix.length > 2)
    throw new Error('We do not support multiple * in domains');
  var prefix = '';
  var suffixDomain = '';
  if (prefixSuffix.length === 1) {
    suffixDomain = prefixSuffix[0];
  } else {
    prefix = prefixSuffix[0];
    if (prefix === '')
      prefix = '*';
    suffixDomain = prefixSuffix[1];
  }

  var domainParts = suffixDomain.split('.');
  if (domainParts.length < 2)
    throw new Error('Invalid domain');
  var baseDomain = domainParts[domainParts.length - 2] + '.' + domainParts[domainParts.length - 1];
  // "*.foo.com" leaves an empty first label after the split on '*'; drop it.
  while (domainParts[0] === '')
    domainParts.shift();
  lineObj.domain = domainParts.join('.');
  lineObj.prefix = prefix;

  var mapOfSubdomains = map.get(baseDomain);
  if (!mapOfSubdomains) {
    mapOfSubdomains = new Map();
    map.set(baseDomain, mapOfSubdomains);
  }

  var prefixMap = mapOfSubdomains.get(lineObj.domain);
  if (!prefixMap) {
    prefixMap = new Map();
    mapOfSubdomains.set(lineObj.domain, prefixMap);
  }
  // Two rows collapsed onto the same domain and prefix; the later one wins.
  // Report on stderr: stdout carries the generated data file and must not be
  // polluted with diagnostics.
  if (prefixMap.has(prefix))
    console.error('Problem with: ', domain, lineObj.domain);
  prefixMap.set(prefix, lineObj);
}
115
// Pick the rows that go into the generated table. A row is left out when a
// broader rule for the same product already covers it: either the wildcard on
// the same domain, or a prefixed/wildcard rule on one of its parent domains.
var outputProducts = [];
var outputObj = new Map();
for (var subdomains of map.values()) {
  for (var prefixes of subdomains.values()) {
    var wildLineObj = prefixes.get('*');
    NEXT_ENTRY: for (var lineObj of prefixes.values()) {
      var productName = lineObj.name_legal_product;

      // The wildcard on this very domain already yields the same product.
      if (wildLineObj && lineObj.prefix !== '*' && wildLineObj.name_legal_product === productName)
        continue;

      // Walk up the parent domains, stopping before the last single label.
      // At each step childLabel is the label directly below that parent.
      var labels = lineObj.domain.split('.');
      for (var depth = 1; labels.length - depth > 1; depth++) {
        var parentPrefixes = subdomains.get(labels.slice(depth).join('.'));
        if (!parentPrefixes)
          continue;
        var childLabel = labels[depth - 1];
        for (var parentLineObj of parentPrefixes.values()) {
          if (parentLineObj.prefix === '' || parentLineObj.name_legal_product !== productName)
            continue;
          // Per chat with 3pas team. We need to check prefix on subdomain not top level domain.
          // ie: f*.foo.bar -> [b.f00.foo.bar, true], [f00.foo.bar, true], [f00.b.foo.bar, false]
          if (parentLineObj.prefix === '*' || childLabel.startsWith(parentLineObj.prefix))
            continue NEXT_ENTRY;
        }
      }

      var fullSubdomain = lineObj.domain;
      var outputPart = outputObj.get(fullSubdomain);
      if (!outputPart) {
        // Only the first 16 hex digits of the domain's SHA-1 are stored.
        outputPart = {hash: hex_sha1(fullSubdomain).substr(0, 16), prefixes: {}};
        outputObj.set(fullSubdomain, outputPart);
      }
      outputPart.prefixes[lineObj.prefix] = registerOutputProduct(productName);
    }
  }
}
161
// Print the generated data file to stdout: a license header, the list of
// product names, then one entry per domain whose prefixes point into that list.
var generatedFileHeader = [
  '// Copyright 2017 The Chromium Authors. All rights reserved.',
  '// Use of this source code is governed by a BSD-style license that can be',
  '// found in the LICENSE file.',
  '// clang-format off',
  '/* eslint-disable */',
  'ProductRegistry.register(['
];
console.log(generatedFileHeader.join('\n'));

// One product name per line; the outer [ and ] of the JSON array are cut off
// because the surrounding brackets are printed separately.
var data = JSON.stringify(outputProducts).replace(/","/g, '",\n "');
console.log(' ' + data.slice(1, -1));
console.log('],');
console.log('[');

var outputObjArray = Array.from(outputObj.values());
outputObjArray.forEach((obj, position) => {
  var separator = position === outputObjArray.length - 1 ? '' : ',';
  // Trailing comment listing the product name behind each prefix index.
  var comments = Object.keys(obj.prefixes).map(prefix => '[' + outputProducts[obj.prefixes[prefix]] + ']');
  console.log(' ' + JSON.stringify(obj) + separator + ' // ' + comments.join(' '));
});
console.log(']);');
183
184
185 // items.forEach(lineObj => console.log(lineObj.name_legal_product.padStart(50), lineObj.domain.padStart(30)));
186 // console.log("With *: ", items.filter(v => v.domain.indexOf('*') !== -1).lengt h);
187 // console.log("Total: ", items.length);
188
189
190
/**
 * Returns the position of a product name in outputProducts, appending the
 * name first when it is not listed yet. The lookup is a linear scan.
 * @param {string} name
 * @return {number} index of the name in outputProducts
 */
function registerOutputProduct(name) {
  var existingIndex = outputProducts.indexOf(name);
  if (existingIndex !== -1)
    return existingIndex;
  // push() returns the new length, so the appended name sits at length - 1.
  return outputProducts.push(name) - 1;
}
200
/**
 * Lazily splits CSV text into fields.
 *
 * Yields one [value, endsLine] pair per field. endsLine is true when the field
 * was terminated by a newline and false when it was terminated by a comma or
 * by the end of the input. A trailing empty field at the very end of the input
 * is not reported.
 *
 * Fields may be wrapped in double or single quotes; the wrapping quotes are
 * removed but doubled quotes inside are left as they are (callers unescape
 * them). An unquoted field whose text is exactly NULL is reported as null.
 * Both "\n" and "\r\n" line endings are accepted.
 *
 * @param {string} data CSV text to tokenize.
 * @param {number=} lineOffset Line number of the first line of data, used only
 *     in error messages. Defaults to 1.
 * @throws {Error} when a quoted field is not properly closed and terminated.
 */
function* csvUnmarshaller(data, lineOffset) {
  // Sticky patterns can only match exactly at lastIndex. That keeps a
  // malformed field from being "rescued" by a match further down the input,
  // which would silently skip or mangle data.
  var doubleQuotedField = /"((?:[^"]|"")*)"(,|\r?\n|\r?$)/y;
  var singleQuotedField = /'((?:[^']|'')*)'(,|\r?\n|\r?$)/y;
  // Lazy so that a "\r" right before the line end goes to the terminator
  // while a "\r" elsewhere stays part of the value.
  var bareField = /([^,\n]*?)(,|\r?\n|\r?$)/y;

  var lineNo = lineOffset || 1;
  var colLength = 0;
  var position = 0;
  while (position < data.length) {
    var firstChar = data[position];
    var isQuoted = firstChar === '"' || firstChar === '\'';
    var pattern = bareField;
    if (firstChar === '"')
      pattern = doubleQuotedField;
    else if (firstChar === '\'')
      pattern = singleQuotedField;

    pattern.lastIndex = position;
    var match = pattern.exec(data);
    if (!match)
      throw new Error('Bad data at line ' + lineNo + ' col: ' + colLength + ' ' + data.substr(position, 15));

    var value = match[1];
    if (!isQuoted && value === 'NULL')
      value = null;
    var endsLine = match[2].endsWith('\n');

    position += match[0].length;
    colLength += match[0].length;
    // Quoted fields may span lines, so count every newline that was consumed.
    lineNo += match[0].split('\n').length - 1;
    if (endsLine)
      colLength = 0;

    yield [value, endsLine];
  }
}
231
232
233 // All sha1 helpers from here down.
234
235
236 /*
237 * A JavaScript implementation of the Secure Hash Algorithm, SHA-1, as defined
238 * in FIPS PUB 180-1
239 * Version 2.1a Copyright Paul Johnston 2000 - 2002.
240 * Other contributors: Greg Holt, Andrew Kepert, Ydnar, Lostinet
241 * Distributed under the BSD License
242 * See http://pajhome.org.uk/crypt/md5 for details.
243 */
244
/*
 * Public entry points. Each takes string arguments and returns the SHA-1
 * (or HMAC-SHA1) digest encoded as hex, base-64, or a raw string.
 * The message length handed to core_sha1 is in bits: characters * chrsz.
 */
function hex_sha1(s) {
  var digestWords = core_sha1(str2binb(s), s.length * chrsz);
  return binb2hex(digestWords);
}
function b64_sha1(s) {
  var digestWords = core_sha1(str2binb(s), s.length * chrsz);
  return binb2b64(digestWords);
}
function str_sha1(s) {
  var digestWords = core_sha1(str2binb(s), s.length * chrsz);
  return binb2str(digestWords);
}
function hex_hmac_sha1(key, data) {
  var macWords = core_hmac_sha1(key, data);
  return binb2hex(macWords);
}
function b64_hmac_sha1(key, data) {
  var macWords = core_hmac_sha1(key, data);
  return binb2b64(macWords);
}
function str_hmac_sha1(key, data) {
  var macWords = core_hmac_sha1(key, data);
  return binb2str(macWords);
}
267
/*
 * Self-test: hashes 'abc' and compares the result with the expected digest.
 * Returns true when they agree.
 */
function sha1_vm_test() {
  var expectedDigest = 'a9993e364706816aba3e25717850c26c9cd0d89d';
  return hex_sha1('abc') === expectedDigest;
}
274
/*
 * Computes SHA-1 over an array of big-endian 32-bit words.
 *   x   - message words; modified in place (padding and length are written
 *         into it)
 *   len - message length in bits
 * Returns the digest as an array of five 32-bit words.
 */
function core_sha1(x, len) {
  // Padding: a single 1 bit right after the message, and the bit length in
  // the last word of the final 16-word block.
  x[len >> 5] |= 0x80 << (24 - len % 32);
  x[((len + 64 >> 9) << 4) + 15] = len;

  var schedule = new Array(80);
  var state = [1732584193, -271733879, -1732584194, 271733878, -1009589776];

  for (var blockStart = 0; blockStart < x.length; blockStart += 16) {
    var [a, b, c, d, e] = state;

    for (var round = 0; round < 80; round++) {
      // The first 16 schedule words come straight from the block; the rest
      // are derived from earlier schedule words.
      schedule[round] = round < 16 ?
          x[blockStart + round] :
          rol(schedule[round - 3] ^ schedule[round - 8] ^ schedule[round - 14] ^ schedule[round - 16], 1);

      var mixed = safe_add(
          safe_add(rol(a, 5), sha1_ft(round, b, c, d)), safe_add(safe_add(e, schedule[round]), sha1_kt(round)));
      e = d;
      d = c;
      c = rol(b, 30);
      b = a;
      a = mixed;
    }

    // Fold this block's result into the running state.
    state = [
      safe_add(a, state[0]), safe_add(b, state[1]), safe_add(c, state[2]), safe_add(d, state[3]),
      safe_add(e, state[4])
    ];
  }
  return state;
}
318
/*
 * Combines the three words b, c, d with the function that belongs to
 * iteration t: a bitwise choice below 20, a bitwise majority from 40 up to
 * (not including) 60, and a three-way XOR everywhere else.
 */
function sha1_ft(t, b, c, d) {
  if (t < 20)
    return (b & c) | (~b & d);
  var inMajorityRange = t >= 40 && t < 60;
  if (inMajorityRange)
    return (b & c) | (b & d) | (c & d);
  return b ^ c ^ d;
}
332
/*
 * Returns the additive constant used in iteration t; the constant changes
 * every 20 iterations.
 */
function sha1_kt(t) {
  if (t < 20)
    return 1518500249;
  if (t < 40)
    return 1859775393;
  if (t < 60)
    return -1894007588;
  return -899497514;
}
339
/*
 * Computes HMAC-SHA1 of the string data under the string key.
 * Returns the result as an array of 32-bit words, as produced by core_sha1.
 */
function core_hmac_sha1(key, data) {
  var keyWords = str2binb(key);
  // Keys longer than one 16-word block are replaced by their hash.
  if (keyWords.length > 16)
    keyWords = core_sha1(keyWords, key.length * chrsz);

  var innerPad = [];
  var outerPad = [];
  for (var wordIndex = 0; wordIndex < 16; wordIndex++) {
    innerPad.push(keyWords[wordIndex] ^ 0x36363636);
    outerPad.push(keyWords[wordIndex] ^ 0x5C5C5C5C);
  }

  // Inner hash covers the 512-bit pad plus the message; the outer hash covers
  // the 512-bit pad plus the 160-bit inner digest.
  var innerDigest = core_sha1(innerPad.concat(str2binb(data)), 512 + data.length * chrsz);
  return core_sha1(outerPad.concat(innerDigest), 512 + 160);
}
357
/*
 * Adds two integers, wrapping at 2^32. The sum is built from two 16-bit
 * halves to work around bugs in some JS interpreters.
 */
function safe_add(x, y) {
  var lowSum = (x & 0xFFFF) + (y & 0xFFFF);
  var carry = lowSum >> 16;
  var highSum = (x >> 16) + (y >> 16) + carry;
  return (highSum << 16) | (lowSum & 0xFFFF);
}
367
/*
 * Rotates a 32-bit number to the left by cnt bits.
 */
function rol(num, cnt) {
  var shiftedOut = num >>> (32 - cnt);
  var shiftedUp = num << cnt;
  return shiftedUp | shiftedOut;
}
374
/*
 * Packs a string into an array of big-endian 32-bit words, chrsz bits per
 * character. With chrsz = 8, the high byte of characters above 255 is
 * silently dropped.
 */
function str2binb(str) {
  var words = [];
  var mask = (1 << chrsz) - 1;
  for (var charIndex = 0; charIndex < str.length; charIndex++) {
    var bitOffset = charIndex * chrsz;
    var charBits = str.charCodeAt(charIndex) & mask;
    // Characters fill each word from its most significant bits downwards.
    words[bitOffset >> 5] |= charBits << (32 - chrsz - bitOffset % 32);
  }
  return words;
}
386
/*
 * Unpacks an array of big-endian 32-bit words into a string, reading chrsz
 * bits per character.
 */
function binb2str(bin) {
  var mask = (1 << chrsz) - 1;
  var totalBits = bin.length * 32;
  var pieces = [];
  for (var bitOffset = 0; bitOffset < totalBits; bitOffset += chrsz) {
    var charCode = (bin[bitOffset >> 5] >>> (32 - chrsz - bitOffset % 32)) & mask;
    pieces.push(String.fromCharCode(charCode));
  }
  return pieces.join('');
}
397
/*
 * Renders an array of big-endian 32-bit words as a hex string, two digits
 * per byte. hexcase selects uppercase (truthy) or lowercase digits.
 */
function binb2hex(binarray) {
  var digits = hexcase ? '0123456789ABCDEF' : '0123456789abcdef';
  var byteCount = binarray.length * 4;
  var hex = '';
  for (var byteIndex = 0; byteIndex < byteCount; byteIndex++) {
    // Byte 0 of each word is its most significant byte.
    var shift = (3 - byteIndex % 4) * 8;
    var byteValue = (binarray[byteIndex >> 2] >> shift) & 0xFF;
    hex += digits.charAt(byteValue >> 4) + digits.charAt(byteValue & 0xF);
  }
  return hex;
}
410
/*
 * Renders an array of big-endian 32-bit words as a base-64 string, using
 * b64pad for positions that lie past the end of the data.
 */
function binb2b64(binarray) {
  var alphabet = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/';
  var totalBits = binarray.length * 32;
  // Byte at the given position; positions past the end read as 0.
  var byteAt = index => (binarray[index >> 2] >> 8 * (3 - index % 4)) & 0xFF;

  var encoded = '';
  for (var byteIndex = 0; byteIndex < binarray.length * 4; byteIndex += 3) {
    // Three bytes form one 24-bit group that yields four 6-bit characters.
    var triplet = (byteAt(byteIndex) << 16) | (byteAt(byteIndex + 1) << 8) | byteAt(byteIndex + 2);
    for (var sextet = 0; sextet < 4; sextet++) {
      var isPastEnd = byteIndex * 8 + sextet * 6 > totalBits;
      encoded += isPastEnd ? b64pad : alphabet.charAt((triplet >> 6 * (3 - sextet)) & 0x3F);
    }
  }
  return encoded;
}
OLDNEW
« no previous file with comments | « third_party/WebKit/Source/devtools/front_end/product_registry/ProductRegistryData.js ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698