| OLD | NEW | 
| (Empty) |  | 
 |    1 const fs = require('fs'); | 
 |    2  | 
 /*
 How to use:
 1) Get a dump of the data in CSV format and save it as 3pas.csv in the same
    directory as this script.
 2) The header fields in the CSV are used as keys when each row is turned into a
    JSON object [ie: the top row should not contain spaces or special chars].
 3) The two important column names are: 'name_legal_product' and 'domain'.
 4) There must not be a header named 'prefix'.
 5) 'name_legal_product' will have its data cleaned up a bit, so be prepared for
    it to change.
 6) This script tries to de-duplicate the data, so be prepared for many entries
    to go away if it finds a shorter one.
 7) This script outputs a JavaScript file in the product_registry's data format.
 */
 |   13  | 
 /*
  * Tunables read by the SHA-1 helpers in this file. You may need to tweak
  * these to be compatible with the server-side, but the defaults work in most
  * cases.
  */

 /* Hex output format: 0 selects lowercase digits, 1 selects uppercase. */
 const hexcase = 0;

 /* Base-64 pad character. Use "=" for strict RFC compliance. */
 const b64pad = '=';

 /* Bits per input character: 8 for ASCII, 16 for Unicode. */
 const chrsz = 8;
 |   21  | 
 /*
  * Load the CSV dump and turn it into one plain object per data row.
  * The first line of the file supplies the column names; every later line is a
  * row whose cells are stored under those names.
  */
 const csvText = fs.readFileSync('3pas.csv', 'utf8');
 const headerLine = csvText.split('\n', 1)[0];
 const columnNames = Array.from(csvUnmarshaller(headerLine), ([name]) => name);

 // Skip the header AND its line break so that the row parser starts exactly on
 // file line 2. Feeding it the leftover '\n' would produce a phantom empty row
 // and shift every line number reported in parse errors by one.
 const bodyText = csvText.substr(headerLine.length + 1);

 // Reassigned later in the script, hence `let`.
 let lineObjs = [];
 let currentRow = {};
 let columnIndex = 0;
 for (const [cell, endsRow] of csvUnmarshaller(bodyText, 2)) {
   const columnName = columnNames[columnIndex++];
   // When a column name repeats in the header, the first occurrence wins.
   if (!(columnName in currentRow))
     currentRow[columnName] = cell;
   if (endsRow) {
     lineObjs.push(currentRow);
     currentRow = {};
     columnIndex = 0;
   }
 }
 // A last row that is not terminated by a line break never reports `endsRow`,
 // so it has to be flushed here.
 if (columnIndex > 0)
   lineObjs.push(currentRow);
 |   43  | 
 /**
  * Normalizes a raw `domain` cell: trims it, lowercases it, drops every
  * character outside `a-z 0-9 _ - * .`, and removes a leading `www.` when
  * exactly two dot-separated labels follow it.
  * @param {string} rawDomain
  * @return {string}
  */
 function cleanDomain(rawDomain) {
   return rawDomain.trim()
       .toLowerCase()
       .replace(/[^a-z0-9_\-*.]/g, '')
       .replace(/^www\.(?=[^.]+\.[^.]+$)/, '');
 }

 /**
  * Tidies a raw `name_legal_product` cell so that it can be embedded in the
  * generated output.
  * @param {string} rawName
  * @return {string}
  */
 function cleanProductName(rawName) {
   return rawName.trim()
       .replace(/\s\s/g, ' ')
       .replace(/[\x00-\x1F]/g, '')
       // Decode the HTML entity for a double quote. (Replacing '"' with '"'
       // would be a no-op.)
       .replace(/&quot;/g, '"')
       // The following two lines are to keep input data from corrupting
       // output data.
       .replace(/","/g, '')
       .replace(/},{/g, '')
       .replace(/“|”/g, '"')
       .replace(/,$/g, '')
       // Decode the HTML entity for an ampersand.
       .replace(/&amp;/g, '&')
       // This is how csv escapes double quotes.
       .replace(/""/g, '"');
 }

 /*
  * Keep only rows that have a domain and a status of 'allowed' or
  * 'disallowed', clean them up, and keep the first row seen for each cleaned
  * domain.
  */
 const firstRowByDomain = new Map();
 for (const row of lineObjs) {
   const hasDomain = row.domain !== null && row.domain !== undefined;
   const hasKnownStatus = row.status_allowed === 'allowed' || row.status_allowed === 'disallowed';
   if (!hasDomain || !hasKnownStatus)
     continue;

   row.domain = cleanDomain(row.domain);

   // A NULL or absent product name cannot be cleaned; fail with a message that
   // names the offending row instead of an anonymous TypeError.
   if (typeof row.name_legal_product !== 'string')
     throw new Error('Missing name_legal_product for domain: ' + row.domain);
   row.name_legal_product = cleanProductName(row.name_legal_product);

   if (!firstRowByDomain.has(row.domain))
     firstRowByDomain.set(row.domain, row);
 }

 lineObjs = Array.from(firstRowByDomain.values());
 |   69  | 
 /*
  * Index the rows three levels deep:
  *   base domain (last two dot-separated labels) -> full subdomain -> prefix -> row
  *
  * A domain may contain at most one '*'. The text before it becomes the row's
  * `prefix` ('*' when nothing precedes it); the text after it, minus leading
  * empty labels, becomes the row's `domain`. Rows without a '*' get prefix ''.
  */
 // `var` is deliberate: this name may already have been declared with `var`
 // earlier in the script, and `let`/`const` would then be a redeclaration error.
 var map = new Map();
 for (const row of lineObjs) {
   if (!row)
     continue;
   const rawDomain = row.domain.trim();
   if (!rawDomain.length)
     continue;

   const pieces = rawDomain.split('*');
   if (pieces.length > 2)
     throw new Error('We do not support multiple * in domains: ' + rawDomain);

   let prefix = '';
   let suffixDomain = pieces[0];
   if (pieces.length === 2) {
     prefix = pieces[0] === '' ? '*' : pieces[0];
     suffixDomain = pieces[1];
   }

   const labels = suffixDomain.split('.');
   if (labels.length < 2)
     throw new Error('Invalid domain: ' + rawDomain);
   const baseDomain = labels[labels.length - 2] + '.' + labels[labels.length - 1];
   // 'f*.foo.bar' leaves '.foo.bar' after the split, i.e. a leading empty label.
   while (labels[0] === '')
     labels.shift();
   row.domain = labels.join('.');
   row.prefix = prefix;

   let subdomainIndex = map.get(baseDomain);
   if (!subdomainIndex) {
     subdomainIndex = new Map();
     map.set(baseDomain, subdomainIndex);
   }

   let prefixIndex = subdomainIndex.get(row.domain);
   if (!prefixIndex) {
     prefixIndex = new Map();
     subdomainIndex.set(row.domain, prefixIndex);
   }
   // Two rows collapsed onto the same (domain, prefix) pair; the later one
   // wins. Report on stderr because stdout carries the generated file.
   if (prefixIndex.has(prefix))
     console.error('Problem with: ', rawDomain, row.domain);
   prefixIndex.set(prefix, row);
 }
 |  115  | 
 /*
  * Choose the rows that make it into the generated registry and give every
  * surviving product name an index in `outputProducts`.
  *
  * A row is skipped when an entry with the same product name already covers it:
  *   - the '*' entry registered for the very same subdomain, or
  *   - a prefixed entry on one of its parent domains whose prefix is '*' or is
  *     the beginning of the label sitting directly below that parent.
  */
 const outputProducts = [];
 const outputObj = new Map();
 for (const subdomains of map.values()) {
   for (const prefixes of subdomains.values()) {
     const wildcardEntry = prefixes.get('*');
     NEXT_ENTRY: for (const entry of prefixes.values()) {
       // Skip entry, since the wild card is there and already in the table.
       if (wildcardEntry && entry.prefix !== '*' &&
           wildcardEntry.name_legal_product === entry.name_legal_product)
         continue;

       const fullSubdomain = entry.domain;
       const parentLabels = fullSubdomain.split('.');
       // The first label belongs to the entry itself; start with its parent.
       let childLabel = parentLabels.shift();

       // Walk up the parents until only a single label would remain.
       for (; parentLabels.length > 1; childLabel = parentLabels.shift()) {
         const parentPrefixes = subdomains.get(parentLabels.join('.'));
         if (!parentPrefixes)
           continue;
         for (const ancestor of parentPrefixes.values()) {
           const sameProduct = ancestor.name_legal_product === entry.name_legal_product;
           if (ancestor.prefix === '' || !sameProduct)
             continue;
           if (ancestor.prefix === '*')
             continue NEXT_ENTRY;
           // Per chat with 3pas team. We need to check prefix on subdomain not
           // top level domain.
           // ie: f*.foo.bar -> [b.f00.foo.bar, true], [f00.foo.bar, true],
           // [f00.b.foo.bar, false]
           if (childLabel.startsWith(ancestor.prefix))
             continue NEXT_ENTRY;
         }
       }

       let outputPart = outputObj.get(fullSubdomain);
       if (!outputPart) {
         // Only the first 16 hex digits of the SHA-1 of the subdomain are kept.
         outputPart = {hash: hex_sha1(fullSubdomain).substr(0, 16), prefixes: {}};
         outputObj.set(fullSubdomain, outputPart);
       }
       outputPart.prefixes[entry.prefix] = registerOutputProduct(entry.name_legal_product);
     }
   }
 }
 |  161  | 
 /*
  * Print the generated registry file on stdout: a licence header, the product
  * name table, then one line per hashed subdomain with its prefix -> product
  * index map and a trailing comment naming the products.
  */
 const generatedHeader = [
   '// Copyright 2017 The Chromium Authors. All rights reserved.',
   '// Use of this source code is governed by a BSD-style license that can be',
   '// found in the LICENSE file.',
   '// clang-format off',
   '/* eslint-disable */',
   'ProductRegistry.register([',
 ].join('\n');
 console.log(generatedHeader);

 // Put every product name on its own line and strip the outer [ ] of the JSON
 // array, since the brackets are printed separately.
 const productsJson = JSON.stringify(outputProducts).replace(/","/g, '",\n  "');
 console.log('  ' + productsJson.substring(1, productsJson.length - 1));
 console.log('],');
 console.log('[');

 const registryEntries = Array.from(outputObj.values());
 registryEntries.forEach((entry, position) => {
   const isLast = position === registryEntries.length - 1;
   const separator = isLast ? '' : ',';
   const productNames =
       Object.keys(entry.prefixes).map(prefix => '[' + outputProducts[entry.prefixes[prefix]] + ']');
   console.log('  ' + JSON.stringify(entry) + separator + ' // ' + productNames.join(' '));
 });
 console.log(']);');
 |  183  | 
 |  184  | 
 |  185 // items.forEach(lineObj => console.log(lineObj.name_legal_product.padStart(50),
      lineObj.domain.padStart(30))); | 
 |  186 // console.log("With *: ", items.filter(v => v.domain.indexOf('*') !== -1).lengt
     h); | 
 |  187 // console.log("Total: ", items.length); | 
 |  188  | 
 |  189  | 
 |  190  | 
 /**
  * Returns the index of `name` in the `outputProducts` table, appending the
  * name first when it is not there yet. The lookup is a linear scan.
  * @param {string} name
  * @return {number}
  */
 function registerOutputProduct(name) {
   const knownIndex = outputProducts.indexOf(name);
   if (knownIndex !== -1)
     return knownIndex;
   // Array#push returns the new length, so the fresh entry sits at length - 1.
   return outputProducts.push(name) - 1;
 }
 |  200  | 
 /**
  * Lazily splits CSV text into cells.
  *
  * Yields one `[value, endsRow]` pair per cell, where `endsRow` is true when
  * the cell was terminated by a line break. A cell may be wrapped in double or
  * single quotes; the surrounding quotes are removed but doubled quotes inside
  * are left as they are. An unquoted cell whose text is exactly `NULL` is
  * yielded as `null`.
  *
  * @param {string} data CSV text to tokenize.
  * @param {number=} lineOffset Line number of the first line of `data`, used
  *     only in error messages. Defaults to 1.
  * @throws {Error} When a quoted cell starting at the current position is not
  *     properly closed and followed by ',', a line break or the end of input.
  */
 function* csvUnmarshaller(data, lineOffset) {
   // Sticky (`y`) patterns can only match at `lastIndex`. Anchoring with `^`
   // under the `m` flag would also match at the start of any LATER line, which
   // silently consumes the wrong cell when the current one is malformed. The
   // `m` flag is kept on the quoted patterns so `$` still accepts a line
   // terminator right after the closing quote.
   const doubleQuotedCell = /"((?:[^"]|"")*)"(,|\n|$)/my;
   const singleQuotedCell = /'((?:[^']|'')*)'(,|\n|$)/my;
   const bareCell = /([^,\n]*)(,|\n|$)/y;

   let lineNo = lineOffset || 1;
   let colLength = 0;
   // Scan by offset rather than re-slicing the remaining text for every cell.
   let position = 0;
   while (position < data.length) {
     let pattern = bareCell;
     if (data[position] === '"')
       pattern = doubleQuotedCell;
     else if (data[position] === '\'')
       pattern = singleQuotedCell;

     pattern.lastIndex = position;
     const match = pattern.exec(data);
     if (!match)
       throw new Error('Bad data at line ' + lineNo + ' col: ' + colLength + ' ' + data.substr(position, 15));

     let value = match[1];
     if (pattern === bareCell && value === 'NULL')
       value = null;

     const endsRow = match[2] === '\n';
     position += match[0].length;
     if (endsRow) {
       lineNo++;
       colLength = 0;
     } else {
       colLength += match[0].length;
     }
     yield [value, endsRow];
   }
 }
 |  231  | 
 |  232  | 
 |  233 // All sha1 helpers from here down. | 
 |  234  | 
 |  235  | 
 |  236 /* | 
 |  237  * A JavaScript implementation of the Secure Hash Algorithm, SHA-1, as defined | 
 |  238  * in FIPS PUB 180-1 | 
 |  239  * Version 2.1a Copyright Paul Johnston 2000 - 2002. | 
 |  240  * Other contributors: Greg Holt, Andrew Kepert, Ydnar, Lostinet | 
 |  241  * Distributed under the BSD License | 
 |  242  * See http://pajhome.org.uk/crypt/md5 for details. | 
 |  243  */ | 
 |  244  | 
 /*
  * Public entry points. Each takes string arguments and returns the SHA-1 (or
  * HMAC-SHA1) digest encoded as hex, base-64 or a raw string.
  */

 /** SHA-1 of `s`, hex encoded. */
 function hex_sha1(s) {
   const digestWords = core_sha1(str2binb(s), s.length * chrsz);
   return binb2hex(digestWords);
 }

 /** SHA-1 of `s`, base-64 encoded. */
 function b64_sha1(s) {
   const digestWords = core_sha1(str2binb(s), s.length * chrsz);
   return binb2b64(digestWords);
 }

 /** SHA-1 of `s`, as a raw string. */
 function str_sha1(s) {
   const digestWords = core_sha1(str2binb(s), s.length * chrsz);
   return binb2str(digestWords);
 }

 /** HMAC-SHA1 of `data` under `key`, hex encoded. */
 function hex_hmac_sha1(key, data) {
   const macWords = core_hmac_sha1(key, data);
   return binb2hex(macWords);
 }

 /** HMAC-SHA1 of `data` under `key`, base-64 encoded. */
 function b64_hmac_sha1(key, data) {
   const macWords = core_hmac_sha1(key, data);
   return binb2b64(macWords);
 }

 /** HMAC-SHA1 of `data` under `key`, as a raw string. */
 function str_hmac_sha1(key, data) {
   const macWords = core_hmac_sha1(key, data);
   return binb2str(macWords);
 }
 |  267  | 
 /**
  * Simple self-test: hashes 'abc' with `hex_sha1` and compares the result with
  * the expected lowercase hex digest.
  * @return {boolean} true when the digest matches.
  */
 function sha1_vm_test() {
   const expectedDigest = 'a9993e364706816aba3e25717850c26c9cd0d89d';
   return hex_sha1('abc') === expectedDigest;
 }
 |  274  | 
 /**
  * Calculates the SHA-1 of an array of big-endian words and a bit length.
  * The padding is written into `x` itself, so the argument is modified.
  * @param {!Array<number>} x Message as big-endian 32-bit words.
  * @param {number} len Message length in bits.
  * @return {!Array<number>} The five 32-bit words of the digest.
  */
 function core_sha1(x, len) {
   /* append padding */
   x[len >> 5] |= 0x80 << (24 - len % 32);
   x[((len + 64 >> 9) << 4) + 15] = len;

   const schedule = new Array(80);
   let h0 = 1732584193;
   let h1 = -271733879;
   let h2 = -1732584194;
   let h3 = 271733878;
   let h4 = -1009589776;

   /* one pass per 16-word block */
   for (let blockStart = 0; blockStart < x.length; blockStart += 16) {
     let a = h0;
     let b = h1;
     let c = h2;
     let d = h3;
     let e = h4;

     for (let round = 0; round < 80; round++) {
       schedule[round] = round < 16 ?
           x[blockStart + round] :
           rol(schedule[round - 3] ^ schedule[round - 8] ^ schedule[round - 14] ^ schedule[round - 16], 1);
       const mixed = safe_add(
           safe_add(rol(a, 5), sha1_ft(round, b, c, d)),
           safe_add(safe_add(e, schedule[round]), sha1_kt(round)));
       e = d;
       d = c;
       c = rol(b, 30);
       b = a;
       a = mixed;
     }

     /* fold this block's result into the running state */
     h0 = safe_add(a, h0);
     h1 = safe_add(b, h1);
     h2 = safe_add(c, h2);
     h3 = safe_add(d, h3);
     h4 = safe_add(e, h4);
   }
   return [h0, h1, h2, h3, h4];
 }
 |  318  | 
 /**
  * Performs the triplet combination function that belongs to iteration `t`:
  * a bitwise choose below 20, a bitwise majority from 40 up to 59, and a
  * three-way XOR everywhere else.
  */
 function sha1_ft(t, b, c, d) {
   if (t < 20)
     return (b & c) | ((~b) & d);
   if (t >= 40 && t < 60)
     return (b & c) | (b & d) | (c & d);
   return b ^ c ^ d;
 }
 |  332  | 
 /**
  * Determines the additive constant that belongs to iteration `t`; the
  * constant changes every 20 iterations.
  */
 function sha1_kt(t) {
   if (t < 20)
     return 1518500249;
   if (t < 40)
     return 1859775393;
   if (t < 60)
     return -1894007588;
   return -899497514;
 }
 |  339  | 
 /**
  * Calculates the HMAC-SHA1 of a key and some data.
  * @param {string} key
  * @param {string} data
  * @return {!Array<number>} Whatever `core_sha1` returns for the outer hash.
  */
 function core_hmac_sha1(key, data) {
   let keyWords = str2binb(key);
   // A key longer than 16 words is replaced by its own hash.
   if (keyWords.length > 16)
     keyWords = core_sha1(keyWords, key.length * chrsz);

   // XOR the key into the two 16-word pads; missing key words count as 0.
   const innerPad = [];
   const outerPad = [];
   for (let wordIndex = 0; wordIndex < 16; wordIndex++) {
     innerPad.push(keyWords[wordIndex] ^ 0x36363636);
     outerPad.push(keyWords[wordIndex] ^ 0x5C5C5C5C);
   }

   const innerMessage = innerPad.concat(str2binb(data));
   const innerDigest = core_sha1(innerMessage, 512 + data.length * chrsz);
   return core_sha1(outerPad.concat(innerDigest), 512 + 160);
 }
 |  357  | 
 /**
  * Adds two integers, wrapping at 2^32.
  * Both operands are first coerced to signed 32-bit integers (so `undefined`
  * counts as 0); their sum is exact and is then truncated back to 32 bits.
  */
 function safe_add(x, y) {
   const left = x | 0;
   const right = y | 0;
   return (left + right) | 0;
 }
 |  367  | 
 /**
  * Bitwise rotates a 32-bit number to the left by `cnt` bits.
  */
 function rol(num, cnt) {
   const wrappedAround = num >>> (32 - cnt);
   const shiftedUp = num << cnt;
   return shiftedUp | wrappedAround;
 }
 |  374  | 
 /**
  * Converts a string to an array of big-endian words, packing `chrsz` bits per
  * character. Bits of a character code above `chrsz` are silently dropped.
  * @param {string} str
  * @return {!Array<number>}
  */
 function str2binb(str) {
   const words = [];
   const charMask = (1 << chrsz) - 1;
   for (let charIndex = 0; charIndex < str.length; charIndex++) {
     const bitOffset = charIndex * chrsz;
     const code = str.charCodeAt(charIndex) & charMask;
     // The first character of each word lands in its most significant bits.
     words[bitOffset >> 5] |= code << (32 - chrsz - bitOffset % 32);
   }
   return words;
 }
 |  386  | 
 /**
  * Converts an array of big-endian words to a string, reading `chrsz` bits per
  * character.
  * @param {!Array<number>} bin
  * @return {string}
  */
 function binb2str(bin) {
   const charMask = (1 << chrsz) - 1;
   const totalBits = bin.length * 32;
   const chars = [];
   for (let bitOffset = 0; bitOffset < totalBits; bitOffset += chrsz) {
     const code = (bin[bitOffset >> 5] >>> (32 - chrsz - bitOffset % 32)) & charMask;
     chars.push(String.fromCharCode(code));
   }
   return chars.join('');
 }
 |  397  | 
 /**
  * Converts an array of big-endian words to a hex string, eight digits per
  * word. `hexcase` selects uppercase (truthy) or lowercase (falsy) digits.
  * @param {!Array<number>} binarray
  * @return {string}
  */
 function binb2hex(binarray) {
   const digits = hexcase ? '0123456789ABCDEF' : '0123456789abcdef';
   let hex = '';
   for (const word of binarray) {
     // Most significant byte first; each byte is a high nibble then a low nibble.
     for (let shift = 24; shift >= 0; shift -= 8) {
       hex += digits.charAt((word >> (shift + 4)) & 0xF);
       hex += digits.charAt((word >> shift) & 0xF);
     }
   }
   return hex;
 }
 |  410  | 
 /**
  * Converts an array of big-endian words to a base-64 string, padding the last
  * group with `b64pad`.
  * @param {!Array<number>} binarray
  * @return {string}
  */
 function binb2b64(binarray) {
   const alphabet = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/';
   const totalBytes = binarray.length * 4;
   const totalBits = binarray.length * 32;
   // Byte `index` of the word array; bytes past the end read as 0.
   const byteAt = index => (binarray[index >> 2] >> 8 * (3 - index % 4)) & 0xFF;

   let encoded = '';
   for (let byteIndex = 0; byteIndex < totalBytes; byteIndex += 3) {
     const triplet = (byteAt(byteIndex) << 16) | (byteAt(byteIndex + 1) << 8) | byteAt(byteIndex + 2);
     for (let sextet = 0; sextet < 4; sextet++) {
       // Sextets that start beyond the last input bit become padding.
       const startsPastEnd = byteIndex * 8 + sextet * 6 > totalBits;
       encoded += startsPastEnd ? b64pad : alphabet.charAt((triplet >> 6 * (3 - sextet)) & 0x3F);
     }
   }
   return encoded;
 }
| OLD | NEW |