OLD | NEW |
| (Empty) |
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 | |
5 // This utility program exists to process the False Start blacklist file into | |
6 // a static hash table so that it can be efficiently queried by Chrome. | |
7 | |
8 #include <algorithm> | |
9 #include <cstdio> | |
10 #include <set> | |
11 #include <sstream> | |
12 #include <string> | |
13 #include <vector> | |
14 | |
15 #include "base/basictypes.h" | |
16 #include "base/file_util.h" | |
17 #include "base/string_util.h" | |
18 #include "net/base/ssl_false_start_blacklist.h" | |
19 | |
20 typedef std::vector<std::string> Hosts; | |
21 | |
22 // Parses |input| as a blacklist data file, and returns the set of hosts it | |
23 // contains. | |
24 Hosts ParseHosts(const std::string& input) { | |
25 Hosts hosts; | |
26 size_t line_start = 0; | |
27 bool is_comment = false; | |
28 bool non_whitespace_seen = false; | |
29 for (size_t i = 0; i <= input.size(); ++i) { | |
30 if (i == input.size() || input[i] == '\n') { | |
31 if (!is_comment && non_whitespace_seen) { | |
32 size_t len = i - line_start; | |
33 if (i > 0 && input[i - 1] == '\r') | |
34 len--; | |
35 hosts.push_back(input.substr(line_start, len)); | |
36 } | |
37 is_comment = false; | |
38 non_whitespace_seen = false; | |
39 line_start = i + 1; | |
40 } else if (input[i] != ' ' && input[i] != '\t' && input[i] != '\r') { | |
41 non_whitespace_seen = true; | |
42 if (i == line_start && input[i] == '#') | |
43 is_comment = true; | |
44 } | |
45 } | |
46 VLOG(1) << "Have " << hosts.size() << " hosts after parse"; | |
47 return hosts; | |
48 } | |
49 | |
// Returns |host| with any initial "www." and all trailing dots removed. Partly
// based on net::StripWWW().
//
// The "www." prefix is matched case-sensitively. If nothing but dots remains
// once the prefix is removed (e.g. "www.." or "..."), the empty string is
// returned.
std::string StripWWWAndTrailingDots(const std::string& host) {
  const std::string www("www.");
  // Case-sensitive prefix test; compare() clamps the length for short hosts.
  const bool has_www = host.compare(0, www.length(), www) == 0;
  const size_t start = has_www ? www.length() : 0;
  const size_t end = host.find_last_not_of('.');
  // |end| < |start| happens when everything after "www." is dots: the last
  // non-dot character is then the final 'w' of the prefix itself. Without this
  // guard the length below would wrap around and the dots would be kept.
  if (end == std::string::npos || end < start)
    return std::string();
  return host.substr(start, end - start + 1);
}
59 | |
60 // Removes all duplicates from |hosts|. | |
61 static void RemoveDuplicateEntries(std::vector<std::string>* hosts) { | |
62 std::sort(hosts->begin(), hosts->end()); | |
63 hosts->erase(std::unique(hosts->begin(), hosts->end()), hosts->end()); | |
64 VLOG(1) << "Have " << hosts->size() << " hosts after removing duplicates"; | |
65 } | |
66 | |
// Returns everything after the first '.' in |host|, i.e. its parent domain.
// Returns the empty string when |host| contains no dot (a top-level name) or
// when nothing follows the first dot.
static std::string ParentDomain(const std::string& host) {
  const std::string::size_type dot = host.find('.');
  if (dot == std::string::npos)
    return std::string();
  // Copy the tail of |host| starting just past the dot.
  return std::string(host, dot + 1);
}
74 | |
75 // Predicate which returns true when a hostname has a parent domain in the set | |
76 // of hosts provided at construction time. | |
77 class ParentInSet : public std::unary_function<std::string, bool> { | |
78 public: | |
79 explicit ParentInSet(const std::set<std::string>& hosts) : hosts_(hosts) {} | |
80 | |
81 bool operator()(const std::string& host) const { | |
82 for (std::string parent(ParentDomain(host)); !parent.empty(); | |
83 parent = ParentDomain(parent)) { | |
84 if (hosts_.count(parent)) { | |
85 VLOG(1) << "Removing " << host << " as redundant"; | |
86 return true; | |
87 } | |
88 } | |
89 return false; | |
90 } | |
91 | |
92 private: | |
93 const std::set<std::string>& hosts_; | |
94 }; | |
95 | |
96 // Removes any hosts which are subdomains of other hosts. E.g. | |
97 // "foo.example.com" would be removed if "example.com" were also included. | |
98 static void RemoveRedundantEntries(Hosts* hosts) { | |
99 std::set<std::string> hosts_set; | |
100 for (Hosts::const_iterator i(hosts->begin()); i != hosts->end(); ++i) | |
101 hosts_set.insert(*i); | |
102 hosts->erase(std::remove_if(hosts->begin(), hosts->end(), | |
103 ParentInSet(hosts_set)), hosts->end()); | |
104 VLOG(1) << "Have " << hosts->size() << " hosts after removing redundants"; | |
105 } | |
106 | |
107 // Returns true iff all |hosts| are less than 256 bytes long (not including the | |
108 // terminating NUL) and contain two or more dot-separated components. | |
109 static bool CheckLengths(const Hosts& hosts) { | |
110 for (Hosts::const_iterator i(hosts.begin()); i != hosts.end(); ++i) { | |
111 if (i->size() >= 256) { | |
112 fprintf(stderr, "Entry '%s' is too large\n", i->c_str()); | |
113 return false; | |
114 } | |
115 if (net::SSLFalseStartBlacklist::LastTwoComponents(*i).empty()) { | |
116 fprintf(stderr, "Entry '%s' contains too few labels\n", i->c_str()); | |
117 return false; | |
118 } | |
119 } | |
120 | |
121 return true; | |
122 } | |
123 | |
124 // Returns the contents of the output file to be written. | |
125 std::string GenerateOutput(const Hosts& hosts) { | |
126 // Hash each host into its appropriate bucket. | |
127 VLOG(1) << "Using " << net::SSLFalseStartBlacklist::kBuckets | |
128 << " entry hash table"; | |
129 Hosts buckets[net::SSLFalseStartBlacklist::kBuckets]; | |
130 for (Hosts::const_iterator i(hosts.begin()); i != hosts.end(); ++i) { | |
131 const uint32 hash = net::SSLFalseStartBlacklist::Hash( | |
132 net::SSLFalseStartBlacklist::LastTwoComponents(*i)); | |
133 buckets[hash & (net::SSLFalseStartBlacklist::kBuckets - 1)].push_back(*i); | |
134 } | |
135 | |
136 // Write header. | |
137 std::ostringstream output; | |
138 output << "// Copyright (c) 2011 The Chromium Authors. All rights reserved.\n" | |
139 "// Use of this source code is governed by a BSD-style license that" | |
140 " can be\n// found in the LICENSE file.\n\n// WARNING: This code is" | |
141 " generated by ssl_false_start_blacklist_process.cc.\n// Do not " | |
142 "edit.\n\n#include \"net/base/ssl_false_start_blacklist.h\"\n\n" | |
143 "namespace net {\n\nconst uint32 " | |
144 "SSLFalseStartBlacklist::kHashTable[" | |
145 << net::SSLFalseStartBlacklist::kBuckets << " + 1] = {\n 0,\n"; | |
146 | |
147 // Construct data table, writing out the size as each bucket is appended. | |
148 std::string table_data; | |
149 size_t max_bucket_size = 0; | |
150 for (size_t i = 0; i < net::SSLFalseStartBlacklist::kBuckets; i++) { | |
151 max_bucket_size = std::max(max_bucket_size, buckets[i].size()); | |
152 for (Hosts::const_iterator j(buckets[i].begin()); j != buckets[i].end(); | |
153 ++j) { | |
154 table_data.push_back(static_cast<char>(j->size())); | |
155 table_data.append(*j); | |
156 } | |
157 output << " " << table_data.size() << ",\n"; | |
158 } | |
159 output << "};\n\n"; | |
160 VLOG(1) << "Largest bucket has " << max_bucket_size << " entries"; | |
161 | |
162 // Write data table, breaking lines after 72+ (2 indent, 70+ data) characters. | |
163 output << "const char SSLFalseStartBlacklist::kHashData[] = {\n"; | |
164 for (size_t i = 0, line_length = 0; i < table_data.size(); i++) { | |
165 if (line_length == 0) | |
166 output << " "; | |
167 std::ostringstream::pos_type current_length = output.tellp(); | |
168 output << static_cast<int>(table_data[i]) << ", "; | |
169 line_length += output.tellp() - current_length; | |
170 if (i == table_data.size() - 1) { | |
171 output << "\n};\n"; | |
172 } else if (line_length >= 70) { | |
173 output << "\n"; | |
174 line_length = 0; | |
175 } | |
176 } | |
177 output << "\n} // namespace net\n"; | |
178 return output.str(); | |
179 } | |
180 | |
181 #if defined(OS_WIN) | |
182 int wmain(int argc, wchar_t* argv[], wchar_t* envp[]) { | |
183 #elif defined(OS_POSIX) | |
184 int main(int argc, char* argv[], char* envp[]) { | |
185 #endif | |
186 if (argc != 3) { | |
187 fprintf(stderr, "Usage: %s <blacklist file> <output .c file>\n", argv[0]); | |
188 return 1; | |
189 } | |
190 | |
191 // Read input file. | |
192 std::string input; | |
193 if (!file_util::ReadFileToString(FilePath(argv[1]), &input)) { | |
194 fprintf(stderr, "Failed to read input file '%s'\n", argv[1]); | |
195 return 2; | |
196 } | |
197 Hosts hosts(ParseHosts(input)); | |
198 | |
199 // Sanitize |hosts|. | |
200 std::transform(hosts.begin(), hosts.end(), hosts.begin(), | |
201 StripWWWAndTrailingDots); | |
202 RemoveDuplicateEntries(&hosts); | |
203 RemoveRedundantEntries(&hosts); | |
204 if (!CheckLengths(hosts)) | |
205 return 3; | |
206 | |
207 // Write output file. | |
208 const std::string output_str(GenerateOutput(hosts)); | |
209 if (file_util::WriteFile(FilePath(argv[2]), output_str.data(), | |
210 output_str.size()) == static_cast<int>(output_str.size())) | |
211 return 0; | |
212 fprintf(stderr, "Failed to write output file '%s'\n", argv[2]); | |
213 return 4; | |
214 } | |
OLD | NEW |