Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(491)

Unified Diff: tests/tokenizer_test.dart

Issue 10916294: switch html5lib to new pkg layout (Closed) Base URL: https://github.com/dart-lang/html5lib.git@master
Patch Set: Created 8 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « tests/support.dart ('k') | tokenizer.dart » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: tests/tokenizer_test.dart
diff --git a/tests/tokenizer_test.dart b/tests/tokenizer_test.dart
deleted file mode 100644
index 61b6462e3cd23d66a8cdb7b2a341634a880d4119..0000000000000000000000000000000000000000
--- a/tests/tokenizer_test.dart
+++ /dev/null
@@ -1,304 +0,0 @@
-#library('tokenizer_test');
-
-// Note: mirrors used to match the getattr usage in the original test
-#import('dart:io');
-#import('dart:json');
-#import('dart:mirrors');
-#import('package:unittest/unittest.dart');
-#import('package:unittest/vm_config.dart');
-#import('../lib/char_encodings.dart');
-#import('../lib/constants.dart', prefix: 'constants');
-#import('../lib/token.dart');
-#import('../lib/utils.dart');
-#import('../tokenizer.dart');
-#import('support.dart');
-
-/**
- * This is like [JSON.parse], but it fixes unicode surrogate pairs in the JSON.
- *
- * Without this, the test "expects" incorrect results from the tokenizer.
- * Note: Python's json module decodes these correctly, so this might point at
- * a bug in Dart's [JSON.parse].
- */
-jsonParseUnicode(String input) => jsonFixSurrogatePairs(JSON.parse(input));
-
-// TODO(jmesserly): this should probably be handled by dart:json
-jsonFixSurrogatePairs(jsonObject) {
- fixSurrogate(object) {
- if (object is String) {
- return decodeUtf16Surrogates(object);
- } else if (object is List) {
- List a = object;
- for (int i = 0; i < a.length; i++) {
- a[i] = fixSurrogate(a[i]);
- }
- } else if (object is Map) {
- Map<String, Object> m = object;
- m.forEach((key, value) {
- var fixedKey = fixSurrogate(key);
- var fixedValue = fixSurrogate(value);
- if (fixedKey !== key) {
- m.remove(key);
- m[fixedKey] = fixedValue;
- } else if (fixedValue !== value) {
- m[fixedKey] = fixedValue;
- }
- });
- }
- return object;
- }
- return fixSurrogate(jsonObject);
-}
-
-
-class TokenizerTestParser {
- String _state;
- var _lastStartTag;
- List outputTokens;
-
- TokenizerTestParser(String initialState, [lastStartTag])
- : _state = initialState,
- _lastStartTag = lastStartTag;
-
- List parse(stream, [encoding, innerHTML = false]) {
- var tokenizer = new HTMLTokenizer(stream, encoding);
- outputTokens = [];
-
- // Note: we can't get a closure of the state method. However, we can
- // create a new closure to invoke it via mirrors.
- var mtok = reflect(tokenizer);
- tokenizer.state = () => mtok.invoke(_state, const []).value.reflectee;
-
- if (_lastStartTag != null) {
- tokenizer.currentToken = new StartTagToken(_lastStartTag);
- }
-
- while (tokenizer.hasNext()) {
- var token = tokenizer.next();
- switch (token.kind) {
- case TokenKind.characters:
- processCharacters(token);
- break;
- case TokenKind.spaceCharacters:
- processSpaceCharacters(token);
- break;
- case TokenKind.startTag:
- processStartTag(token);
- break;
- case TokenKind.endTag:
- processEndTag(token);
- break;
- case TokenKind.comment:
- processComment(token);
- break;
- case TokenKind.doctype:
- processDoctype(token);
- break;
- case TokenKind.parseError:
- processParseError(token);
- break;
- }
- }
-
- return outputTokens;
- }
-
- void processDoctype(DoctypeToken token) {
- outputTokens.add(["DOCTYPE", token.name, token.publicId,
- token.systemId, token.correct]);
- }
-
- void processStartTag(StartTagToken token) {
- outputTokens.add(["StartTag", token.name,
- makeDict(token.data), token.selfClosing]);
- }
-
- void processEndTag(EndTagToken token) {
- outputTokens.add(["EndTag", token.name, token.selfClosing]);
- }
-
- void processComment(Token token) {
- outputTokens.add(["Comment", token.data]);
- }
-
- void processSpaceCharacters(Token token) {
- processCharacters(token);
- }
-
- void processCharacters(Token token) {
- outputTokens.add(["Character", token.data]);
- }
-
- void processEOF(token) {
- }
-
- void processParseError(Token token) {
- // TODO(jmesserly): when debugging test failures it can be useful to add
- // logging here like `print('ParseError $token');`. It would be nice to
- // use the actual logging library.
- outputTokens.add(["ParseError", token.data]);
- }
-}
-
-List concatenateCharacterTokens(List tokens) {
- var outputTokens = <List>[];
- for (var token in tokens) {
- if (token.indexOf("ParseError") == -1 && token[0] == "Character") {
- if (outputTokens.length > 0 &&
- outputTokens.last().indexOf("ParseError") == -1 &&
- outputTokens.last()[0] == "Character") {
-
- outputTokens.last()[1] = '${outputTokens.last()[1]}${token[1]}';
- } else {
- outputTokens.add(token);
- }
- } else {
- outputTokens.add(token);
- }
- }
- return outputTokens;
-}
-
-List normalizeTokens(List tokens) {
- // TODO: convert tests to reflect arrays
- for (int i = 0; i < tokens.length; i++) {
- var token = tokens[i];
- if (token[0] == 'ParseError') {
- tokens[i] = token[0];
- }
- }
- return tokens;
-}
-
-
-/**
- * Test whether the test has passed or failed
- *
- * If the ignoreErrorOrder flag is set to true we don't test the relative
- * positions of parse errors and non parse errors.
- */
-void expectTokensMatch(List expectedTokens, List receivedTokens,
- bool ignoreErrorOrder, [bool ignoreErrors = false, String message]) {
-
- var checkSelfClosing = false;
- for (var token in expectedTokens) {
- if (token[0] == "StartTag" && token.length == 4
- || token[0] == "EndTag" && token.length == 3) {
- checkSelfClosing = true;
- break;
- }
- }
-
- if (!checkSelfClosing) {
- for (var token in receivedTokens) {
- if (token[0] == "StartTag" || token[0] == "EndTag") {
- token.removeLast();
- }
- }
- }
-
- if (!ignoreErrorOrder && !ignoreErrors) {
- expect(receivedTokens, equals(expectedTokens), message);
- } else {
- // Sort the tokens into two groups; non-parse errors and parse errors
- var expectedParseErrors = expectedTokens.filter((t) => t == "ParseError");
- var expectedNonErrors = expectedTokens.filter((t) => t != "ParseError");
- var receivedParseErrors = receivedTokens.filter((t) => t == "ParseError");
- var receivedNonErrors = receivedTokens.filter((t) => t != "ParseError");
-
- expect(receivedNonErrors, equals(expectedNonErrors), message);
- if (!ignoreErrors) {
- expect(receivedParseErrors, equals(expectedParseErrors), message);
- }
- }
-}
-
-void runTokenizerTest(Map testInfo) {
- // XXX - move this out into the setup function
- // concatenate all consecutive character tokens into a single token
- if (testInfo.containsKey('doubleEscaped')) {
- testInfo = unescape(testInfo);
- }
-
- var expected = concatenateCharacterTokens(testInfo['output']);
- if (!testInfo.containsKey('lastStartTag')) {
- testInfo['lastStartTag'] = null;
- }
- var parser = new TokenizerTestParser(testInfo['initialState'],
- testInfo['lastStartTag']);
- var tokens = parser.parse(testInfo['input']);
- tokens = concatenateCharacterTokens(tokens);
- var received = normalizeTokens(tokens);
- var errorMsg = Strings.join(["\n\nInitial state:",
- testInfo['initialState'],
- "\nInput:", testInfo['input'],
- "\nExpected:", expected,
- "\nreceived:", tokens].map((s) => '$s'), '\n');
- var ignoreErrorOrder = testInfo['ignoreErrorOrder'];
- if (ignoreErrorOrder == null) ignoreErrorOrder = false;
-
- expectTokensMatch(expected, received, ignoreErrorOrder, true, errorMsg);
-}
-
-Map unescape(Map testInfo) {
- // Note: using JSON.parse to unescape the unicode characters in the string.
- decode(inp) => jsonParseUnicode('"${inp}"');
-
- testInfo["input"] = decode(testInfo["input"]);
- for (var token in testInfo["output"]) {
- if (token == "ParseError") {
- continue;
- } else {
- token[1] = decode(token[1]);
- if (token.length > 2) {
- for (var pair in token[2]) {
- var key = pair[0];
- var value = pair[1];
- token[2].remove(key);
- token[2][decode(key)] = decode(value);
- }
- }
- }
- }
- return testInfo;
-}
-
-
-String camelCase(String s) {
- s = s.toLowerCase();
- var result = new StringBuffer();
- for (var match in const RegExp(@"\W+(\w)(\w+)").allMatches(s)) {
- if (result.length == 0) result.add(s.substring(0, match.start()));
- result.add(match.group(1).toUpperCase());
- result.add(match.group(2));
- }
- return result.toString();
-}
-
-void main() {
- useVmConfiguration();
- getDataFiles('tokenizer', (p) => p.endsWith('.test')).then((files) {
- for (var path in files) {
-
- var text = new File(path).readAsTextSync();
- var tests = jsonParseUnicode(text);
- var testName = new Path.fromNative(path).filename.replaceAll(".test","");
- var testList = tests['tests'];
- if (testList == null) continue;
-
- group(testName, () {
- for (int index = 0; index < testList.length; index++) {
- final testInfo = testList[index];
-
- testInfo.putIfAbsent("initialStates", () => ["Data state"]);
- for (var initialState in testInfo["initialStates"]) {
- test(testInfo["description"], () {
- testInfo["initialState"] = camelCase(initialState);
- runTokenizerTest(testInfo);
- });
- }
- }
- });
- }
- });
-}
« no previous file with comments | « tests/support.dart ('k') | tokenizer.dart » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698