OLD | NEW |
(Empty) | |
| 1 ''' |
| 2 Created on 20/01/2011 |
| 3 |
| 4 v0.1 (C) Gerald Storer |
| 5 MIT License |
| 6 |
| 7 Based on JSON.minify.js: |
| 8 https://github.com/getify/JSON.minify |
| 9 ''' |
| 10 |
| 11 import re |
| 12 |
# Tokens that can change the scanner state: a double quote, the comment
# delimiters, and line terminators (which end a single-line comment).
_TOKENIZER = re.compile(r'"|(/\*)|(\*/)|(//)|\n|\r')

# Only the four characters the JSON standard treats as insignificant
# white space.
_WHITESPACE = re.compile('[ \t\n\r]+')


def _quote_is_escaped(text, quote_index):
    '''Return True if the quote at quote_index is backslash-escaped.

    A quote is escaped when it is immediately preceded by an odd number of
    backslashes. The run is counted backwards from the quote, so the cost
    depends on the length of the run and not on the length of text.
    '''
    backslashes = 0
    index = quote_index - 1
    while index >= 0 and text[index] == '\\':
        backslashes += 1
        index -= 1
    return backslashes % 2 == 1


def json_minify(json, strip_space=True):
    '''Strip // and /* */ comments (and optionally white space) from JSON text.

    Parameters:
        json        -- the text to minify. The name shadows the standard
                       library module inside this function only; it is kept
                       so existing keyword callers continue to work.
        strip_space -- when true (the default), space, tab, CR and LF found
                       outside string literals are removed as well.

    Returns the minified text as a new string. Comment delimiters that
    appear inside string literals are left untouched. Input that ends while
    a comment is still open has the remainder discarded as comment text.
    '''
    in_string = False
    in_multiline_comment = False
    in_singleline_comment = False

    new_str = []
    from_index = 0  # start of the text not yet copied to the output

    for match in _TOKENIZER.finditer(json):
        token = match.group()
        # Snapshot of the state *before* this token is processed; every
        # decision below is based on it.
        in_comment = in_multiline_comment or in_singleline_comment

        if not in_comment:
            chunk = json[from_index:match.start()]
            if not in_string and strip_space:
                chunk = _WHITESPACE.sub('', chunk)
            new_str.append(chunk)

        from_index = match.end()

        if token == '"' and not in_comment:
            # A quote opens a string, or closes one unless it is escaped.
            if not in_string or not _quote_is_escaped(json, match.start()):
                in_string = not in_string
            from_index -= 1  # include the " character in the next chunk
        elif token == '/*' and not in_string and not in_comment:
            in_multiline_comment = True
        elif token == '*/' and not in_string and in_multiline_comment:
            in_multiline_comment = False
        elif token == '//' and not in_string and not in_comment:
            in_singleline_comment = True
        elif (token in ('\n', '\r') and not in_string
                and in_singleline_comment):
            in_singleline_comment = False
        elif not in_comment and (token not in ('\n', '\r')
                                 or not strip_space):
            # Delimiter-looking text inside a string, a stray '*/', or a
            # line terminator that must be kept.
            new_str.append(token)

    # Text after the last token gets the same treatment as any other
    # chunk: dropped when it is still inside a comment, stripped of white
    # space when it lies outside a string.
    if not (in_multiline_comment or in_singleline_comment):
        tail = json[from_index:]
        if not in_string and strip_space:
            tail = _WHITESPACE.sub('', tail)
        new_str.append(tail)

    return ''.join(new_str)
| 53 |
if __name__ == '__main__':
    # Self-test: each sample is minified, parsed with the standard json
    # module, and compared against the parsed form of its expected result.
    import json  # requires Python 2.6+ to run tests

    def test_json(s):
        '''Minify s with json_minify and return the parsed result.'''
        return json.loads(json_minify(s))

    # Single-line and multi-line comments mixed with ordinary members.
    test1 = '''// this is a JSON file with comments
{
"foo": "bar", // this is cool
"bar": [
"baz", "bum", "zam"
],
/* the rest of this document is just fluff
in case you are interested. */
"something": 10,
"else": 20
}

/* NOTE: You can easily strip the whitespace and comments
from such a file with the JSON.minify() project hosted
here on github at http://github.com/getify/JSON.minify
*/
'''

    test1_res = '''{"foo":"bar","bar":["baz","bum","zam"],"something":10,"else":20}'''

    # Comment delimiters inside strings, and strings inside comments.
    test2 = '''
{"/*":"*/","//":"",/*"//"*/"/*/"://
"//"}

'''
    test2_res = '''{"/*":"*/","//":"","/*/":"//"}'''

    # Comments adjacent to tokens, plus an escaped quote inside a key.
    test3 = r'''/*
this is a
multi line comment */{

"foo"
:
"bar/*"// something
, "b\"az":/*
something else */"blah"

}
'''
    test3_res = r'''{"foo":"bar/*","b\"az":"blah"}'''

    # Runs of backslashes before quotes: an even run leaves the quote
    # unescaped, an odd run escapes it.
    test4 = r'''{"foo": "ba\"r//", "bar\\": "b\\\"a/*z",
"baz\\\\": /* yay */ "fo\\\\\"*/o"
}
'''
    test4_res = r'''{"foo":"ba\"r//","bar\\":"b\\\"a/*z","baz\\\\":"fo\\\\\"*/o"}'''

    assert test_json(test1) == json.loads(test1_res),'Failed test 1'
    assert test_json(test2) == json.loads(test2_res),'Failed test 2'
    assert test_json(test3) == json.loads(test3_res),'Failed test 3'
    assert test_json(test4) == json.loads(test4_res),'Failed test 4'
    if __debug__: # Don't print passed message if the asserts didn't run
        # Parenthesised single-argument form is valid on Python 2 and 3;
        # the bare print statement stops the module compiling on Python 3.
        print('Passed all tests')
OLD | NEW |