Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(180)

Side by Side Diff: third_party/logilab/common/dbf.py

Issue 10447014: Add pylint to depot_tools. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/tools/depot_tools
Patch Set: Fix unittests. Created 8 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « third_party/logilab/common/date.py ('k') | third_party/logilab/common/debugger.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 # -*- coding: utf-8 -*-
2 # copyright 2003-2011 LOGILAB S.A. (Paris, FRANCE), all rights reserved.
3 # contact http://www.logilab.fr/ -- mailto:contact@logilab.fr
4 #
5 # This file is part of logilab-common.
6 #
7 # logilab-common is free software: you can redistribute it and/or modify it under
8 # the terms of the GNU Lesser General Public License as published by the Free
9 # Software Foundation, either version 2.1 of the License, or (at your option) any
10 # later version.
11 #
12 # logilab-common is distributed in the hope that it will be useful, but WITHOUT
13 # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
14 # FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
15 # details.
16 #
17 # You should have received a copy of the GNU Lesser General Public License along
18 # with logilab-common. If not, see <http://www.gnu.org/licenses/>.
19 """This is a DBF reader which reads Visual Fox Pro DBF format with Memo field
20
21 Usage:
22
23 >>> rec = readDbf('test.dbf')
24 >>> for line in rec:
25 >>> print line['name']
26
27
28 :date: 13/07/2007
29
30 http://www.physics.ox.ac.uk/users/santoso/Software.Repository.html
31 page says code is "available as is without any warranty or support".
32 """
33
34 import struct
35 import os, os.path
36 import sys
37 import csv
38 import tempfile
39 import ConfigParser
40
class Dbase:
    """Reader for a DBF table file and its optional memo file.

    Typical use: call open(), fetch rows with get_record_with_names() for
    every index below get_numrecords(), then call close().

    Values are returned as the raw byte strings stored in the file; the
    field names used as record keys are byte strings as well.
    """

    # Field types whose cell in a record holds a memo block number rather
    # than the data itself.
    _MEMO_TYPES = (b'M', b'G', b'B', b'P')

    def __init__(self):
        self.fdb = None
        self.fmemo = None
        self.db_data = None
        self.memo_data = None
        self.fields = None
        self.num_records = 0
        self.header = None
        self.memo_file = ''
        self.memo_header = None
        self.memo_block_size = 0
        self.memo_header_len = 0
        # Row layout, filled in by open().
        self.row_format = '<'
        self.row_ids = []
        self.row_len = 0

    def _drop_after_NULL(self, txt):
        """Return txt truncated at its first NUL byte (txt itself if none)."""
        return txt.split(b'\x00', 1)[0]

    def _reverse_endian(self, num):
        """Decode the 4-byte string num as a little-endian unsigned integer.

        Despite the name, no byte swapping is visible to the caller: the
        result is simply the little-endian value. An empty string gives 0.
        """
        if not len(num):
            return 0
        return struct.unpack('<L', num)[0]

    def _assign_ids(self, lst, ids):
        """Return a dict mapping ids[i] to lst[i] for every item of lst.

        :raise IndexError: if lst has more items than ids
        """
        result = {}
        for idx, item in enumerate(lst):
            result[ids[idx]] = item
        return result

    def open(self, db_name):
        """Open db_name, parse its header and load all records into memory.

        A sibling memo file is opened too when one exists: either db_name
        with its last character replaced by 't', or db_name with its last
        three characters replaced by 'fpt'.

        If anything fails after the table file has been opened, the files
        opened so far are closed again before the error is re-raised.

        :param db_name: path of the table file
        :raise IOError: if the file is 68 bytes long or shorter
        """
        filesize = os.path.getsize(db_name)
        if filesize <= 68:
            raise IOError('The file is not large enough to be a dbf file')

        self.fdb = open(db_name, 'rb')
        try:
            self._open_memo(db_name)
            self._read_header()
            self._read_fields()
            self._read_records()
        except Exception:
            # Do not leak the file handles when the content is malformed.
            self.close()
            raise

    def _open_memo(self, db_name):
        """Locate and open the memo file of db_name and read its block size."""
        self.memo_file = ''
        if os.path.isfile(db_name[0:-1] + 't'):
            self.memo_file = db_name[0:-1] + 't'
        elif os.path.isfile(db_name[0:-3] + 'fpt'):
            self.memo_file = db_name[0:-3] + 'fpt'

        if not self.memo_file:
            return

        self.fmemo = open(self.memo_file, 'rb')
        self.memo_data = self.fmemo.read()
        # Bytes 6-7 of the memo header: big-endian block size.
        self.memo_header = self._assign_ids(
            struct.unpack('>6x1H', self.memo_data[:8]), ['Block size'])
        block_size = self.memo_header['Block size']
        if not block_size:
            block_size = 512
        self.memo_block_size = block_size
        # The memo header occupies one whole block.
        self.memo_header_len = block_size

    def _read_header(self):
        """Parse the fixed 32-byte table header into self.header."""
        data = self.fdb.read(32)
        self.header = self._assign_ids(
            struct.unpack('<B 3B L 2H 20x', data),
            ['id', 'Year', 'Month', 'Day', '# of Records', 'Header Size',
             'Record Size'])
        self.header['id'] = hex(self.header['id'])
        self.num_records = self.header['# of Records']

    def _read_fields(self):
        """Parse the 32-byte field descriptors into self.fields (1-based)."""
        data = self.fdb.read(self.header['Header Size'] - 34)
        self.fields = {}
        header_pattern = '<11s c 4x B B 14x'
        ids = ['Field Name', 'Field Type', 'Field Length', 'Field Precision']
        pattern_len = 32
        x = 0
        for offset in range(0, len(data), pattern_len):
            # A 0x0D byte where a descriptor would start ends the list.
            if data[offset:offset + 1] == b'\r':
                break
            x += 1
            data_subset = data[offset:offset + pattern_len]
            if len(data_subset) < pattern_len:
                # The read above can stop short of the last descriptor's
                # end; pad it so that it still unpacks.
                data_subset += b' ' * (pattern_len - len(data_subset))
            field = self._assign_ids(
                struct.unpack(header_pattern, data_subset), ids)
            field['Field Name'] = self._drop_after_NULL(field['Field Name'])
            self.fields[x] = field

    def _read_records(self):
        """Load the record area and derive the struct layout of one row."""
        # 32 + (Header Size - 34) bytes were consumed so far; 3 more bytes
        # put the file position one byte past the header, i.e. just after
        # the first byte of the first record.
        self.fdb.read(3)
        if self.header['# of Records']:
            data_size = (self.header['# of Records'] *
                         self.header['Record Size']) - 1
            self.db_data = self.fdb.read(data_size)
        else:
            self.db_data = b''

        self.row_format = '<'
        self.row_ids = []
        self.row_len = 0
        # Fields must be laid out in file order; plain dict iteration does
        # not guarantee that, so walk the 1-based keys in sorted order.
        for key in sorted(self.fields):
            field = self.fields[key]
            self.row_format += '%ds ' % (field['Field Length'])
            self.row_ids.append(field['Field Name'])
            self.row_len += field['Field Length']

    def close(self):
        """Close the table file and the memo file, if they were opened."""
        if self.fdb:
            self.fdb.close()
        if self.fmemo:
            self.fmemo.close()

    def get_numrecords(self):
        """Return the record count announced by the table header."""
        return self.num_records

    def get_record_with_names(self, rec_no):
        """Return record rec_no as a dict mapping field name to value.

        This function accepts record numbers from 0 to N-1.

        When a memo file is open, memo-type fields are replaced by the
        stripped content of the memo block they point to (they are left
        untouched when the block number is 0) and all other fields are
        stripped. Without a memo file every value is returned unmodified.

        :raise Exception: if rec_no is outside 0..N-1
        """
        if rec_no < 0 or rec_no >= self.num_records:
            raise Exception('Unable to extract data outside the range')

        offset = self.header['Record Size'] * rec_no
        data = self.db_data[offset:offset + self.row_len]
        record = self._assign_ids(
            struct.unpack(self.row_format, data), self.row_ids)

        if self.memo_file:
            for field in self.fields.values():
                f_type = field['Field Type']
                f_name = field['Field Name']
                c_data = record[f_name]

                if f_type in self._MEMO_TYPES:
                    c_data = self._reverse_endian(c_data)
                    if c_data:
                        record[f_name] = self.read_memo(c_data - 1).strip()
                else:
                    record[f_name] = c_data.strip()
        return record

    def read_memo_record(self, num, in_length):
        """Read raw bytes from memo block num (0 is the first data block).

        :param num: block index, counted after the memo header
        :param in_length: number of bytes to read; a negative value means
          one full block, 0 returns an empty string
        """
        if in_length < 0:
            in_length = self.memo_block_size

        offset = self.memo_header_len + num * self.memo_block_size
        self.fmemo.seek(offset)
        if in_length == 0:
            return b''
        return self.fmemo.read(in_length)

    def read_memo(self, num):
        """Return the payload of the memo that starts in block num.

        A memo starts with 8 bytes whose last 4 hold the payload length as
        a big-endian integer. An empty string is returned when the block
        cannot be read or is too short to hold those 8 bytes.
        """
        first_block = self.read_memo_record(num, -1)
        if len(first_block) < 8:
            return b''
        length = struct.unpack('>L', first_block[4:4 + 4])[0] + 8

        block_size = self.memo_block_size
        # '<=' matters: a memo that exactly fills the block has no remainder
        # to read and must not be mistaken for a failed read.
        if length <= block_size:
            return first_block[8:length]
        rest_length = length - block_size
        rest_data = self.read_memo_record(num + 1, rest_length)
        if len(rest_data) <= 0:
            return b''
        return first_block[8:] + rest_data
207
def readDbf(filename):
    """
    Read the DBF file specified by the filename and
    return the records as a list of dictionaries.

    The underlying files are closed even when opening the table or
    decoding a record raises.

    :param: filename File name of the DBF
    :return: List of rows
    """
    db = Dbase()
    try:
        db.open(filename)
        return [db.get_record_with_names(i)
                for i in range(db.get_numrecords())]
    finally:
        # close() only touches handles that were actually opened.
        db.close()
225
if __name__ == '__main__':
    # Demo: list the genus and species columns of a sample table.
    rec = readDbf('dbf/sptable.dbf')
    for row in rec:
        genus = row['GENUS'].strip()
        species = row['SPECIES'].strip()
        print('%s %s' % (genus, species))
OLDNEW
« no previous file with comments | « third_party/logilab/common/date.py ('k') | third_party/logilab/common/debugger.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698