OLD | NEW |
(Empty) | |
| 1 # -*- coding: utf-8 -*- |
| 2 # copyright 2003-2011 LOGILAB S.A. (Paris, FRANCE), all rights reserved. |
| 3 # contact http://www.logilab.fr/ -- mailto:contact@logilab.fr |
| 4 # |
| 5 # This file is part of logilab-common. |
| 6 # |
| 7 # logilab-common is free software: you can redistribute it and/or modify it under |
| 8 # the terms of the GNU Lesser General Public License as published by the Free |
| 9 # Software Foundation, either version 2.1 of the License, or (at your option) any |
| 10 # later version. |
| 11 # |
| 12 # logilab-common is distributed in the hope that it will be useful, but WITHOUT |
| 13 # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
| 14 # FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more |
| 15 # details. |
| 16 # |
| 17 # You should have received a copy of the GNU Lesser General Public License along |
| 18 # with logilab-common. If not, see <http://www.gnu.org/licenses/>. |
| 19 """This is a DBF reader which reads Visual Fox Pro DBF format with Memo field |
| 20 |
| 21 Usage: |
| 22 |
| 23 >>> rec = readDbf('test.dbf') |
| 24 >>> for line in rec: |
| 25 ...     print line['name'] |
| 26 |
| 27 |
| 28 :date: 13/07/2007 |
| 29 |
| 30 The page at http://www.physics.ox.ac.uk/users/santoso/Software.Repository.html |
| 31 says the code is "available as is without any warranty or support". |
| 32 """ |
| 33 |
| 34 import struct |
| 35 import os, os.path |
| 36 import sys |
| 37 import csv |
| 38 import tempfile |
| 39 import ConfigParser |
| 40 |
| 41 class Dbase: |
| 42 def __init__(self): |
| 43 self.fdb = None |
| 44 self.fmemo = None |
| 45 self.db_data = None |
| 46 self.memo_data = None |
| 47 self.fields = None |
| 48 self.num_records = 0 |
| 49 self.header = None |
| 50 self.memo_file = '' |
| 51 self.memo_header = None |
| 52 self.memo_block_size = 0 |
| 53 self.memo_header_len = 0 |
| 54 |
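| # Field names and other fixed-width strings are NUL-padded; truncate at the first NUL byte. |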
| 55 def _drop_after_NULL(self, txt): |
| 56 for i in range(0, len(txt)): |
| 57 if ord(struct.unpack('c', txt[i])[0])==0: |
| 58 return txt[:i] |
| 59 return txt |
| 60 |
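| # Decode a 4-byte little-endian unsigned long (the pack/unpack round trip below leaves the value unchanged). |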
| 61 def _reverse_endian(self, num): |
| 62 if not len(num): |
| 63 return 0 |
| 64 val = struct.unpack('<L', num) |
| 65 val = struct.pack('>L', val[0]) |
| 66 val = struct.unpack('>L', val) |
| 67 return val[0] |
| 68 |
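| # Combine a struct.unpack() result with a list of names into a dict. |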
| 69 def _assign_ids(self, lst, ids): |
| 70 result = {} |
| 71 idx = 0 |
| 72 for item in lst: |
| 73 id = ids[idx] |
| 74 result[id] = item |
| 75 idx += 1 |
| 76 return result |
| 77 |
| 78 def open(self, db_name): |
| 79 filesize = os.path.getsize(db_name) |
| 80 if filesize <= 68: |
| 81 raise IOError, 'The file is not large enough to be a dbf file' |
| 82 |
| 83 self.fdb = open(db_name, 'rb') |
| 84 |
| 85 self.memo_file = '' |
| 86 if os.path.isfile(db_name[0:-1] + 't'): |
| 87 self.memo_file = db_name[0:-1] + 't' |
| 88 elif os.path.isfile(db_name[0:-3] + 'fpt'): |
| 89 self.memo_file = db_name[0:-3] + 'fpt' |
| 90 |
| 91 if self.memo_file: |
| 92 #Read memo file |
| 93 self.fmemo = open(self.memo_file, 'rb') |
| 94 self.memo_data = self.fmemo.read() |
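| # The memo (FPT) header stores the block size as a big-endian unsigned short at offset 6. |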
| 95 self.memo_header = self._assign_ids(struct.unpack('>6x1H', self.memo_data[:8]), ['Block size']) |
| 96 block_size = self.memo_header['Block size'] |
| 97 if not block_size: |
| 98 block_size = 512 |
| 99 self.memo_block_size = block_size |
| 100 self.memo_header_len = block_size |
| 101 memo_size = os.path.getsize(self.memo_file) |
| 102 |
| 103 #Start reading data file |
| 104 data = self.fdb.read(32) |
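| # 32-byte DBF header: version byte, last-update date (YY MM DD), record count, header size and record size. |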
| 105 self.header = self._assign_ids(struct.unpack('<B 3B L 2H 20x', data), ['id', 'Year', 'Month', 'Day', '# of Records', 'Header Size', 'Record Size']) |
| 106 self.header['id'] = hex(self.header['id']) |
| 107 |
| 108 self.num_records = self.header['# of Records'] |
| 109 data = self.fdb.read(self.header['Header Size']-34) |
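| # The rest of the header holds 32-byte field descriptors terminated by a 0x0d byte. |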
| 110 self.fields = {} |
| 111 x = 0 |
| 112 header_pattern = '<11s c 4x B B 14x' |
| 113 ids = ['Field Name', 'Field Type', 'Field Length', 'Field Precision'] |
| 114 pattern_len = 32 |
| 115 for offset in range(0, len(data), 32): |
| 116 if ord(data[offset])==0x0d: |
| 117 break |
| 118 x += 1 |
| 119 data_subset = data[offset: offset+pattern_len] |
| 120 if len(data_subset) < pattern_len: |
| 121 data_subset += ' '*(pattern_len-len(data_subset)) |
| 122 self.fields[x] = self._assign_ids(struct.unpack(header_pattern, data_subset), ids) |
| 123 self.fields[x]['Field Name'] = self._drop_after_NULL(self.fields[x]['Field Name']) |
| 124 |
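| # The reads above stop 2 bytes short of Header Size; reading 3 more bytes consumes the rest of the header plus the first record's deletion flag, which the -1 on data_size below compensates for. |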
| 125 self.fdb.read(3) |
| 126 if self.header['# of Records']: |
| 127 data_size = (self.header['# of Records'] * self.header['Record Size']) - 1 |
| 128 self.db_data = self.fdb.read(data_size) |
| 129 else: |
| 130 self.db_data = '' |
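| # Build a struct format string covering one record's field data so a whole row can be unpacked in a single call. |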
| 131 self.row_format = '<' |
| 132 self.row_ids = [] |
| 133 self.row_len = 0 |
| 134 for key in self.fields: |
| 135 field = self.fields[key] |
| 136 self.row_format += '%ds ' % (field['Field Length']) |
| 137 self.row_ids.append(field['Field Name']) |
| 138 self.row_len += field['Field Length'] |
| 139 |
| 140 def close(self): |
| 141 if self.fdb: |
| 142 self.fdb.close() |
| 143 if self.fmemo: |
| 144 self.fmemo.close() |
| 145 |
| 146 def get_numrecords(self): |
| 147 return self.num_records |
| 148 |
| 149 def get_record_with_names(self, rec_no): |
| 150 """ |
| 151 This function accepts a record number from 0 to N-1. |
| 152 """ |
| 153 if rec_no < 0 or rec_no >= self.num_records: |
| 154 raise Exception, 'record number out of range' |
| 155 |
| 156 offset = self.header['Record Size'] * rec_no |
| 157 data = self.db_data[offset:offset+self.row_len] |
| 158 record = self._assign_ids(struct.unpack(self.row_format, data), self.row_ids) |
| 159 |
| 160 if self.memo_file: |
| 161 for key in self.fields: |
| 162 field = self.fields[key] |
| 163 f_type = field['Field Type'] |
| 164 f_name = field['Field Name'] |
| 165 c_data = record[f_name] |
| 166 |
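| # These field types store a 4-byte block pointer into the memo file rather than the data itself. |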
| 167 if f_type=='M' or f_type=='G' or f_type=='B' or f_type=='P': |
| 168 c_data = self._reverse_endian(c_data) |
| 169 if c_data: |
| 170 record[f_name] = self.read_memo(c_data-1).strip() |
| 171 else: |
| 172 record[f_name] = c_data.strip() |
| 173 return record |
| 174 |
| 175 def read_memo_record(self, num, in_length): |
| 176 """ |
| 177 Read the record of given number. The second parameter is the length of |
| 178 the record to read. It can be undefined, meaning read the whole record, |
| 179 and it can be negative, meaning at most the length |
| 180 """ |
| 181 if in_length < 0: |
| 182 in_length = -self.memo_block_size |
| 183 |
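| # Block 0 of the memo file holds its header; callers pass the block pointer minus one. |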
| 184 offset = self.memo_header_len + num * self.memo_block_size |
| 185 self.fmemo.seek(offset) |
| 186 if in_length<0: |
| 187 in_length = -in_length |
| 188 if in_length==0: |
| 189 return '' |
| 190 return self.fmemo.read(in_length) |
| 191 |
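| # A memo record starts with an 8-byte header: record type and data length, both big-endian unsigned longs. |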
| 192 def read_memo(self, num): |
| 193 result = '' |
| 194 buffer = self.read_memo_record(num, -1) |
| 195 if len(buffer)<=0: |
| 196 return '' |
| 197 length = struct.unpack('>L', buffer[4:4+4])[0] + 8 |
| 198 |
| 199 block_size = self.memo_block_size |
| 200 if length < block_size: |
| 201 return buffer[8:length] |
| 202 rest_length = length - block_size |
| 203 rest_data = self.read_memo_record(num+1, rest_length) |
| 204 if len(rest_data)<=0: |
| 205 return '' |
| 206 return buffer[8:] + rest_data |
| 207 |
| 208 def readDbf(filename): |
| 209 """ |
| 210 Read the DBF file specified by the filename and |
| 211 return the records as a list of dictionaries. |
| 212 |
| 213 :param filename: file name of the DBF |
| 214 :return: list of record dictionaries |
| 215 """ |
| 216 db = Dbase() |
| 217 db.open(filename) |
| 218 num = db.get_numrecords() |
| 219 rec = [] |
| 220 for i in range(0, num): |
| 221 record = db.get_record_with_names(i) |
| 222 rec.append(record) |
| 223 db.close() |
| 224 return rec |
| 225 |
| 226 if __name__=='__main__': |
| 227 rec = readDbf('dbf/sptable.dbf') |
| 228 for line in rec: |
| 229 print '%s %s' % (line['GENUS'].strip(), line['SPECIES'].strip()) |