| Index: third_party/logilab/common/dbf.py
|
| diff --git a/third_party/logilab/common/dbf.py b/third_party/logilab/common/dbf.py
|
| new file mode 100644
|
| index 0000000000000000000000000000000000000000..8def2d2e98d2ef5e08734af8ff0a4df35991d3b8
|
| --- /dev/null
|
| +++ b/third_party/logilab/common/dbf.py
|
| @@ -0,0 +1,229 @@
|
| +# -*- coding: utf-8 -*-
|
| +# copyright 2003-2011 LOGILAB S.A. (Paris, FRANCE), all rights reserved.
|
| +# contact http://www.logilab.fr/ -- mailto:contact@logilab.fr
|
| +#
|
| +# This file is part of logilab-common.
|
| +#
|
| +# logilab-common is free software: you can redistribute it and/or modify it under
|
| +# the terms of the GNU Lesser General Public License as published by the Free
|
| +# Software Foundation, either version 2.1 of the License, or (at your option) any
|
| +# later version.
|
| +#
|
| +# logilab-common is distributed in the hope that it will be useful, but WITHOUT
|
| +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
| +# FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
|
| +# details.
|
| +#
|
| +# You should have received a copy of the GNU Lesser General Public License along
|
| +# with logilab-common. If not, see <http://www.gnu.org/licenses/>.
|
| +"""This is a DBF reader which reads Visual Fox Pro DBF format with Memo field
|
| +
|
| +Usage:
|
| +
|
| +>>> rec = readDbf('test.dbf')
|
| +>>> for line in rec:
|
| +...     print line['name']
|
| +
|
| +
|
| +:date: 13/07/2007
|
| +
|
| +http://www.physics.ox.ac.uk/users/santoso/Software.Repository.html
|
| +The page says the code is "available as is without any warranty or support".
|
| +"""
|
| +
|
| +import struct
|
| +import os, os.path
|
| +import sys
|
| +import csv
|
| +import tempfile
|
| +import ConfigParser
|
| +
|
| +class Dbase:
|
| + def __init__(self):
|
| + self.fdb = None
|
| + self.fmemo = None
|
| + self.db_data = None
|
| + self.memo_data = None
|
| + self.fields = None
|
| + self.num_records = 0
|
| + self.header = None
|
| + self.memo_file = ''
|
| + self.memo_header = None
|
| + self.memo_block_size = 0
|
| + self.memo_header_len = 0
|
| +
|
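| + # Truncate txt at the first NUL byte; DBF strings are NUL-padded.
|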
| + def _drop_after_NULL(self, txt):
|
| + for i in range(0, len(txt)):
|
| + if ord(struct.unpack('c', txt[i])[0])==0:
|
| + return txt[:i]
|
| + return txt
|
| +
|
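| + # Decode a raw 4-byte little-endian value (e.g. a memo block
|
| + # reference) into an unsigned integer; an empty value yields 0.
|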
| + def _reverse_endian(self, num):
|
| + if not len(num):
|
| + return 0
|
| + val = struct.unpack('<L', num)
|
| + val = struct.pack('>L', val[0])
|
| + val = struct.unpack('>L', val)
|
| + return val[0]
|
| +
|
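| + # Pair each unpacked value with the corresponding name from ids and
|
| + # return the result as a dict.
|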
| + def _assign_ids(self, lst, ids):
|
| + result = {}
|
| + idx = 0
|
| + for item in lst:
|
| + id = ids[idx]
|
| + result[id] = item
|
| + idx += 1
|
| + return result
|
| +
|
| + def open(self, db_name):
|
| + filesize = os.path.getsize(db_name)
|
| + if filesize <= 68:
|
| + raise IOError, 'The file is not large enough to be a dbf file'
|
| +
|
| + self.fdb = open(db_name, 'rb')
|
| +
|
| + self.memo_file = ''
|
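| + # Look for a companion memo file: '.dbf' becomes '.dbt' (dBase) or
|
| + # '.fpt' (FoxPro).
|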
| + if os.path.isfile(db_name[0:-1] + 't'):
|
| + self.memo_file = db_name[0:-1] + 't'
|
| + elif os.path.isfile(db_name[0:-3] + 'fpt'):
|
| + self.memo_file = db_name[0:-3] + 'fpt'
|
| +
|
| + if self.memo_file:
|
| + #Read memo file
|
| + self.fmemo = open(self.memo_file, 'rb')
|
| + self.memo_data = self.fmemo.read()
|
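| + # The memo-file header stores the block size as a big-endian 16-bit
|
| + # integer at offset 6; a value of zero falls back to 512 bytes.
|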
| + self.memo_header = self._assign_ids(struct.unpack('>6x1H', self.memo_data[:8]), ['Block size'])
|
| + block_size = self.memo_header['Block size']
|
| + if not block_size:
|
| + block_size = 512
|
| + self.memo_block_size = block_size
|
| + self.memo_header_len = block_size
|
| + memo_size = os.path.getsize(self.memo_file)
|
| +
|
| + #Start reading data file
|
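| + # The 32-byte DBF header holds a version byte, the last-update date
|
| + # (year, month, day), the record count, the header size and the record
|
| + # size; multi-byte values are little-endian.
|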
| + data = self.fdb.read(32)
|
| + self.header = self._assign_ids(struct.unpack('<B 3B L 2H 20x', data), ['id', 'Year', 'Month', 'Day', '# of Records', 'Header Size', 'Record Size'])
|
| + self.header['id'] = hex(self.header['id'])
|
| +
|
| + self.num_records = self.header['# of Records']
|
| + data = self.fdb.read(self.header['Header Size']-34)
|
| + self.fields = {}
|
| + x = 0
|
| + header_pattern = '<11s c 4x B B 14x'
|
| + ids = ['Field Name', 'Field Type', 'Field Length', 'Field Precision']
|
| + pattern_len = 32
|
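| + # Field descriptors follow the header, 32 bytes each (name, type,
|
| + # length, precision); the list ends at a 0x0d terminator byte.
|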
| + for offset in range(0, len(data), 32):
|
| + if ord(data[offset])==0x0d:
|
| + break
|
| + x += 1
|
| + data_subset = data[offset: offset+pattern_len]
|
| + if len(data_subset) < pattern_len:
|
| + data_subset += ' '*(pattern_len-len(data_subset))
|
| + self.fields[x] = self._assign_ids(struct.unpack(header_pattern, data_subset), ids)
|
| + self.fields[x]['Field Name'] = self._drop_after_NULL(self.fields[x]['Field Name'])
|
| +
|
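| + # Advance the file position so that db_data starts one byte into the
|
| + # record area, i.e. just past the first record's deletion flag; the
|
| + # per-record offsets computed below rely on this alignment.
|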
| + self.fdb.read(3)
|
| + if self.header['# of Records']:
|
| + data_size = (self.header['# of Records'] * self.header['Record Size']) - 1
|
| + self.db_data = self.fdb.read(data_size)
|
| + else:
|
| + self.db_data = ''
|
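| + # Build a struct format that extracts each field as a fixed-width
|
| + # string of its declared length.
|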
| + self.row_format = '<'
|
| + self.row_ids = []
|
| + self.row_len = 0
|
| + for key in self.fields:
|
| + field = self.fields[key]
|
| + self.row_format += '%ds ' % (field['Field Length'])
|
| + self.row_ids.append(field['Field Name'])
|
| + self.row_len += field['Field Length']
|
| +
|
| + def close(self):
|
| + if self.fdb:
|
| + self.fdb.close()
|
| + if self.fmemo:
|
| + self.fmemo.close()
|
| +
|
| + def get_numrecords(self):
|
| + return self.num_records
|
| +
|
| + def get_record_with_names(self, rec_no):
|
| + """
|
| + This function accepts record numbers from 0 to N-1.
|
| + """
|
| + if rec_no < 0 or rec_no >= self.num_records:
|
| + raise Exception, 'Record number out of range'
|
| +
|
| + offset = self.header['Record Size'] * rec_no
|
| + data = self.db_data[offset:offset+self.row_len]
|
| + record = self._assign_ids(struct.unpack(self.row_format, data), self.row_ids)
|
| +
|
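| + # Memo-type fields (M, G, B, P) hold a block reference into the memo
|
| + # file; replace the reference with the memo text it points to.
|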
| + if self.memo_file:
|
| + for key in self.fields:
|
| + field = self.fields[key]
|
| + f_type = field['Field Type']
|
| + f_name = field['Field Name']
|
| + c_data = record[f_name]
|
| +
|
| + if f_type=='M' or f_type=='G' or f_type=='B' or f_type=='P':
|
| + c_data = self._reverse_endian(c_data)
|
| + if c_data:
|
| + record[f_name] = self.read_memo(c_data-1).strip()
|
| + else:
|
| + record[f_name] = c_data.strip()
|
| + return record
|
| +
|
| + def read_memo_record(self, num, in_length):
|
| + """
|
| + Read the memo record with the given block number. The second parameter
|
| + is the number of bytes to read; a negative value means read at most one
|
| + memo block (self.memo_block_size bytes), and zero means read nothing.
|
| + """
|
| + if in_length < 0:
|
| + in_length = -self.memo_block_size
|
| +
|
| + offset = self.memo_header_len + num * self.memo_block_size
|
| + self.fmemo.seek(offset)
|
| + if in_length<0:
|
| + in_length = -in_length
|
| + if in_length==0:
|
| + return ''
|
| + return self.fmemo.read(in_length)
|
| +
|
| + def read_memo(self, num):
|
| + result = ''
|
| + buffer = self.read_memo_record(num, -1)
|
| + if len(buffer)<=0:
|
| + return ''
|
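| + # A memo block starts with a 4-byte type field and a 4-byte big-endian
|
| + # data length; the memo data itself begins at offset 8.
|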
| + length = struct.unpack('>L', buffer[4:4+4])[0] + 8
|
| +
|
| + block_size = self.memo_block_size
|
| + if length < block_size:
|
| + return buffer[8:length]
|
| + rest_length = length - block_size
|
| + rest_data = self.read_memo_record(num+1, rest_length)
|
| + if len(rest_data)<=0:
|
| + return ''
|
| + return buffer[8:] + rest_data
|
| +
|
| +def readDbf(filename):
|
| + """
|
| + Read the DBF file specified by the filename and
|
| + return the records as a list of dictionaries.
|
| +
|
| + :param filename: name of the DBF file
|
| + :return: list of records, each a dict keyed by field name
|
| + """
|
| + db = Dbase()
|
| + db.open(filename)
|
| + num = db.get_numrecords()
|
| + rec = []
|
| + for i in range(0, num):
|
| + record = db.get_record_with_names(i)
|
| + rec.append(record)
|
| + db.close()
|
| + return rec
|
| +
|
| +if __name__=='__main__':
|
| + rec = readDbf('dbf/sptable.dbf')
|
| + for line in rec:
|
| + print '%s %s' % (line['GENUS'].strip(), line['SPECIES'].strip())
|