OLD | NEW |
(Empty) | |
| 1 # -*- coding: utf-8 -*- |
| 2 # copyright 2003-2011 LOGILAB S.A. (Paris, FRANCE), all rights reserved. |
| 3 # contact http://www.logilab.fr/ -- mailto:contact@logilab.fr |
| 4 # |
| 5 # This file is part of logilab-common. |
| 6 # |
| 7 # logilab-common is free software: you can redistribute it and/or modify it under |
| 8 # the terms of the GNU Lesser General Public License as published by the Free |
| 9 # Software Foundation, either version 2.1 of the License, or (at your option) any |
| 10 # later version. |
| 11 # |
| 12 # logilab-common is distributed in the hope that it will be useful, but WITHOUT |
| 13 # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
| 14 # FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more |
| 15 # details. |
| 16 # |
| 17 # You should have received a copy of the GNU Lesser General Public License along |
| 18 # with logilab-common. If not, see <http://www.gnu.org/licenses/>. |
| 19 """This is a DBF reader which reads Visual Fox Pro DBF format with Memo field |
| 20 |
| 21 Usage: |
| 22 |
| 23 >>> rec = readDbf('test.dbf') |
| 24 >>> for line in rec: |
| 25 ...     print line['name'] |
| 26 |
| 27 |
| 28 :date: 13/07/2007 |
| 29 |
| 30 The page at http://www.physics.ox.ac.uk/users/santoso/Software.Repository.html |
| 31 says the code is "available as is without any warranty or support". |
| 32 """ |
| 33 |
| 34 import struct |
| 35 import os, os.path |
| 36 import sys |
| 37 import csv |
| 38 import tempfile |
| 39 import ConfigParser |
| 40 |
| 41 class Dbase: |
| 42 def __init__(self): |
| 43 self.fdb = None |
| 44 self.fmemo = None |
| 45 self.db_data = None |
| 46 self.memo_data = None |
| 47 self.fields = None |
| 48 self.num_records = 0 |
| 49 self.header = None |
| 50 self.memo_file = '' |
| 51 self.memo_header = None |
| 52 self.memo_block_size = 0 |
| 53 self.memo_header_len = 0 |
| 54 |
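| # Field names and other fixed-width strings are NUL-padded; truncate at the first NUL byte. |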
| 55 def _drop_after_NULL(self, txt): |
| 56 for i in range(0, len(txt)): |
| 57 if ord(struct.unpack('c', txt[i])[0])==0: |
| 58 return txt[:i] |
| 59 return txt |
| 60 |
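| # Decode a 4-byte little-endian unsigned long (the pack/unpack round trip below leaves the value unchanged). |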
| 61 def _reverse_endian(self, num): |
| 62 if not len(num): |
| 63 return 0 |
| 64 val = struct.unpack('<L', num) |
| 65 val = struct.pack('>L', val[0]) |
| 66 val = struct.unpack('>L', val) |
| 67 return val[0] |
| 68 |
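| # Combine a struct.unpack() result with a list of names into a dict. |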
| 69 def _assign_ids(self, lst, ids): |
| 70 result = {} |
| 71 idx = 0 |
| 72 for item in lst: |
| 73 id = ids[idx] |
| 74 result[id] = item |
| 75 idx += 1 |
| 76 return result |
| 77 |
| 78 def open(self, db_name): |
| 79 filesize = os.path.getsize(db_name) |
| 80 if filesize <= 68: |
| 81 raise IOError, 'The file is not large enough to be a dbf file' |
| 82 |
| 83 self.fdb = open(db_name, 'rb') |
| 84 |
| 85 self.memo_file = '' |
| 86 if os.path.isfile(db_name[0:-1] + 't'): |
| 87 self.memo_file = db_name[0:-1] + 't' |
| 88 elif os.path.isfile(db_name[0:-3] + 'fpt'): |
| 89 self.memo_file = db_name[0:-3] + 'fpt' |
| 90 |
| 91 if self.memo_file: |
| 92 #Read memo file |
| 93 self.fmemo = open(self.memo_file, 'rb') |
| 94 self.memo_data = self.fmemo.read() |
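| # The memo (FPT) header stores the block size as a big-endian unsigned short at offset 6. |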
| 95 self.memo_header = self._assign_ids(struct.unpack('>6x1H', self.memo_data[:8]), ['Block size']) |
| 96 block_size = self.memo_header['Block size'] |
| 97 if not block_size: |
| 98 block_size = 512 |
| 99 self.memo_block_size = block_size |
| 100 self.memo_header_len = block_size |
| 101 memo_size = os.path.getsize(self.memo_file) |
| 102 |
| 103 #Start reading data file |
| 104 data = self.fdb.read(32) |
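| # 32-byte DBF header: version byte, last-update date (YY MM DD), record count, header size and record size. |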
| 105 self.header = self._assign_ids(struct.unpack('<B 3B L 2H 20x', data), ['id', 'Year', 'Month', 'Day', '# of Records', 'Header Size', 'Record Size']) |
| 106 self.header['id'] = hex(self.header['id']) |
| 107 |
| 108 self.num_records = self.header['# of Records'] |
| 109 data = self.fdb.read(self.header['Header Size']-34) |
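| # The rest of the header holds 32-byte field descriptors terminated by a 0x0d byte. |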
| 110 self.fields = {} |
| 111 x = 0 |
| 112 header_pattern = '<11s c 4x B B 14x' |
| 113 ids = ['Field Name', 'Field Type', 'Field Length', 'Field Precision'] |
| 114 pattern_len = 32 |
| 115 for offset in range(0, len(data), 32): |
| 116 if ord(data[offset])==0x0d: |
| 117 break |
| 118 x += 1 |
| 119 data_subset = data[offset: offset+pattern_len] |
| 120 if len(data_subset) < pattern_len: |
| 121 data_subset += ' '*(pattern_len-len(data_subset)) |
| 122 self.fields[x] = self._assign_ids(struct.unpack(header_pattern, data_subset), ids) |
| 123 self.fields[x]['Field Name'] = self._drop_after_NULL(self.fields[x]['Field Name']) |
| 124 |
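| # The reads above stop 2 bytes short of Header Size; reading 3 more bytes consumes the rest of the header plus the first record's deletion flag, which the -1 on data_size below compensates for. |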
| 125 self.fdb.read(3) |
| 126 if self.header['# of Records']: |
| 127 data_size = (self.header['# of Records'] * self.header['Record Size']) - 1 |
| 128 self.db_data = self.fdb.read(data_size) |
| 129 else: |
| 130 self.db_data = '' |
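| # Build a struct format string covering one record's field data so a whole row can be unpacked in a single call. |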
| 131 self.row_format = '<' |
| 132 self.row_ids = [] |
| 133 self.row_len = 0 |
| 134 for key in self.fields: |
| 135 field = self.fields[key] |
| 136 self.row_format += '%ds ' % (field['Field Length']) |
| 137 self.row_ids.append(field['Field Name']) |
| 138 self.row_len += field['Field Length'] |
| 139 |
| 140 def close(self): |
| 141 if self.fdb: |
| 142 self.fdb.close() |
| 143 if self.fmemo: |
| 144 self.fmemo.close() |
| 145 |
| 146 def get_numrecords(self): |
| 147 return self.num_records |
| 148 |
| 149 def get_record_with_names(self, rec_no): |
| 150 """ |
| 151 This function accepts a record number from 0 to N-1. |
| 152 """ |
| 153 if rec_no < 0 or rec_no >= self.num_records: |
| 154 raise Exception, 'record number out of range' |
| 155 |
| 156 offset = self.header['Record Size'] * rec_no |
| 157 data = self.db_data[offset:offset+self.row_len] |
| 158 record = self._assign_ids(struct.unpack(self.row_format, data), self.row_ids) |
| 159 |
| 160 if self.memo_file: |
| 161 for key in self.fields: |
| 162 field = self.fields[key] |
| 163 f_type = field['Field Type'] |
| 164 f_name = field['Field Name'] |
| 165 c_data = record[f_name] |
| 166 |
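| # These field types store a 4-byte block pointer into the memo file rather than the data itself. |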
| 167 if f_type=='M' or f_type=='G' or f_type=='B' or f_type=='P': |
| 168 c_data = self._reverse_endian(c_data) |
| 169 if c_data: |
| 170 record[f_name] = self.read_memo(c_data-1).strip() |
| 171 else: |
| 172 record[f_name] = c_data.strip() |
| 173 return record |
| 174 |
| 175 def read_memo_record(self, num, in_length): |
| 176 """ |
| 177 Read the record of given number. The second parameter is the length of |
| 178 the record to read. It can be undefined, meaning read the whole record, |
| 179 and it can be negative, meaning at most the length |
| 180 """ |
| 181 if in_length < 0: |
| 182 in_length = -self.memo_block_size |
| 183 |
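| # Block 0 of the memo file holds its header; callers pass the block pointer minus one. |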
| 184 offset = self.memo_header_len + num * self.memo_block_size |
| 185 self.fmemo.seek(offset) |
| 186 if in_length<0: |
| 187 in_length = -in_length |
| 188 if in_length==0: |
| 189 return '' |
| 190 return self.fmemo.read(in_length) |
| 191 |
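| # A memo record starts with an 8-byte header: record type and data length, both big-endian unsigned longs. |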
| 192 def read_memo(self, num): |
| 193 result = '' |
| 194 buffer = self.read_memo_record(num, -1) |
| 195 if len(buffer)<=0: |
| 196 return '' |
| 197 length = struct.unpack('>L', buffer[4:4+4])[0] + 8 |
| 198 |
| 199 block_size = self.memo_block_size |
| 200 if length < block_size: |
| 201 return buffer[8:length] |
| 202 rest_length = length - block_size |
| 203 rest_data = self.read_memo_record(num+1, rest_length) |
| 204 if len(rest_data)<=0: |
| 205 return '' |
| 206 return buffer[8:] + rest_data |
| 207 |
| 208 def readDbf(filename): |
| 209 """ |
| 210 Read the DBF file specified by the filename and |
| 211 return the records as a list of dictionaries. |
| 212 |
| 213 :param filename: file name of the DBF |
| 214 :return: list of record dictionaries |
| 215 """ |
| 216 db = Dbase() |
| 217 db.open(filename) |
| 218 num = db.get_numrecords() |
| 219 rec = [] |
| 220 for i in range(0, num): |
| 221 record = db.get_record_with_names(i) |
| 222 rec.append(record) |
| 223 db.close() |
| 224 return rec |
| 225 |
| 226 if __name__=='__main__': |
| 227 rec = readDbf('dbf/sptable.dbf') |
| 228 for line in rec: |
| 229 print '%s %s' % (line['GENUS'].strip(), line['SPECIES'].strip()) |