Commit 7a59cb8d authored by Sriram Karra's avatar Sriram Karra

Breakup "parse_with_encoding" into two smaller routines; more to come

parent 90a6a9b6
## ##
## Created : Sat Apr 07 18:52:19 IST 2012 ## Created : Sat Apr 07 18:52:19 IST 2012
## Last Modified : Thu Jul 19 12:52:01 IST 2012 ## Last Modified : Wed Jul 25 15:28:14 IST 2012
## ##
## Copyright (C) 2012 by Sriram Karra <karra.etc@gmail.com> ## Copyright (C) 2012 by Sriram Karra <karra.etc@gmail.com>
## ##
...@@ -196,36 +196,41 @@ class MessageStore: ...@@ -196,36 +196,41 @@ class MessageStore:
pre += lines pre += lines
return self.set_preamble(pre) return self.set_preamble(pre)
def _parse_preamble (self, fn, bbf):
ff = bbf.readline()
if re.search('coding:', ff):
# Ignore first line if such: ;; -*-coding: utf-8-emacs;-*-
self.append_preamble(ff)
ff = bbf.readline()
# Processing: ;;; file-format: 8
res = re.search(';;; file-(format|version):\s*(\d+)', ff)
if not res:
bbf.close()
raise BBDBFileFormatError('Unrecognizable format line: %s' % ff)
self.append_preamble(ff)
ver = res.group(2)
self.set_file_format(ver)
supported = self.get_db().supported_file_formats()
if not ver in supported:
bbf.close()
raise BBDBFileFormatError(('Cannot process file "%s" '
'(version %s). Supported versions '
'are: %s' % (fn, ver, supported)))
return ver
def parse_with_encoding (self, def_f, fn, encoding): def parse_with_encoding (self, def_f, fn, encoding):
"""Folder object to which the parsed contacts will be added. fn is the """Folder object to which the parsed contacts will be added. fn is the
name of the BBDB file/message store. encoding is a string representing name of the BBDB file/message store. encoding is a string representing
a text encoding such as utf-8, latin-1, etc.""" a text encoding such as utf-8, latin-1, etc."""
with codecs.open(fn, encoding=encoding) as bbf: with codecs.open(fn, encoding=encoding) as bbf:
ff = bbf.readline() ver = self._parse_preamble(fn, bbf)
if re.search('coding:', ff):
# Ignore first line if such: ;; -*-coding: utf-8-emacs;-*-
self.append_preamble(ff)
ff = bbf.readline()
# Processing: ;;; file-format: 8 ## Now fetch and set up the parsing routines specific to the file
res = re.search(';;; file-(format|version):\s*(\d+)', ff)
if not res:
bbf.close()
raise BBDBFileFormatError('Unrecognizable format line: %s' % ff)
self.append_preamble(ff)
ver = res.group(2)
self.set_file_format(ver)
supported = self.get_db().supported_file_formats()
if not ver in supported:
bbf.close()
raise BBDBFileFormatError(('Cannot process file "%s" '
'(version %s). Supported versions '
'are: %s' % (fn, ver, supported)))
## Now fetch and set up the parsign routines specific to the file
## format ## format
self._set_regexes(ver) self._set_regexes(ver)
...@@ -235,8 +240,8 @@ class MessageStore: ...@@ -235,8 +240,8 @@ class MessageStore:
ff = bbf.readline() ff = bbf.readline()
except UnicodeDecodeError, e: except UnicodeDecodeError, e:
## We got the encoding wrong. We will have to drop ## We got the encoding wrong. We will have to drop
## everything we have done, and start all over again. ## everything we have done, and start all over again. At
## At a later stage, we could optimize by skipping over ## a later stage, we could optimize by skipping over
## whatever we have read so far, but then we will need to ## whatever we have read so far, but then we will need to
## evalute if the parsed strings will be in the same ## evalute if the parsed strings will be in the same
## encoding or not. Tricky and shady business, this. ## encoding or not. Tricky and shady business, this.
...@@ -255,8 +260,7 @@ class MessageStore: ...@@ -255,8 +260,7 @@ class MessageStore:
logging.error('Could not parse BBDB record: %s', ff) logging.error('Could not parse BBDB record: %s', ff)
raise BBDBFileFormatError(('Cannot proceed with ' raise BBDBFileFormatError(('Cannot proceed with '
'processing file "%s" ') % 'processing file "%s" ') % fn)
fn)
fon = c.get_bbdb_folder() fon = c.get_bbdb_folder()
...@@ -269,8 +273,6 @@ class MessageStore: ...@@ -269,8 +273,6 @@ class MessageStore:
else: else:
def_f.add_contact(c) def_f.add_contact(c)
# self.add_contact(c)
cnt += 1 cnt += 1
return bbf, cnt return bbf, cnt
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment