Commit b55f6f0b authored by Sriram Karra

Initial working version of reading and parsing BBDB into PIMDB and Contact objects.

parent f404f752
##
## Created : Sat Apr 07 20:03:04 IST 2012
## Last Modified : Sat Apr 07 22:15:05 IST 2012
##
## Copyright (C) 2012 Sriram Karra <karra.etc@gmail.com>
##
## Licensed under the GPL v3
##
import logging, os, os.path, sys
## Being able to fix the sys.path thusly makes it easy to execute this
## script standalone from IDLE. Hack it is, but what the hell.
##
## NOTE: '../Gout' is a relative path, so it is resolved against the current
## working directory and not against the location of this file. DIR_PATH
## therefore depends on where the script is launched from.
DIR_PATH = os.path.abspath(os.path.dirname(os.path.realpath('../Gout')))
## The 'lib' and 'tools' directories under DIR_PATH are searched ahead of
## everything already on sys.path; presumably this is where the modules
## imported below live.
EXTRA_PATHS = [os.path.join(DIR_PATH, 'lib'), os.path.join(DIR_PATH, 'tools')]
sys.path = EXTRA_PATHS + sys.path
from state import Config
from pimdb_bb import BBPIMDB
from folder_bb import BBContactsFolder
from contact_bb import BBContact
def main (argv=None):
    """Run the BBDB smoke test.

    argv is an optional argument vector laid out like sys.argv, i.e. argv[0]
    is the program name. When it is None, sys.argv is used. If argv[1] is
    present it is taken as the path of the BBDB file to read; otherwise the
    invoking user's ~/.bbdb is used.

    The config file is always read from '../app_state.json', relative to the
    current working directory."""

    if argv is None:
        argv = sys.argv

    if len(argv) > 1:
        bbfn = argv[1]
    else:
        ## Expand '~' at run time instead of baking one particular user's
        ## home directory into the script.
        bbfn = os.path.expanduser('~/.bbdb')

    ## All the work happens in the constructor; the object itself is not
    ## needed afterwards.
    TestBBContact(config_fn='../app_state.json', bbfn=bbfn)
class TestBBContact:
    """Smoke test harness: reads the application config, opens a BBDB file
    through BBPIMDB and prints the name of its default folder."""

    def __init__ (self, config_fn, bbfn):
        """config_fn is the path handed to Config; bbfn is the path of the
        BBDB file handed to BBPIMDB. Constructing the object runs the whole
        test."""

        logging.debug('Getting started... Reading Config File...')
        self.config = Config(config_fn)
        self.bb     = BBPIMDB(self.config, bbfn)
        self.deff   = self.bb.get_def_folder()

        name = self.deff.get_name() if self.deff else None

        ## A parenthesised single argument is valid both as a Python 2 print
        ## statement and as a Python 3 print() call. The '%s %s' supplies the
        ## separating space the old two-item print statement emitted, so the
        ## text written is unchanged.
        print('%s %s' % ("\nHurrah: Name is: ", name))
if __name__ == '__main__':
    ## Standalone run: make the root logger chatty so that the debug
    ## messages are emitted, then kick off the test.
    root_logger = logging.getLogger()
    root_logger.setLevel(logging.DEBUG)
    main()
##
## Created : Fri Apr 06 19:08:32 IST 2012
## Last Modified : Sat Apr 07 22:04:42 IST 2012
##
## Copyright (C) 2012 Sriram Karra <karra.etc@gmail.com>
##
## Licensed under the GPL v3
##
## This file defines a wrapper class around a BBDB Contact entry, by extending
## the abstract base Contact class. BBDB is, of course, the Insidious
## Big Brother Data Base
##
import logging, re
from contact import Contact
from utils import chompq
import folder_bb
class BBContact(Contact):
    """This class extends the Contact abstract base class to wrap a BBDB
    Contact"""

    def __init__ (self, folder, con=None, rec=None):
        """folder and con are handed to Contact.__init__ unchanged. rec is
        the native string vector representation of a BBDB contact entry on
        disk; when given, it is stored and parsed into the contact's
        properties."""

        Contact.__init__(self, folder, con)

        self.set_db_config(self.get_config().get_db_config(self.get_dbid()))
        self.set_email_domains(self.get_db_config()['email_domains'])

        if rec:
            self.set_rec(rec)
            self.init_props_from_rec(rec)

    ##
    ## First the inherited abstract methods from the base classes
    ##

    def save (self):
        """Not implemented yet; always raises NotImplementedError."""

        raise NotImplementedError

    ##
    ## Overridden methods
    ##

    def get_name (self):
        """Return the 'name' property if it is set. Otherwise build a name
        out of the first name (followed by a single space) and the last
        name, using whichever of the two are available."""

        ret = self._get_prop('name')
        if ret:
            return ret

        ret = ''
        fn = self.get_firstname()
        if fn:
            ret += (fn + ' ')

        ln = self.get_lastname()
        if ln:
            ret += ln

        return ret

    ##
    ## Now onto the non-abstract methods.
    ##

    def get_rec (self):
        """Return the raw BBDB record text stored by set_rec()."""

        return self._get_att('rec')

    def set_rec (self, rec):
        """Store the raw BBDB record text."""

        return self._set_att('rec', rec)

    def init_props_from_rec (self, rec):
        """Parse rec with the full-contact regex supplied by the database
        object and populate names, aka, company, emails, postal addresses and
        phones from the parse result. If rec does not match, the failure is
        logged at critical level and nothing is populated."""

        con_re = self.get_db().get_con_re()
        parse_res = re.search(con_re, rec)

        if not parse_res:
            logging.critical('Could not Parse BBDB contact entry: %s', rec)
            logging.critical('Cannnot do anything with this chap...')
            return

        d = parse_res.groupdict()
        self._snarf_names_from_parse_res(d)
        self._snarf_aka_from_parse_res(d)
        self._snarf_company_from_parse_res(d)
        self._snarf_emails_from_parse_res(d)
        self._snarf_postal_from_parse_res(d)
        self._snarf_phones_from_parse_res(d)

    def _unquoted_strings (self, raw):
        """raw is the matched text of a BBDB string or list of strings, for
        e.g. '("Foo" "Bar")' or 'nil'. Return the list of individual string
        values in it after passing each through chompq (presumed to strip
        the surrounding double quotes).

        The bare word nil is BBDB's marker for 'no value' and is dropped, so
        both None and 'nil' produce an empty list. A quoted "nil" is a real
        value and is kept."""

        if not raw:
            return []

        str_re = self.get_db().get_str_re()
        return [chompq(s) for s in re.findall(str_re, raw) if s != 'nil']

    def _snarf_names_from_parse_res (self, pr):
        """Set first name, last name and suffix from the parse result pr. A
        field that is missing or nil leaves the matching property alone."""

        n = pr['firstname']
        if n and n != 'nil':
            self.set_firstname(chompq(n))

        n = pr['lastname']
        if n and n != 'nil':
            self.set_lastname(chompq(n))

        # FIXME: Just what the hell is an 'Affix'? Just use the first one and
        # ditch the rest.
        ## pr['affix'] is the raw text of the whole list, so it has to be
        ## split into its strings first; indexing it directly would only
        ## yield its first character.
        affixes = self._unquoted_strings(pr['affix'])
        if affixes:
            self.set_suffix(affixes[0])

    def _snarf_aka_from_parse_res (self, pr):
        """Store the aka field as a custom property.

        NOTE(review): the raw matched text is stored as is, i.e. it is
        neither split nor unquoted, and may be the bare word nil - confirm
        this is what consumers of the 'aka' custom field expect."""

        self.add_custom('aka', pr['aka'])

    def _snarf_company_from_parse_res (self, pr):
        """Set the company from the parse result pr. Nothing is done when
        there are no company entries."""

        cs = self._unquoted_strings(pr['company'])
        if cs:
            ## The first company goes into the Company field, the rest we will
            ## push into the custom field
            self.set_company(cs[0])
            self.add_custom('company', cs[1:])

    def _snarf_emails_from_parse_res (self, pr):
        """File every email address in the parse result pr under one of the
        home, work or other categories."""

        ems = self._unquoted_strings(pr['emails'])
        if not ems:
            return

        domains = self.get_email_domains()

        for em in ems:
            home, work, other = self._classify_email_addr(em, domains)

            ## Each address lands in exactly one category. If its domain is
            ## configured under more than one of them, the first of home,
            ## work and other wins. Addresses matching no configured domain
            ## are treated as work addresses.
            if home:
                self.add_email_home(em)
            elif work:
                self.add_email_work(em)
            elif other:
                self.add_email_other(em)
            else:
                self.add_email_work(em)

    def _classify_email_addr (self, addr, domains):
        """Return a tuple of (home, work, other) booleans classifying if the
        specified address falls within one of the domains.

        domains maps each of 'home', 'work' and 'other' to an iterable of
        domain patterns. A category that is missing from it is reported with
        a warning and classified as False.

        NOTE(review): every pattern is used as a regular expression anchored
        at the end of the address, so a '.' in it matches any character -
        confirm whether the configured domains are meant to be regexes."""

        res = {'home' : False, 'work' : False, 'other' : False}

        for cat in ('home', 'work', 'other'):
            try:
                cat_domains = domains[cat]
            except KeyError:
                logging.warning('Invalid email_domains specification.')
                continue

            for domain in cat_domains:
                if re.search((domain + '$'), addr):
                    res[cat] = True

        return (res['home'], res['work'], res['other'])

    def _snarf_postal_from_parse_res (self, pr):
        """Add one postal address per address vector found in the parse
        result pr. Address components that are nil or empty are left out of
        the dictionary handed to add_postal()."""

        addrs = pr['addrs']
        if not addrs or addrs == 'nil':
            return

        adr_re = self.get_db().get_adr_re()

        ## finditer hands back match objects, so the named groups of every
        ## address are available directly.
        for match in re.finditer(adr_re, addrs):
            fields = match.groupdict()
            addict = {}

            ## fields['streets'] is the raw text of the whole street list;
            ## break it into the individual lines before joining them.
            streets = self._unquoted_strings(fields['streets'])
            if streets:
                addict.update({'street' : '\n'.join(streets)})

            for key in ('city', 'state', 'country', 'zip'):
                val = fields[key]
                if not val or val == 'nil':
                    continue

                val = chompq(val)
                if val:
                    addict.update({key : val})

            self.add_postal(chompq(fields['adlabel']), addict)

    def _snarf_phones_from_parse_res (self, pr):
        """Placeholder; phone numbers are not extracted yet."""

        ## FIXME: Need to fix this, for sure. LIke right now.
        pass

    def _snarf_notes_from_parse_res (self, pr):
        """Placeholder; notes are not extracted yet."""

        ## FIXME: Need to fix this, for sure. LIke right now.
        pass
##
## Created : Sat Apr 07 20:03:04 IST 2012
## Last Modified : Sat Apr 07 22:14:55 IST 2012
##
## Copyright (C) 2012 Sriram Karra <karra.etc@gmail.com>
##
## Licensed under the GPL v3
##
import logging, re
from folder import Folder
from contact_bb import BBContact
class BBDBFileFormatError(Exception):
    """Raised when the header of a BBDB file cannot be recognised, or when
    it declares a file format version that is not supported."""
class BBContactsFolder(Folder):
    """A folder of BBDB contacts. The folder name is the path of the BBDB
    file; the file is read, and every record in it wrapped in a BBContact,
    when the folder is constructed."""

    def __init__ (self, db, fn):
        """db is handed to Folder.__init__; fn is the path of the BBDB file,
        which also becomes the name of the folder."""

        Folder.__init__(self, db)

        self.set_name(fn)
        ## Maps item id -> BBContact
        self.contacts = {}
        self.read_contacts()

    ##
    ## Implementation of the abstract methods inherited from Folder
    ##

    def get_batch_size (self):
        return 1000

    def prep_sync_lists (self, destid, sl, updated_min=None, cnt=0):
        raise NotImplementedError

    def find_item (self, itemid):
        raise NotImplementedError

    def find_items (self, itemids):
        raise NotImplementedError

    def batch_create (self, src_sl, src_dbid, items):
        raise NotImplementedError

    def batch_update (self, sync_list, src_dbid, items):
        raise NotImplementedError

    def writeback_sync_tags (self, items):
        raise NotImplementedError

    def bulk_clear_sync_flags (self, dbids):
        raise NotImplementedError

    def __str__ (self):
        ret = 'Contacts'

        return ('%s.\tName: %s;\tGID: %s;\t' % (ret, self.get_name(),
                                                self.get_itemid()))

    ##
    ## Internal and helper routines
    ##

    def add_contact (self, itemid, bbc):
        """Register the contact bbc under itemid, replacing any contact
        already stored under that id."""

        self.contacts[itemid] = bbc

    def read_contacts (self, fn=None):
        """Read the BBDB file fn (defaults to the folder name) and add every
        contact record in it to this folder.

        The first line is ignored and the second line must declare the file
        format. After that every line is one contact record; blank lines and
        lines starting with ';' are skipped.

        Raises BBDBFileFormatError if the format line cannot be recognised or
        declares a version older than 7."""

        if not fn:
            fn = self.get_name()

        ## The with statement closes the file on every way out, including
        ## the raises below.
        with open(fn) as bbf:
            ## Ignore first line which is: ;; -*-coding: utf-8-emacs;-*-
            bbf.readline()

            ## Processing: ;;; file-format: 8
            ff = bbf.readline()
            res = re.search(r';;; file-(format|version):\s*(\d+)', ff)
            if not res:
                raise BBDBFileFormatError('Unrecognizable format line: %s' % ff)

            ver = int(res.group(2))
            if ver < 7:
                raise BBDBFileFormatError(
                    'Need minimum file format ver 7. File version is: %d' % ver)

            for line in iter(bbf.readline, ''):
                rec = line.rstrip()

                ## Skipping ';' lines takes care of the user-fields line
                ## (what's the point of that anyway...) without throwing away
                ## the first record of a file that does not have one. Blank
                ## lines carry no record either.
                if not rec or rec.startswith(';'):
                    continue

                c = BBContact(self, rec=rec)
                self.add_contact(c.get_itemid(), c)
                logging.debug('Successfully read and processed: %s',
                              c.get_name())
##
## Created : Sat Apr 07 18:52:19 IST 2012
## Last Modified : Sat Apr 07 21:01:02 IST 2012
##
## Copyright (C) 2012 by Sriram Karra <karra.etc@gmail.com>
##
## Licensed under the GPL v3
##
import logging, re
from pimdb import PIMDB
from folder import Folder
from folder_bb import BBContactsFolder
class BBPIMDB(PIMDB):
    """PIMDB implementation backed by a BBDB file. It also owns the regular
    expressions used to parse BBDB records."""

    def __init__ (self, config, def_fn):
        """config is handed to PIMDB.__init__; def_fn is the path of the BBDB
        file behind the default contacts folder."""

        PIMDB.__init__(self, config)

        self.set_def_fn(def_fn)

        ## set_folders() constructs the contacts folder, and that reads the
        ## file and parses every record right away, so the regexes have to
        ## be in place before it runs.
        self._set_regexes()
        self.set_folders()

    ##
    ## First implementation of the abstract methods of PIMDB.
    ##

    def get_dbid (self):
        """See the documentation in class PIMDB"""

        return 'bb'

    def new_folder (self, fname, type):
        """See the documentation in class PIMDB"""

        raise NotImplementedError

    def del_folder (self, gid):
        """See the documentation in class PIMDB"""

        raise NotImplementedError

    def set_folders (self):
        """See the documentation in class PIMDB"""

        folder = BBContactsFolder(self, self.get_def_fn())
        if not folder:
            return

        self.add_contacts_folder(folder)
        self.set_def_folder(Folder.CONTACT_t, folder)

    def set_def_folders (self):
        """See the documentation in class PIMDB"""

        ## Nothing to do here: set_folders() already registers the default
        ## folder.
        pass

    def set_sync_folders (self):
        """See the documentation in class PIMDB"""

        raise NotImplementedError

    ##
    ## Now the non-abstract methods and internal methods
    ##

    def get_def_fn (self):
        """Return the path of the default BBDB file."""

        return self._get_att('def_fn')

    def set_def_fn (self, fn):
        """Store the path of the default BBDB file."""

        return self._set_att('def_fn', fn)

    def get_con_re (self):
        """Return the regex for a complete contact record."""

        return self._get_att('con_re')

    def set_con_re (self, reg):
        return self._set_att('con_re', reg)

    def get_str_re (self):
        """Return the regex for a single quoted string or nil."""

        return self._get_att('str_re')

    def set_str_re (self, reg):
        return self._set_att('str_re', reg)

    def get_adr_re (self):
        """Return the regex for a single address vector."""

        return self._get_att('adr_re')

    def set_adr_re (self, reg):
        return self._set_att('adr_re', reg)

    def get_ph_re (self):
        """Return the regex for a single phone vector."""

        return self._get_att('ph_re')

    def set_ph_re (self, reg):
        return self._set_att('ph_re', reg)

    def get_note_re (self):
        """Return the regex for a single (field . value) note."""

        return self._get_att('note_re')

    def set_note_re (self, reg):
        return self._set_att('note_re', reg)

    def get_notes_re (self):
        """Return the regex for the parenthesised list of notes."""

        return self._get_att('notes_re')

    def set_notes_re (self, reg):
        return self._set_att('notes_re', reg)

    def _set_regexes (self):
        """Assemble the regular expressions describing the pieces of a BBDB
        record and stash them away through the set_*_re() methods."""

        def grp (name, body):
            ## Wrap body in a named group
            return '(?P<' + name + '>' + body + ')'

        ws       = r'\s*'
        string   = '"[^"]*"|nil'
        str_list = r'nil|\(((' + string + ')' + ws + r')*\)'

        ## Phones
        ph_vec = (r'\[\s*' + grp('phlabel', string) + r'\s*' +
                  grp('number',
                      '(' + string + ')|' +
                      r'(\d\d\d\s+\d\d\d\s+\d\d\d\d?\s+\d+)\s*') +
                  r'\]')
        ph_list = r'nil|(\(\s*(' + ph_vec + r'\s*)+)\)'

        ## Addresses
        ad_vec = (r'\[\s*' + grp('adlabel', string) + r'\s*(' +
                  grp('streets', str_list) + r'\s*' +
                  grp('city',    string)   + r'\s*' +
                  grp('state',   string)   + r'\s*' +
                  grp('zip', '(' + string + r')|(\d\d\d\d\d)') + r'\s*' +
                  grp('country', string) +
                  r')\s*\]')
        ad_list = r'nil|\(\s*(' + ad_vec + r'\s*)+\)'

        ## Notes
        note  = (r'\(' + grp('field', '[^()]+') + r'\s*\.\s*' +
                 grp('value', string + r'|\d+') + r'+\)')
        notes = r'\((' + note + r'\s*)+\)'

        ## A full contact entry: the fields in the order they appear in a
        ## record, each of them followed by optional whitespace.
        con_fields = (('firstname', string),
                      ('lastname',  string),
                      ('affix',     str_list),
                      ('aka',       str_list),
                      ('company',   str_list),
                      ('phones',    ph_list),
                      ('addrs',     ad_list),
                      ('emails',    str_list),
                      ('notes',     notes),
                      ('cache',     string))

        con = (r'\[\s*' +
               ''.join([grp(name, body) + r'\s*'
                        for name, body in con_fields]) +
               r'\s*\]')

        ## Now save some of the regexes for later use...
        self.set_con_re(con)
        self.set_str_re(string)
        self.set_adr_re(ad_vec)
        self.set_ph_re(ph_vec)
        self.set_note_re(note)
        self.set_notes_re(notes)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment