Commit 3b788bce authored by Ana Guerrero López

Import Upstream version 3.1

parent 90a7ba5b
......@@ -29,8 +29,9 @@ from gzip import GzipFile
from types import UnicodeType
from sqlobject import *
from sqlobject.sqlbuilder import ISNOTNULL
from imdb.parser.sql import soundex
from imdb.parser.sql import soundex, get_movie_data
from imdb.parser.sql.dbschema import *
from imdb.utils import analyze_title, analyze_name, \
build_name, build_title, normalizeName, _articles
......@@ -497,10 +498,11 @@ class _BaseCache(dict):
def fetchsome(curs, size=20000):
"""Yes, I've read the Python Cookbook! :-)"""
while 1:
res = CURS.fetchmany(size)
res = curs.fetchmany(size)
if not res: break
for r in res: yield r
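# A hypothetical usage sketch (not part of the original script), assuming
# a DB-API cursor named "curs" and an already-executed query:
#
#     curs.execute('SELECT id, title FROM title')
#     for row in fetchsome(curs, size=5000):
#         do_something(row)  # "do_something" is a placeholder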
class MoviesCache(_BaseCache):
"""Manage the movies list."""
className = 'MoviesCache'
......@@ -547,7 +549,8 @@ class MoviesCache(_BaseCache):
if series_d['year'] is None: del series_d['year']
else: series_d['year'] = str(series_d['year'])
mdict['episode of'] = series_d
title = build_title(mdict, canonical=1, ptdf=1)
title = build_title(mdict, canonical=1, ptdf=1,
_emptyString='')
dict.__setitem__(self, title, x[0])
self.counter = counter(Title.select().count() + 1)
Title.sqlmeta.cacheValues = _oldcacheValues
......@@ -572,7 +575,8 @@ class MoviesCache(_BaseCache):
kind = tget('kind')
if kind == 'episode':
#series title
stitle = build_title(tget('episode of'), canonical=1)
stitle = build_title(tget('episode of'), canonical=1,
_emptyString='')
episodeOf = self.addUnique(stitle)
del t['episode of']
year = self.movieYear.get(v)
......@@ -822,7 +826,7 @@ def doCast(fp, roleid, rolename):
for item in ll[1:]:
if not item: continue
if item[0] == '[':
role = item[1:-1]
role = item[1:].rstrip(']')
if role[-1:] == ')':
nidx = role.find('(')
if nidx != -1:
......@@ -1423,7 +1427,6 @@ CACHE_MID = MoviesCache()
CACHE_PID = PersonsCache()
def _cmpfunc(x, y):
"""Sort a list of tuples, by the length of the first item (in reverse)."""
lx = len(x[0])
......@@ -1457,22 +1460,102 @@ def readConstants():
CCAST_TYPES[x.kind] = x.id
def notNULLimdbID(cls):
"""Return a list of dictionaries for titles or names for which a
imdbID is present in the database."""
if cls is Title: cname = 'movies'
else: cname = 'people'
print 'SAVING imdbID values for %s...' % cname,
sys.stdout.flush()
try:
tons = cls.select(ISNOTNULL(cls.q.imdbID))
except:
print 'SKIPPING: no data.'
return []
results = []
_kdict = {}
try:
for x in KindType.select():
_kdict[x.id] = x.kind
except:
print 'SKIPPING: no data.'
return []
for t in tons:
if cls is Title:
md = get_movie_data(t.id, _kdict)
else:
md = {'name': t.name}
if t.imdbIndex is not None:
md['imdbIndex'] = t.imdbIndex
md['imdbID'] = t.imdbID
results.append(md)
print 'DONE! (%d entries)' % len(results)
return results
def restoreImdbID(tons, cls):
"""Restore imdbID for movies or people."""
if cls is Title:
CACHE = CACHE_MID
cname = 'movies'
else:
CACHE = CACHE_PID
cname = 'people'
print 'RESTORING imdbID values for %s...' % cname,
sys.stdout.flush()
count = 0
for t in tons:
if cls is Title:
t_str = build_title(t, canonical=1, ptdf=1)
else:
t_str = build_name(t, canonical=1)
t_str = t_str.encode('utf_8')
db_mopID = CACHE.get(t_str)
if db_mopID is None:
continue
try:
mop_in_db = cls.get(db_mopID)
try:
mop_in_db.imdbID = t['imdbID']
except:
continue
except SQLObjectNotFound:
continue
count += 1
print 'DONE! (restored %d entries out of %d)' % (count, len(tons))
# begin the iterations...
def run():
print 'RUNNING imdbpy2sql.py'
# Storing imdbIDs for movies and persons.
try:
movies_imdbIDs = notNULLimdbID(Title)
except:
movies_imdbIDs = []
print 'WARNING: failed to read imdbIDs for movies'
try:
people_imdbIDs = notNULLimdbID(Name)
except:
people_imdbIDs = []
print 'WARNING: failed to read imdbIDs for people'
# Truncate the current database.
print 'DROPPING current database...',
sys.stdout.flush()
dropTables()
print 'done!'
print 'DONE!'
# Rebuild the database structure.
print 'CREATING new tables...',
sys.stdout.flush()
createTables()
print 'DONE!'
t('dropping and recreating the database')
# Read the constants.
readConstants()
print 'done!'
t('dropping and recreating the database')
# Populate the CACHE_MID instance.
readMovieList()
......@@ -1522,6 +1605,18 @@ def run():
completeCast()
t('completeCast()')
# Restoring imdbIDs for movies and persons.
try:
restoreImdbID(movies_imdbIDs, Title)
del movies_imdbIDs
except:
print 'WARNING: failed to restore imdbIDs for movies'
try:
restoreImdbID(people_imdbIDs, Name)
del people_imdbIDs
except:
print 'WARNING: failed to restore imdbIDs for people'
# Flush caches.
CACHE_MID.flush()
CACHE_MID.clear()
......
......@@ -4,8 +4,28 @@ share the copyright over some portions of the code:
NAME: Giuseppe "Cowo" Corbelli
EMAIL: <cowo --> lugbs.linux.it>
DESCRIPTION: provided a lot of code and hints to integrate IMDbPY
CONTRIBUTION: provided a lot of code and hints to integrate IMDbPY
with SQLObject, working on the imdbpy2sql.py script and the dbschema.py
module.
Actually, besides Giuseppe and me, these other people are listed
as developers for the IMDbPY project on sourceforge and may share
copyright on some (minor) portions of the code:
NAME: Martin Kirst
EMAIL: <martin.kirst --> s1998.tu-chemnitz.de>
CONTRIBUTION: has done an important refactoring of the imdbpyweb
program and shares with me the copyright on the whole program.
NAME: H. Turgut Uyar
EMAIL: <uyar --> itu.edu.tr>
CONTRIBUTION: has created some tests for the test-suite.
NAME: Jesper Nøhr
EMAIL: <jesper --> noehr.org>
CONTRIBUTION: provided extensive testing and some patches for
the 'http' data access system.
......@@ -15,11 +15,22 @@ I'd like to thank the following people for their help:
* Jesper Nøhr for a lot of testing, especially on the 'sql' data access system.
* Jon Sabo for a bug report about unicode and the imdbpy2sql.py script
and some feedback.
* Andrew Pendleton for a report about a very hideous bug in
the imdbpy2sql.py (garbage in the plain text data files + programming
errors + utf8 strings + postgres).
* Ataru Moroboshi ;-) for a bug report about role/duty and notes.
* Ivan Kedrin for a bug report about the analyze_title function.
* Hadley Rich for reporting bugs and providing patches for problems
parsing tv series' episodes and searching for tv series' titles.
* Jamie R. Rytlewski for a suggestion about saving imdbIDs in 'sql'.
* Vincent Crevot, for a bug report about unicode support.
* Jay Klein for a bug report and testing to fix a nasty bug in the
......
Changelog for IMDbPY
====================
* What's new in release 3.1 "The Snake King" (18 Jul 2007)
[global]
- the IMDbPYweb account now returns a single item when a search
returns only one "good enough" match (this is IMDb's default).
- updated the documentation.
- updated list of contributors and developers.
[http]
- supported the new result page for searches.
- supported the 'synopsis' page.
- supported the 'parents guide' page.
- fixed a bug retrieving notes about a movie's connections.
- fixed a bug for python2.2 (s60 mobile phones).
- fixed a bug with 'Production Notes/Status'.
- fixed a bug parsing role/duty and notes (also for httpThin).
- fixed a bug retrieving user ratings.
- fixed a bug (un)setting the proxy.
- fixed 2 bugs in movie/person news.
- fixed a bug in movie faqs.
- fixed a bug in movie taglines.
- fixed a bug in movie quotes.
- fixed a bug in movie title, in "full cast and crew" page.
- fixed 2 bugs in persons' other works.
[sql]
- hypothetical fix for a unicode problem in the imdbpy2sql.py script.
- now the 'imdbID' fields in the Title and Name tables are restored
when updating from an older version.
- fixed a nasty bug handling utf-8 strings in the imdbpy2sql.py script.
[mobile]
- supported the new result page for searches.
- fixed a bug for python2.2 (s60 mobile phones).
- fixed a bug searching for persons with single match and no
messages in the board.
- fixed a bug parsing role/duty and notes.
* What's new in release 3.0 "Spider-Man 3" (03 May 2007)
[global]
- IMDbPY now works with the new IMDb's site design; a new account is
......
......@@ -63,7 +63,8 @@ the movies, only the main information are retrieved (see the 'httpThin'
notes). It should be, at usage time, from 2 to 20 times faster than
the "http"/"httpThin" data access system.
This code is still BETA! Please report me bugs/ideas/hints...
This code still needs tests on mobile phones!
Please report any bugs/ideas/hints...
Usage:
from imdb import IMDb
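A minimal sketch of a complete 'mobile' session (the title below is
only an example) might look like:

    from imdb import IMDb
    i = IMDb(accessSystem='mobile')
    results = i.search_movie('the matrix')
    first = results[0]
    i.update(first)  # fetch the main information for the first match
    print first['title']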
......@@ -79,7 +80,12 @@ Usage:
A GUI for Series 60 smart phones, developed by Tero Saarni,
is available at:
http://kotisivu.mtv3.fi/terosaarni/python/imdbpygui/
http://imdbpy.sourceforge.net/?page=mobile
On some mobile phones a couple of modules may be missing, and
you have to install them manually as libraries; you can find
these two modules (sgmllib.py and htmlentitydefs.py) here:
http://imdbpy.sourceforge.net/symbiangui/mobile-imdbpy-modules-0.1.tar.gz
THE "HTTPTHIN" DATA ACCESS SYSTEM
......
......@@ -3,19 +3,12 @@
========================
On 19 February 2007, IMDb introduced a complete redesign of their
web site. This means that the 'http' and 'mobile' parser are no
more able to parse the new html; as a temporary solution, the account
used by IMDbPY was set to "use previous layout", meaning that - for
a certain amount of time - the current IMDbPY version (2.9) will work.
web site.
This (2.9) will be the last version of IMDbPY to parse the old layout:
from now on, on the CVS, the development will be geared to use the new
layout - and a new IMDb's account will be used.
Conclusion: if you find a bug in 'http' or 'mobile' in this release,
please report it anyway (it can also affect the new code), but consider
that a bit of time will be needed, to fix everything.
Even better, help the development subscribing to the mailing list:
http://imdbpy.sourceforge.net/?page=devel
Since release 3.0, IMDbPY uses a new account to access the IMDb
web site, parsing the new layout.
Older versions still access the old layout, so they are still (more
or less) working; obviously only the new layout is supported from
now on.
......@@ -97,7 +97,7 @@ complete plain text data files set (as of 12 Nov 2006, with about
database | time in minutes: total (insert data/create indexes)
----------------------+-----------------------------------------------------
MySQL 5.0 MyISAM | 115 (95/20)
MYSQL 5.0 InnoDB | ??? (80/???)
MySQL 5.0 InnoDB | ??? (80/???)
| maybe I've not configured it properly: it
| looks like the creation of the indexes will
| take more than 2 or 3 hours.
......@@ -116,6 +116,35 @@ If you have different experiences, please tell me!
The imdbpy2sql.py script will print a lot of debug information on standard
output; you can save it to a file by appending (without quotes) "2>&1 | tee output.txt"
[sqlite failure]
It seems that, with older versions of the python-sqlite package, the first
run may fail; if you get a DatabaseError exception saying "no such table",
try running the command again with the same arguments.
[data truncated]
If you get an insane amount (hundreds or thousands, on various text
columns) of warnings like the following:
imdbpy2sql.py:727: Warning: Data truncated for column 'person_role' at row 4979
CURS.executemany(self.sqlString, self.converter(self.values()))
you probably have a problem with the configuration of your database.
The errors come from strings that get cut at the first non-ASCII char (and
so you're losing a lot of information).
To avoid this problem, make sure that your database server is set up
properly, with the client library configured to communicate with the
server in a consistent way.
E.g., for MySQL you can set:
character-set-server = utf8
default-collation = utf8_unicode_ci
default-character-set = utf8
or even:
character-set-server = latin1
default-collation = latin1_bin
default-character-set = latin1
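As a quick sanity check (a sketch assuming the MySQLdb package; the
connection parameters are only examples), you can ask the client
library which charset is actually in use:

    import MySQLdb
    con = MySQLdb.connect(db='imdb', user='user', passwd='password')
    # character_set_name() reports the connection charset; it should
    # match the server-side settings above (e.g. 'utf8' or 'latin1').
    print con.character_set_name()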
[adult titles]
Beware that, while running, the imdbpy2sql.py script will output a lot
of strings containing both person names and movie titles. The script
......@@ -125,14 +154,6 @@ screaming 'daddy! daddy! what kind of animals trains Rocco in the
documentary "Rocco: Animal Trainer 17"???'... well it's not my fault! ;-)
SQLITE NOTE
===========
It seems that, with older versions of the python-sqlite package, the first
run may fail; if you get a DatabaseError exception saying "no such table",
try running again the command with the same arguments.
SQL USAGE
=========
......
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
......@@ -121,6 +121,7 @@ class Movie(_Container):
'merchandise': 'merchandising links',
'sales': 'merchandising links',
'faq': 'faqs',
'parental guide': 'parents guide',
'frequently asked questions': 'faqs'}
keys_tomodify_list = ('plot', 'trivia', 'alternate versions', 'goofs',
......
......@@ -345,6 +345,13 @@ class IMDbBase:
title = title.encode('utf-8')
params = 'q=%s;s=pt' % str(urllib.quote_plus(title))
content = self._searchIMDb(params)
if content and content[:512].find('<title>IMDb Title') != -1:
# Sometimes (e.g. for titles with a "+" in them) a list
# of results is returned even for Exact Primary searches;
# this tries to deal with it, hoping that a "normal" query
# will return just one title.
params = 's=tt&q=%s' % str(urllib.quote_plus(title))
content = self._searchIMDb(params)
if not content: return None
from imdb.parser.http.searchMovieParser import BasicMovieParser
mparser = BasicMovieParser()
......@@ -362,6 +369,9 @@ class IMDbBase:
name = name.encode('utf-8')
params = 'q=%s;s=pn' % str(urllib.quote_plus(name))
content = self._searchIMDb(params)
if content and content[:512].find('<title>IMDb Name') != -1:
params = 's=nm&q=%s' % str(urllib.quote_plus(name))
content = self._searchIMDb(params)
if not content: return None
from imdb.parser.http.searchPersonParser import BasicPersonParser
pparser = BasicPersonParser()
......
......@@ -24,6 +24,7 @@ along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
"""
import sys
from urllib import FancyURLopener, quote_plus
from codecs import lookup
......@@ -43,7 +44,8 @@ from movieParser import movie_parser, plot_parser, movie_awards_parser, \
soundclips_parser, videoclips_parser, news_parser, \
photosites_parser, amazonrev_parser, business_parser, \
literature_parser, sales_parser, episodes_parser, \
eprating_parser, movie_faqs_parser, airing_parser
eprating_parser, movie_faqs_parser, airing_parser, \
synopsis_parser, parentsguide_parser
from searchMovieParser import search_movie_parser
from personParser import maindetails_parser, bio_parser, \
otherworks_parser, person_awards_parser, \
......@@ -53,6 +55,7 @@ from personParser import maindetails_parser, bio_parser, \
from searchPersonParser import search_person_parser
from utils import ParserBase
PY_VERSION = sys.version_info[:2]
# Misc URLs
imdbURL_movie = 'http://akas.imdb.com/title/tt%s/'
......@@ -73,14 +76,42 @@ class IMDbURLopener(FancyURLopener):
"""Fetch web pages and handle errors."""
def __init__(self, *args, **kwargs):
FancyURLopener.__init__(self, *args, **kwargs)
# Headers to add to every request.
# XXX: IMDb's web server doesn't like urllib-based programs,
# so let's pretend to be Mozilla.
# Wow! I'm shocked by my total lack of ethics! <g>
self.addheaders = [('User-agent', 'Mozilla/5.0')]
# This class is used also to perform "Exact Primary [Title|Name]"
# searches, and so by default the cookie is set.
self.set_header('User-agent', 'Mozilla/5.0')
# XXX: This class is used also to perform "Exact Primary
# [Title|Name]" searches, and so by default the cookie is set.
c_header = 'id=%s; uu=%s' % (_cookie_id, _cookie_uu)
self.addheaders.append(('Cookie', c_header))
self.set_header('Cookie', c_header)
def get_proxy(self):
"""Return the used proxy, or an empty string."""
return self.proxies.get('http', '')
def set_proxy(self, proxy):
"""Set the proxy."""
if not proxy:
if self.proxies.has_key('http'):
del self.proxies['http']
else:
if not proxy.lower().startswith('http://'):
proxy = 'http://%s' % proxy
self.proxies['http'] = proxy
def set_header(self, header, value, _overwrite=True):
"""Set a default header."""
if _overwrite:
self.del_header(header)
self.addheaders.append((header, value))
def del_header(self, header):
"""Remove a default header."""
for index in xrange(len(self.addheaders)):
if self.addheaders[index][0] == header:
del self.addheaders[index]
break
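# A hypothetical usage sketch (not in the original sources) of the header
# helpers on an opener instance; the cookie values are placeholders:
#
#     opener = IMDbURLopener()
#     opener.set_header('Cookie', 'id=XXX; uu=YYY')  # replaces any old value
#     opener.del_header('Cookie')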
def retrieve_unicode(self, url, size=-1):
"""Retrieves the given URL, and returns a unicode string,
......@@ -89,11 +120,15 @@ class IMDbURLopener(FancyURLopener):
encode = None
try:
if size != -1:
self.addheader('Range', 'bytes=0-%d' % size)
self.set_header('Range', 'bytes=0-%d' % size)
uopener = self.open(url)
content = uopener.read(size=size)
kwds = {}
if PY_VERSION > (2, 3):
kwds['size'] = size
content = uopener.read(**kwds)
# Maybe the server is so nice to tell us the charset...
server_encode = uopener.info().getparam('charset')
# look at the content-type HTML meta tag.
# Otherwise, look at the content-type HTML meta tag.
if server_encode is None and content:
first_bytes = content[:512]
begin_h = first_bytes.find('text/html; charset=')
......@@ -109,19 +144,18 @@ class IMDbURLopener(FancyURLopener):
pass
uopener.close()
if size != -1:
for index in xrange(len(self.addheaders)):
if self.addheaders[index][0] == 'Range':
del self.addheaders[index]
self.del_header('Range')
self.close()
except IOError, e:
if size != -1:
for index in xrange(len(self.addheaders)):
if self.addheaders[index][0] == 'Range':
del self.addheaders[index]
# Ensure that the Range header is removed.
self.del_header('Range')
raise IMDbDataAccessError, {'errcode': e.errno,
'errmsg': str(e.strerror),
'url': url,
'proxy': self.proxies.get('http', '')}
'proxy': self.get_proxy(),
'exception type': 'IOError',
'original exception': e}
if encode is None:
encode = 'latin_1'
# The detection of the encoding is error prone...
......@@ -135,14 +169,19 @@ class IMDbURLopener(FancyURLopener):
'errcode': errcode,
'errmsg': errmsg,
'headers': headers,
'proxy': self.proxies.get('http', '')}
'error type': 'http_error_default',
'proxy': self.get_proxy()}
def open_unknown(self, fullurl, data=None):
raise IMDbDataAccessError, {'fullurl': fullurl,
'data': str(data),
'proxy': self.proxies.get('http', '')}
'error type': 'open_unknown',
'proxy': self.get_proxy()}
def open_unknown_proxy(self, proxy, fullurl, data=None):
raise IMDbDataAccessError, {'proxy': str(proxy),
'fullurl': fullurl,
'error type': 'open_unknown_proxy',
'data': str(data)}
......@@ -151,12 +190,11 @@ class IMDbHTTPAccessSystem(IMDbBase):
accessSystem = 'http'
urlOpener = IMDbURLopener()
def __init__(self, isThin=0, adultSearch=1, proxy=-1,
*arguments, **keywords):
"""Initialize the access system."""
IMDbBase.__init__(self, *arguments, **keywords)
self.urlOpener = IMDbURLopener()
# When isThin is set, we're parsing the "maindetails" page
# of a movie (instead of the "combined" page) and movie/person
# references are not collected if no defaultModFunct is provided.
......@@ -171,7 +209,8 @@ class IMDbHTTPAccessSystem(IMDbBase):
from imdb.utils import modNull
self._defModFunct = modNull
self.do_adult_search(adultSearch)
if proxy != -1: self.set_proxy(proxy)
if proxy != -1:
self.set_proxy(proxy)
def _normalize_movieID(self, movieID):
"""Normalize the given movieID."""
......@@ -199,6 +238,10 @@ class IMDbHTTPAccessSystem(IMDbBase):
"""
return personID
def get_proxy(self):
"""Return the used proxy or an empty string."""
return self.urlOpener.get_proxy()
def set_proxy(self, proxy):
"""Set the web proxy to use.
......@@ -207,17 +250,7 @@ class IMDbHTTPAccessSystem(IMDbBase):
If set, the value of the environment variable HTTP_PROXY is
automatically used.
"""
if not proxy:
if self.urlOpener.proxies.has_key('http'):
del self.urlOpener.proxies['http']
else:
if not proxy.lower().startswith('http://'):
proxy = 'http://%s' % proxy
self.urlOpener.proxies['http'] = proxy
def get_proxy(self):
"""Return the used proxy or an empty string."""
return self.urlOpener.proxies.get('http', '')
self.urlOpener.set_proxy(proxy)
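# A hypothetical usage sketch (the proxy address is only an example):
#
#     from imdb import IMDb
#     i = IMDb(accessSystem='http')
#     i.set_proxy('localhost:8080')  # 'http://' is prepended if missing
#     print i.get_proxy()  # -> 'http://localhost:8080'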
def do_adult_search(self, doAdult,
cookie_id=_cookie_id, cookie_uu=_cookie_uu):
......@@ -225,13 +258,11 @@ class IMDbHTTPAccessSystem(IMDbBase):
search results; cookie_id and cookie_uu are optional
parameters to select a specific account (see your cookie
or cookies.txt file)."""
for index in xrange(len(self.urlOpener.addheaders)):
if self.urlOpener.addheaders[index][0] == 'Cookie':
del self.urlOpener.addheaders[index]
break
if doAdult:
c_header = 'id=%s; uu=%s' % (cookie_id, cookie_uu)
self.urlOpener.addheaders += [('Cookie', c_header)]
self.urlOpener.set_header('Cookie', c_header)
else:
self.urlOpener.del_header('Cookie')
def _retrieve(self, url, size=-1):
"""Retrieve the given URL."""
......@@ -430,6 +461,14 @@ class IMDbHTTPAccessSystem(IMDbBase):
cont = self._retrieve(imdbURL_movie % movieID + 'tvschedule')
return airing_parser.parse(cont)
def get_movie_synopsis(self, movieID):
cont = self._retrieve(imdbURL_movie % movieID + 'synopsis')
return synopsis_parser.parse(cont)
def get_movie_parents_guide(self, movieID):
cont = self._retrieve(imdbURL_movie % movieID + 'parentalguide')
return parentsguide_parser.parse(cont)
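# A hypothetical usage sketch for the two new pages; the movieID is only
# an example, and 'synopsis'/'parents guide' are the info-set names:
#
#     from imdb import IMDb
#     i = IMDb(accessSystem='http')
#     movie = i.get_movie('0133093')
#     i.update(movie, info=['synopsis'])
#     i.update(movie, info=['parents guide'])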
def _search_person(self, name, results):
# The URL of the query.
# XXX: To retrieve the complete results list:
......
......@@ -427,30 +427,48 @@ class HTMLOtherWorksParser(ParserBase):
self._ow = []
self._cow = u''
self._dostrip = 0
self._seen_hr = 0
self._seen_h5 = 0
self._seen_left_div = 0
def get_data(self):
"""Return the dictionary."""
if not self._ow: return {}
return {self.kind: self._ow}
def start_dd(self, attrs):
self._in_ow = 1
def end_dd(self): pass
def start_b(self, attrs): pass
def end_b(self):
if self._seen_hr: return
if self.kind == 'agent' and self._in_content and self._cow:
self._cow += '::'
self._dostrip = 1
def start_h5(self, attrs): pass
def end_h5(self):
self._seen_h5 = 1
def start_div(self, attrs):
cls = self.get_attr_value(attrs, 'class')
if cls and cls.strip().lower() == 'left':
self._seen_left_div = 1
def end_div(self): pass
def do_hr(self, attrs):
self._seen_hr = 1
def do_br(self, attrs):
if self._seen_hr: return
self._cow = self._cow.strip()
if self._in_content and self._cow:
self._ow.append(self._cow.strip())
self._ow.append(self._cow)
self._cow = u''
def _handle_data(self, data):
if not self._seen_h5: return
if self._seen_hr or self._seen_left_div: return
if self._in_content:
if self._dostrip:
data = data.lstrip()
......
......@@ -25,7 +25,7 @@ along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
"""
from imdb.utils import analyze_title
from imdb.utils import analyze_title, analyze_name
from utils import ParserBase
from imdb.Movie import Movie
......@@ -151,79 +151,108 @@ class BasicMovieParser(ParserBase):
class HTMLSearchMovieParser(ParserBase):
"""Parse the html page that the IMDb web server shows when the
"new search system" is used."""
"new search system" is used, for both movies and persons."""
# Customizations for movie and person parsers.
_k = {
'movie':
{'analyze_f': analyze_title,
'link': '/title',
'in title': 'imdb title'},
'person':
{'analyze_f': analyze_name,
'link': '/name',
'in title': 'imdb name'},
}
def _init(self):
"""Initialize the parser."""
self.kind = 'movie'
self._basic_parser = BasicMovieParser
def _reset(self):
"""Reset the parser."""
self._results = []
self._begin_list = 0
self.