Upgrading to GitLab 11.10.0. Expect errors and see debian-infrastructure-announce@lists.debian.org for further information.

Commit 5842f76f authored by Ana Guerrero López's avatar Ana Guerrero López

Import Upstream version 4.8.2

parent 6d743aa2
Metadata-Version: 1.0
Name: IMDbPY
Version: 4.7
Summary: Python package to access the IMDb's database
Home-page: http://imdbpy.sf.net/
Author: Davide Alberani
Author-email: da@erlug.linux.it
License: GPL
Download-URL: http://imdbpy.sf.net/?page=download
Description: IMDbPY is a Python package useful to retrieve and
manage the data of the IMDb movie database about movies, people,
characters and companies.
Platform-independent and written in pure Python (and few C lines),
it can retrieve data from both the IMDb's web server and a local copy
of the whole database.
IMDbPY package can be very easily used by programmers and developers
to provide access to the IMDb's data to their programs.
Some simple example scripts - useful for the end users - are included
in this package; other IMDbPY-based programs are available at the
home page: http://imdbpy.sf.net/
Keywords: imdb,movie,people,database,cinema,film,person,cast,actor,actress,director,sql,character,company,package,plain text data files,keywords,top250,bottom100,xml
Platform: any
Classifier: Development Status :: 5 - Production/Stable
Classifier: Environment :: Console
Classifier: Environment :: Web Environment
Classifier: Environment :: Handhelds/PDA's
Classifier: Intended Audience :: Developers
Classifier: Intended Audience :: End Users/Desktop
Classifier: License :: OSI Approved :: GNU General Public License (GPL)
Classifier: Natural Language :: English
Classifier: Natural Language :: Italian
Classifier: Natural Language :: Turkish
Classifier: Programming Language :: Python
Classifier: Programming Language :: C
Classifier: Operating System :: OS Independent
Classifier: Topic :: Database :: Front-Ends
Classifier: Topic :: Internet :: WWW/HTTP :: Dynamic Content :: CGI Tools/Libraries
Classifier: Topic :: Software Development :: Libraries :: Python Modules
MANIFEST.in
ez_setup.py
setup.cfg
setup.py
./bin/get_character.py
./bin/get_company.py
./bin/get_first_character.py
./bin/get_first_company.py
./bin/get_first_movie.py
./bin/get_first_person.py
./bin/get_keyword.py
./bin/get_movie.py
./bin/get_person.py
./bin/get_top_bottom_movies.py
./bin/imdbpy2sql.py
./bin/search_character.py
./bin/search_company.py
./bin/search_keyword.py
./bin/search_movie.py
./bin/search_person.py
IMDbPY.egg-info/PKG-INFO
IMDbPY.egg-info/SOURCES.txt
IMDbPY.egg-info/dependency_links.txt
IMDbPY.egg-info/not-zip-safe
IMDbPY.egg-info/requires.txt
IMDbPY.egg-info/top_level.txt
docs/AUTHOR.txt
docs/CONTRIBUTORS.txt
docs/CREDITS.txt
docs/Changelog.txt
docs/DISCLAIMER.txt
docs/FAQS.txt
docs/GPL.txt
docs/INSTALL.txt
docs/LICENSE.txt
docs/README.adult
docs/README.companies
docs/README.currentRole
docs/README.devel
docs/README.info2xml
docs/README.keywords
docs/README.local
docs/README.locale
docs/README.logging
docs/README.mobile
docs/README.newparsers
docs/README.package
docs/README.redesign
docs/README.series
docs/README.sqldb
docs/README.txt
docs/README.users
docs/README.utf8
docs/TODO.txt
docs/imdbpy.cfg
docs/imdbpy47.dtd
docs/imdbpyPowered.png
docs/imdbpyico.png
docs/imdbpyico.xpm
docs/imdbpyico16x16.ico
docs/imdbpyico32x32.ico
docs/imdbpywin.bmp
docs/goodies/README.txt
docs/goodies/applydiffs.sh
docs/goodies/reduce.sh
imdb/Character.py
imdb/Company.py
imdb/Movie.py
imdb/Person.py
imdb/__init__.py
imdb/_compat.py
imdb/_exceptions.py
imdb/_logging.py
imdb/articles.py
imdb/helpers.py
imdb/utils.py
imdb/locale/__init__.py
imdb/locale/__init__.pyc
imdb/locale/generatepot.py
imdb/locale/imdbpy-en.po
imdb/locale/imdbpy-it.po
imdb/locale/imdbpy-tr.po
imdb/locale/imdbpy.pot
imdb/locale/msgfmt.py
imdb/locale/msgfmt.pyc
imdb/locale/rebuildmo.py
imdb/locale/rebuildmo.pyc
imdb/locale/en/LC_MESSAGES/imdbpy.mo
imdb/locale/it/LC_MESSAGES/imdbpy.mo
imdb/locale/tr/LC_MESSAGES/imdbpy.mo
imdb/parser/__init__.py
imdb/parser/http/__init__.py
imdb/parser/http/characterParser.py
imdb/parser/http/companyParser.py
imdb/parser/http/movieParser.py
imdb/parser/http/personParser.py
imdb/parser/http/searchCharacterParser.py
imdb/parser/http/searchCompanyParser.py
imdb/parser/http/searchKeywordParser.py
imdb/parser/http/searchMovieParser.py
imdb/parser/http/searchPersonParser.py
imdb/parser/http/topBottomParser.py
imdb/parser/http/utils.py
imdb/parser/http/bsouplxml/__init__.py
imdb/parser/http/bsouplxml/_bsoup.py
imdb/parser/http/bsouplxml/bsoupxpath.py
imdb/parser/http/bsouplxml/etree.py
imdb/parser/http/bsouplxml/html.py
imdb/parser/mobile/__init__.py
imdb/parser/sql/__init__.py
imdb/parser/sql/alchemyadapter.py
imdb/parser/sql/cutils.c
imdb/parser/sql/dbschema.py
imdb/parser/sql/objectadapter.py
\ No newline at end of file
SQLObject
SQLAlchemy
sqlalchemy-migrate
lxml
\ No newline at end of file
Metadata-Version: 1.0
Name: IMDbPY
Version: 4.7
Summary: Python package to access the IMDb's database
Home-page: http://imdbpy.sf.net/
Author: Davide Alberani
Author-email: da@erlug.linux.it
License: GPL
Download-URL: http://imdbpy.sf.net/?page=download
Description: IMDbPY is a Python package useful to retrieve and
manage the data of the IMDb movie database about movies, people,
characters and companies.
Platform-independent and written in pure Python (and few C lines),
it can retrieve data from both the IMDb's web server and a local copy
of the whole database.
IMDbPY package can be very easily used by programmers and developers
to provide access to the IMDb's data to their programs.
Some simple example scripts - useful for the end users - are included
in this package; other IMDbPY-based programs are available at the
home page: http://imdbpy.sf.net/
Keywords: imdb,movie,people,database,cinema,film,person,cast,actor,actress,director,sql,character,company,package,plain text data files,keywords,top250,bottom100,xml
Platform: any
Classifier: Development Status :: 5 - Production/Stable
Classifier: Environment :: Console
Classifier: Environment :: Web Environment
Classifier: Environment :: Handhelds/PDA's
Classifier: Intended Audience :: Developers
Classifier: Intended Audience :: End Users/Desktop
Classifier: License :: OSI Approved :: GNU General Public License (GPL)
Classifier: Natural Language :: English
Classifier: Natural Language :: Italian
Classifier: Natural Language :: Turkish
Classifier: Programming Language :: Python
Classifier: Programming Language :: C
Classifier: Operating System :: OS Independent
Classifier: Topic :: Database :: Front-Ends
Classifier: Topic :: Internet :: WWW/HTTP :: Dynamic Content :: CGI Tools/Libraries
Classifier: Topic :: Software Development :: Libraries :: Python Modules
......@@ -5,7 +5,7 @@ imdbpy2sql.py script.
This script puts the data of the plain text data files into a
SQL database.
Copyright 2005-2010 Davide Alberani <da@erlug.linux.it>
Copyright 2005-2011 Davide Alberani <da@erlug.linux.it>
2006 Giuseppe "Cowo" Corbelli <cowo --> lugbs.linux.it>
This program is free software; you can redistribute it and/or modify
......@@ -72,8 +72,9 @@ HELP = """imdbpy2sql.py usage:
IMDB_PTDF_DIR = None
# URI used to connect to the database.
URI = None
# ORM to use.
# ORM to use (list of options) and actually used (string).
USE_ORM = None
USED_ORM = None
# List of tables of the database.
DB_TABLES = []
# Max allowed recursion, inserting data.
......@@ -292,17 +293,18 @@ for idx, mod in enumerate(USE_ORM):
globals()[t._imdbpyName] = t
if _gotError:
warnings.warn('falling back to "%s".' % mod)
USED_ORM = mod
break
except ImportError, e:
if idx+1 >= nrMods:
raise IMDbError, 'unable to use any ORM in %s: %s' % (
str(USE_ORM), str(e))
raise IMDbError('unable to use any ORM in %s: %s' % (
str(USE_ORM), str(e)))
else:
warnings.warn('unable to use "%s": %s' % (mod, str(e)))
_gotError = True
continue
else:
raise IMDbError, 'unable to use any ORM in %s' % str(USE_ORM)
raise IMDbError('unable to use any ORM in %s' % str(USE_ORM))
#-----------------------
......@@ -515,6 +517,13 @@ except AttributeError, e:
IntegrityError = Exception
connectObject = conn.getConnection()
# XXX: fix for a problem that should be fixed in objectadapter.py (see it).
if URI and URI.startswith('sqlite') and USED_ORM == 'sqlobject':
major = sys.version_info[0]
minor = sys.version_info[1]
if major > 2 or (major == 2 and minor > 5):
connectObject.text_factory = str
# Cursor object.
CURS = connectObject.cursor()
......@@ -918,7 +927,6 @@ class _BaseCache(dict):
except Exception, e:
if isinstance(e, KeyboardInterrupt):
raise
raise
print 'WARNING: unknown exception caught committing the data'
print 'WARNING: to the database; report this as a bug, since'
print 'WARNING: many data (%d items) were lost: %s' % \
......@@ -1297,10 +1305,10 @@ class SQLData(dict):
d={}, flushEvery=20000, counterInit=1):
if not sqlString:
if not (table and cols):
raise TypeError, '"table" or "cols" unspecified'
raise TypeError('"table" or "cols" unspecified')
sqlString, converter = createSQLstr(table, cols)
elif converter is None:
raise TypeError, '"sqlString" or "converter" unspecified'
raise TypeError('"sqlString" or "converter" unspecified')
dict.__init__(self)
self.counterInit = counterInit
self.counter = counterInit
......@@ -2743,7 +2751,7 @@ def restoreCSV():
# begin the iterations...
def run():
print 'RUNNING imdbpy2sql.py'
print 'RUNNING imdbpy2sql.py using the %s ORM' % USED_ORM
executeCustomQueries('BEGIN')
......
......@@ -21,6 +21,10 @@ Actually, besides Turgut, Giuseppe and me, these other people are
listed as developers for the IMDbPY project on sourceforge and may
share copyright on some (minor) portions of the code:
NAME: Alberto Malagoli
CONTRIBUTION: developed the new web site, and detain the copyright of it.
NAME: Martin Kirst
EMAIL: <martin.kirst --> s1998.tu-chemnitz.de>
CONTRIBUTION: has done an important refactoring of the imdbpyweb
......
......@@ -21,6 +21,46 @@ of help, and also for the wonderful http://bitbucket.org)
Below, a list of persons who contributed with bug reports, small
patches and hints (kept in a reverse order since IMDbPY 4.5):
* Arfrever Frehtes Taifersar Arahesis for a patch for locales.
* Gustaf Nilsson for bug reports about BeautifulSoup.
* Jernej Kos for patches to handle "in production" information
and birth/death years.
* Saravanan Thirumuruganathan for a bug report about genres in mobile.
* Paul Koan, for a bug report about DVD pages and movie references.
* Greg Walters for a report about a bug with queries with too
many results.
* Olav Kolbu for tests and report about how the IMDb.com servers
reply to queries made with and without cookies.
* Jef "ofthelit", for a patch for the reduce.sh script bug
reports for Windows.
* "Darklow" for an impressive amount of tests and reports about
a bug about data parsing in the plain text data files.
* Reiner Herrmann for benchmarks using SSD hard drives.
* Thomas Stewart for some tests and reports about a bug
with charset in the plain text data files.
* Ju-Hee Bae for an important series of bug reports about
the problems derived by the last IMDb's redesign.
* Luis Liras and Petite Abeille for a report and a bugfix about
imdbpy2sql.py used with SQLite and SQLObject.
* Kevin S. Anthony for a bug report about episodes list.
* Bhupinder Singh for a bug report about exception handling in Python 2.4.
* Ronald Hatcher for a bug report on the GAE environment.
* Ramusus for a lot of precious bug reports.
* Laurent Vergne for a hint about InnoDB, MyISAM and foreign keys.
......
Changelog for IMDbPY
====================
* What's new in release 4.8.2 "The Big Bang Theory" (02 Nov 2011)
[general]
- fixed install path of locales.
[http]
- removed debug code.
* What's new in release 4.8 "Super" (01 Nov 2011)
[general]
- fix for a problem managing exceptions with Python 2.4.
- converted old-style exceptions to instances.
- enhancements for the reduce.sh script.
- added notes about problems connecting to IMDb's web servers.
- improvements in the parsers of movie titles.
- improvements in the parser of person names.
[http]
- potential fix for GAE environment.
- handled the new style of "in production" information.
- fix for 'episodes' list.
- fix for 'episodes rating'.
- fix for queries that returned too many results.
- fix for wrong/missing references.
- removed no more available information set "amazon
reviews" and "dvd".
- fix for cast of tv series.
- fix for title of tv series.
- now the BeautifulSoup parsers work again.
[httpThin]
- removed "httpThin", falling back to "http".
[mobile]
- fix for missing headshots.
- fix for rating and number of votes.
- fix for missing genres.
- many other fixes to keep up-to-date with the IMDb site.
[sql]
- fix for a nasty bug parsing notes about character names.
- fixes for SQLite with SQLObject.
* What's new in release 4.7 "Saw VI" (23 Jan 2011)
[http]
- first fixes for the new set of parsers.
......
......@@ -86,7 +86,7 @@ Q6: using a sql database, how can I convert a movieID (whose value
is valid only locally) to an imdbID (the ID used by the imdb.com site)?
A6: various functions can be used to convert a movieID (or personID or
other IDs) to the imdbID used by the seb site.
other IDs) to the imdbID used by the web site.
Example of code:
from imdb import IMDb
......@@ -98,7 +98,7 @@ A6: various functions can be used to convert a movieID (or personID or
It goes without saying that get_imdbMovieID has some sibling
methods: get_imdbPersonID, get_imdbCompanyID and get_imdbCharacterID.
Also notice that the get_imdbID method is smater, and takes any kind
Also notice that the get_imdbID method is smarter, and takes any kind
of instance (the other functions need a movieID, personID, ...)
Another method that will try to retrieve the imdbID is get_imdbURL,
......
......@@ -50,7 +50,9 @@ E.g.:
The do_adult_search() method of the http and mobile data access system
also takes another couple of arguments: "cookie_id" and "cookie_uu", so
that you can select _your own_ IMDb's account.
that you can select _your own_ IMDb's account; if cookie_id is set to
None, no cookies are sent. These parameters can also be set in
the imdbpy.cfg configuration file.
For the strings to use, see your "cookie" or "cookie.txt" file.
Obviously you need to activate the "adult movies" option for
your account; see http://imdb.com/find/preferences?_adult=1
......
IMDbPY HTTP CONNECTION
======================
HTTP is the default data access system of IMDbPY, meaning that by default
data are requested at the IMDb web servers.
For other kinds of data access, see README.sqldb and README.mobile.
By default IMDbPY uses its own account to access the IMDb web server (this
is done to enable searches on adult titles); if you want to uses your own
account, see README.adult.
CONNECTION PROBLEMS
===================
It has been reported some kind of problems connecting to the IMDb servers;
the problem seems to be related to the use of our cookie and the geographical
location of the user.
If you experience such a problem, report it and try to disable the use of the
cookie (to do so, see README.adult).
......@@ -164,9 +164,14 @@ complete plain text data files set (as of 11 Apr 2008, with more than
| Timed with the "--sqlite-transactions" command
| line option; otherwise it's _really_ slow: even
| 35 hours or more.
SQLite 3.7 | 65/13 - with --sqlite-transactions and using a SSD hard disk
SQL Server | about 3 or 4 hours.
If you have different experiences, please tell me!
As expected, the most important things that you can do to improve performance are:
1. use an in-memory filesystem or an SSD disk.
2. use the -c /path/to/empty/dir argument to use CSV files.
3. follow the specific notes about your database server.
NOTES
......
......@@ -39,6 +39,11 @@ with - respectively - these commands (as root):
easy_install IMDbPY
pip install IMDbPY
Using easy_install and pip, the dependencies will be automatically
satisfied. Third-party packages may be downloaded, and if not
otherwise specified (see below), C extensions compiled (this means
that you need the python-dev package installed).
If, for some reason, it doesn't work, you can copy the "./imdb"
directory in the local site-packages directory of the python
major version you're using, but remember that you'll not satisfy
......@@ -106,6 +111,7 @@ package may be required for certain functionality:
All of them should probably be "recommended" (or at least "suggested")
dependencies.
To compile the C module, you also need the python-dev package.
As of IMDbPY 4.0, the installer is based on setuptools.
......
#!/bin/sh
#!/bin/bash
#
# reduce.sh: Bash script useful to create a "slimmed down" version of the
# IMDb's plain text data files.
......@@ -11,6 +11,10 @@
# This program is released under the terms of the GNU GPL 2 or later license.
#
# Cygwin packages to install (Windows):
# - util-unix for rev
# - gzip for gzip, zcat, zgrep
# Directory with the plain text data file.
ORIG_DIR="."
# Directory where "reduced" files will be stored; it will be created if needed.
......@@ -38,10 +42,10 @@ do
CONSIDER="`expr $LINES / $DIV_BY`"
FULL_CONS="$CONSIDER"
CONSIDER="`expr $CONSIDER / 2`"
NEWNAME="`echo "$file" | rev | cut -c 4- | rev`"
NEWNAME="`echo "$file" | rev | cut -c 4- | rev `"
# Tries to keep enough lines from the top of the file.
MIN_TOP_LINES="`zgrep -m 1 "^-----------------------------------------" -n "$file" | cut -d : -f 1`"
MIN_TOP_LINES="`zgrep -n -m 1 "^-----------------------------------------" "$file" | cut -d : -f 1`"
if test -z "$MIN_TOP_LINES" ; then
MIN_TOP_LINES=0
fi
......@@ -86,7 +90,7 @@ do
else
MIN_TOP_LINES="`expr $MIN_TOP_LINES + 60`"
fi
if test $MIN_TOP_LINES -gt $CONSIDER ; then
if test "$MIN_TOP_LINES" -gt "$CONSIDER" ; then
TOP_CONSIDER=$MIN_TOP_LINES
else
TOP_CONSIDER=$CONSIDER
......
<!--
XML Document Type Definition for IMDbPY 4.6.
XML Document Type Definition for IMDbPY 4.8.
http://imdbpy.sf.net/dtd/imdbpy46.dtd
http://imdbpy.sf.net/dtd/imdbpy48.dtd
Copyright 2009 H. Turgut Uyar <uyar@tekir.org>
2009-2010 Davide Alberani <da@erlug.linux.it>
2009-2011 Davide Alberani <da@erlug.linux.it>
-->
......@@ -49,6 +49,7 @@
| costume-department
| costume-designer
| countries
| country-codes
| cover-url
| crazy-credits
| creator
......@@ -76,6 +77,7 @@
| kind
| laboratory
| languages
| language-codes
| laserdisc
| literature
| locations
......@@ -446,6 +448,8 @@
<!ATTLIST complete-crew %common.attrs;>
<!ELEMENT countries (item)*>
<!ATTLIST countries %common.attrs;>
<!ELEMENT country-codes (item)*>
<!ATTLIST country-codes %common.attrs;>
<!ELEMENT crazy-credits (item)*>
<!ATTLIST crazy-credits %common.attrs;>
<!ELEMENT demographic (item)*>
......@@ -470,6 +474,8 @@
<!ATTLIST laboratory %common.attrs;>
<!ELEMENT languages (item)*>
<!ATTLIST languages %common.attrs;>
<!ELEMENT language-codes (item)*>
<!ATTLIST language-codes %common.attrs;>
<!ELEMENT locations (item)*>
<!ATTLIST locations %common.attrs;>
<!ELEMENT magazine-cover-photo (item)*>
......
......@@ -86,8 +86,12 @@ class Character(_Container):
def set_name(self, name):
"""Set the name of the character."""
# XXX: convert name to unicode, if it's a plain string?
d = analyze_name(name, canonical=0)
self.data.update(d)
try:
d = analyze_name(name, canonical=0)
self.data.update(d)
except:
# TODO: catch only IMDbPYParserError and issue a warning.
pass
def _additional_keys(self):
"""Valid keys to append to the data.keys() list."""
......
This diff is collapsed.
......@@ -67,6 +67,6 @@ if os.name == 'e32':
return (int(res[0]), int(res[1]), int(res[2]),
0, 0, 0, 0, 1, 0)
except:
raise ValueError, u'error in IMDbPY\'s ad-hoc strptime!'
raise ValueError('error in IMDbPY\'s ad-hoc strptime!')
time.strptime = strptime
......@@ -33,7 +33,7 @@ class IMDbError(Exception):
self._logger.critical('%s exception raised; args: %s; kwds: %s',
self.__class__.__name__, args, kwargs,
exc_info=True)
super(IMDbError, self).__init__(*args, **kwargs)
Exception.__init__(self, *args, **kwargs)
class IMDbDataAccessError(IMDbError):
"""Exception raised when is not possible to access needed data."""
......@@ -43,4 +43,3 @@ class IMDbParserError(IMDbError):
"""Exception raised when an error occurred parsing the data."""
pass
This diff is collapsed.
......@@ -357,7 +357,7 @@ class PredicateFilter:
first = node[attribute_name]
return first.startswith(self.arguments[2])
elif self.arguments[1] == 'text()':
first = node.contents[0]
first = node.contents and node.contents[0]
if isinstance(first, BeautifulSoup.NavigableString):
return first.startswith(self.arguments[2])
return False
......
This diff is collapsed.
......@@ -8,7 +8,7 @@ E.g., for "Mel Gibson" the referred pages would be:
biography: http://akas.imdb.com/name/nm0000154/bio
...and so on...
Copyright 2004-2010 Davide Alberani <da@erlug.linux.it>
Copyright 2004-2011 Davide Alberani <da@erlug.linux.it>
2008 H. Turgut Uyar <uyar@tekir.org>
This program is free software; you can redistribute it and/or modify
......@@ -62,24 +62,12 @@ class DOMHTMLMaindetailsParser(DOMParserBase):
_containsObjects = True
_birth_attrs = [Attribute(key='birth date',
path={
'day': ".//a[starts-with(@href, " \
"'/date/')]/text()",
'year': ".//a[starts-with(@href, " \
"'/search/name?birth_year=')]/text()"
},
postprocess=build_date),
path='.//time[@itemprop="birthDate"]/@datetime'),
Attribute(key='birth place',
path=".//a[starts-with(@href, " \
"'/search/name?birth_place=')]/text()")]
_death_attrs = [Attribute(key='death date',
path={
'day': ".//a[starts-with(@href, " \
"'/date/')]/text()",
'year': ".//a[starts-with(@href, " \
"'/search/name?death_year=')]/text()"
},
postprocess=build_date),
path='.//time[@itemprop="deathDate"]/@datetime'),
Attribute(key='death place',
path=".//a[starts-with(@href, " \
"'/search/name?death_place=')]/text()")]
......@@ -495,45 +483,6 @@ class DOMHTMLPersonGenresParser(DOMParserBase):
return {self.kind: data}
from movieParser import _parse_merchandising_link
class DOMHTMLPersonSalesParser(DOMParserBase):
"""Parser for the "merchandising links" page of a given person.
The page should be provided as a string, as taken from
the akas.imdb.com server. The final result will be a
dictionary, with a key for every relevant section.
Example:
sparser = DOMHTMLPersonSalesParser()
result = sparser.parse(sales_html_string)
"""
extractors = [
Extractor(label='merchandising links',
group="//span[@class='merch_title']",
group_key=".//text()",
path="./following-sibling::table[1]/" \
"/td[@class='w_rowtable_colshop']//tr[1]",
attrs=Attribute(key=None,
multi=True,
path={
'link': "./td[2]/a[1]/@href",
'text': "./td[1]/img[1]/@alt",
'cover': "./ancestor::td[1]/../" \
"td[1]/a[1]/img[1]/@src",
},
postprocess=_parse_merchandising_link)),
]
preprocessors = [
(re.compile('(<a name="[^"]+" )/>', re.I), r'\1></a>')
]
def postprocess_data(self, data):
if len(data) == 0:
return {}
return {'merchandising links': data}
from movieParser import DOMHTMLTechParser
from movieParser import DOMHTMLOfficialsitesParser
from movieParser import DOMHTMLAwardsParser
......@@ -554,6 +503,5 @@ _OBJECTS = {
'person_keywords_parser': ((DOMHTMLPersonGenresParser,),
{'kind': 'keywords'}),
'news_parser': ((DOMHTMLNewsParser,), None),
'sales_parser': ((DOMHTMLPersonSalesParser,), None)
}
......@@ -154,14 +154,18 @@ class DOMHTMLSearchMovieParser(DOMParserBase):
# Horrible hack to support AKAs.
if data and data['data'] and len(data['data'][0]) == 3 and \
isinstance(data['data'][0], tuple):
data['data'] = [x for x in data['data'] if x[0] and x[1]]
for idx, datum in enumerate(data['data']):
if not isinstance(datum, tuple):
continue
if not datum[0] and datum[1]:
continue
if datum[2] is not None:
akas = filter(None, datum[2].split('::'))
if self._linkPrefix == '/title/tt':
akas = [a.replace('" - ', '::').rstrip() for a in akas]
akas = [a.replace('aka "', '', 1).lstrip() for a in akas]
akas = [a.replace('aka "', '', 1).replace('aka "',
'', 1).lstrip() for a in akas]
datum[1]['akas'] = akas
data['data'][idx] = (datum[0], datum[1])