Commit 7c5df7e6 authored by Ana Guerrero López's avatar Ana Guerrero López

Import Upstream version 3.3

parent 9e0f4d0f
#!/usr/bin/env python
"""
get_character.py
Usage: get_character "characterID"
Show some info about the character with the given imdbID (e.g. '0000001'
for "Jesse James".
"""
# Parameters to initialize the IMDb class.
IMDB_PARAMS = {
# The used access system. 'web' means that you're retrieving data
# from the IMDb web server.
'accessSystem': 'web'
#'accessSystem': 'mobile'
# XXX: if you've a local installation of the IMDb database,
# comment the above line and uncomment the following two.
#'accessSystem': 'local',
#'dbDirectory': '/usr/local/imdb' # or, in a Windows environment:
#'dbDirectory': 'D:/imdb-20060107'
# XXX: parameters for a SQL installation.
#'accessSystem': 'sql',
#'uri': 'mysql://userName:yourPassword@localhost/dbName'
}
import sys
# Import the IMDbPY package.
try:
import imdb
except ImportError:
print 'You bad boy! You need to install the IMDbPY package!'
sys.exit(1)
if len(sys.argv) != 2:
print 'Only one argument is required:'
print ' %s "imdbID"' % sys.argv[0]
sys.exit(2)
imdbID = sys.argv[1]
i = imdb.IMDb(**IMDB_PARAMS)
out_encoding = sys.stdout.encoding or sys.getdefaultencoding()
try:
# Get a character object with the data about the character identified by
# the given imdbID.
character = i.get_character(imdbID)
except imdb.IMDbError, e:
print "Probably you're not connected to Internet. Complete error report:"
print e
sys.exit(3)
if not character:
print 'It seems that there\'s no character with imdbID "%s"' % imdbID
sys.exit(4)
# XXX: this is the easier way to print the main info about a character;
# calling the summary() method of a character object will returns a string
# with the main information about the character.
# Obviously it's not really meaningful if you want to know how
# to access the data stored in a character object, so look below; the
# commented lines show some ways to retrieve information from a
# character object.
print character.summary().encode(out_encoding, 'replace')
#!/usr/bin/env python
"""
get_first_character.py
Usage: get_first_character "character name"
Search for the given name and print the best matching result.
"""
# Parameters to initialize the IMDb class.
IMDB_PARAMS = {
# The used access system. 'web' means that you're retrieving data
# from the IMDb web server.
'accessSystem': 'web'
#'accessSystem': 'mobile'
# XXX: if you've a local installation of the IMDb database,
# comment the above line and uncomment the following two.
#'accessSystem': 'local',
#'dbDirectory': '/usr/local/imdb' # or, in a Windows environment:
#'dbDirectory': 'D:/imdb-20060107'
# XXX: parameters for a SQL installation.
#'accessSystem': 'sql',
#'uri': 'mysql://userName:yourPassword@localhost/dbName'
}
import sys
# Import the IMDbPY package.
try:
import imdb
except ImportError:
print 'You bad boy! You need to install the IMDbPY package!'
sys.exit(1)
if len(sys.argv) != 2:
print 'Only one argument is required:'
print ' %s "character name"' % sys.argv[0]
sys.exit(2)
name = sys.argv[1]
i = imdb.IMDb(**IMDB_PARAMS)
in_encoding = sys.stdin.encoding or sys.getdefaultencoding()
out_encoding = sys.stdout.encoding or sys.getdefaultencoding()
name = unicode(name, in_encoding, 'replace')
try:
# Do the search, and get the results (a list of character objects).
results = i.search_character(name)
except imdb.IMDbError, e:
print "Probably you're not connected to Internet. Complete error report:"
print e
sys.exit(3)
if not results:
print 'No matches for "%s", sorry.' % name.encode(out_encoding, 'replace')
sys.exit(0)
# Print only the first result.
print ' Best match for "%s"' % name.encode(out_encoding, 'replace')
# This is a character instance.
character = results[0]
# So far the character object only contains basic information like the
# name; retrieve main information:
i.update(character)
print character.summary().encode(out_encoding, 'replace')
This diff is collapsed.
#!/usr/bin/env python
"""
search_character.py
Usage: search_character "character name"
Search for the given name and print the results.
"""
# Parameters to initialize the IMDb class.
IMDB_PARAMS = {
# The used access system. 'web' means that you're retrieving data
# from the IMDb web server.
'accessSystem': 'web'
#'accessSystem': 'mobile'
# XXX: if you've a local installation of the IMDb database,
# comment the above line and uncomment the following two.
#'accessSystem': 'local',
#'dbDirectory': '/usr/local/imdb' # or, in a Windows environment:
#'dbDirectory': 'D:/imdb-20060107'
# XXX: parameters for a SQL installation.
#'accessSystem': 'sql',
#'uri': 'mysql://userName:yourPassword@localhost/dbName'
}
import sys
# Import the IMDbPY package.
try:
import imdb
except ImportError:
print 'You bad boy! You need to install the IMDbPY package!'
sys.exit(1)
if len(sys.argv) != 2:
print 'Only one argument is required:'
print ' %s "character name"' % sys.argv[0]
sys.exit(2)
name = sys.argv[1]
i = imdb.IMDb(**IMDB_PARAMS)
in_encoding = sys.stdin.encoding or sys.getdefaultencoding()
out_encoding = sys.stdout.encoding or sys.getdefaultencoding()
name = unicode(name, in_encoding, 'replace')
try:
# Do the search, and get the results (a list of character objects).
results = i.search_character(name)
except imdb.IMDbError, e:
print "Probably you're not connected to Internet. Complete error report:"
print e
sys.exit(3)
# Print the results.
print ' %s result%s for "%s":' % (len(results),
('', 's')[len(results) != 1],
name.encode(out_encoding, 'replace'))
print 'characterID\t: imdbID : name'
# Print the long imdb name for every character.
for character in results:
print '%s\t\t: %s : %s' % (character.characterID,
i.get_imdbID(character),
character['long imdb name'].encode(out_encoding, 'replace'))
......@@ -13,11 +13,19 @@ I'd like to thank the following people for their help:
* H. Turgut Uyar for a number of bug reports and a lot of work on
the test-suite.
* Jesper Nøhr for a lot of testing, especially on the 'sql'.
* Jesper Nøhr for a lot of testing, especially on 'sql'.
* Ken R. Garland for a bug report about 'cover url' and a lot of
other hints.
* Steven Ovits for hints and tests with Microsoft SQL Server, SQLExpress
and preliminary work on supporting diff files.
* Arnab for a bug report in the imdbpy2sql.py script.
* Elefterios Stamatogiannakis for the hint about transactions and SQLite,
to obtain an impressive improvement in performances.
* Jon Sabo for a bug report about unicode and the imdbpy2sql.py script
and some feedback.
......
Changelog for IMDbPY
====================
* What's the new in release 3.3 "Heroes" (18 Nov 2007)
[general]
- first support for character pages; only for "http" and "mobile", so far.
- support for multiple characters.
- introduced a helper function to pretty-print objects.
- added README.currentRole.
- fixed minor bug in the __hash__ method of the _Container class.
- fixed changes to some key names for movies.
- introduced the search_character.py, get_character.py and
get_first_character.py example scripts.
[http]
- full support for character pages.
- fixed a bug retrieving some 'cover url'.
- fixed a bug with multi-paragraphs biographies.
- parsers are now instanced on demand.
- accessSystem and modFunct are correctly set for every Movie, Person
and Character object instanced.
[mobile]
- full support for character pages.
[sql]
- extended functionality of the custom queries support for the
imdbpy2sql.py script to circumvent a problem with MS SQLServer.
- introduced the "--mysql-innodb" and "--ms-sqlserver" shortcuts
for the imdbpy2sql.py script.
- introduced the "--sqlite-transactions" shortcut to activate
transaction using SQLite which, otherwise, would have horrible
performances.
- fixed a minor bug with top/bottom ratings, in the imdbpy2sql.py script.
[local]
- filtered out some crap in the "quotes" plain text data files, which
also affected sql, importing the data.
* What's the new in release 3.2 "Videodrome" (25 Sep 2007)
[global]
- now there's an unique place where "akas.imdb.com" is set, in the
......
THE currentRole ATTRIBUTE AND THE Character CLASS
=================================================
Since version 3.3, IMDbPY supports the character pages of the IMDb
database; this required some substantial changes to how actors'
and actresses' roles were handled.
So far, only the "http", "httpThin" and "mobile" data access systems
can manage the character pages.
The currentRole instance attribute can be found in every instance
of Person, Movie and Character classes, even if actually the Character
never uses it.
The currentRole of a Person object is set to a Character instance,
inside a list of persons who acted in a given movie.
The currentRole of a Movie object is set to a Character instance,
inside a list of movies played by a given person.
The currentRole of a Movie object is set to a Person instance,
inside a list of movies in which a given character was portrayed.
Schema:
movie['cast'][0].currentRole -> a Character object.
|
+-> a Person object.
person['actor'][0].currentRole -> a Character object.
|
+-> a Movie object.
character['filmography'][0].currentRole -> a Person object.
|
+-> a Movie object.
The roleID attribute can be used to access/set the characterID
or personID instance attribute of the current currentRole.
Building Movie or Person objects, you can pass the currentRole
parameter and the roleID parameter (to set the ID).
The currentRole parameter can be an object (Character or Person),
a unicode string (in which case a Character or Person object is
automatically instanced) or a list of objects or strings (to
handle multiple characters played by the same actor/actress in
a movie, or a character played by more than a single actor/actress
in the same movie).
Anyway, currentRole objects (Character or Person instances) can
be pretty-printed easily: calling unicode(CharacterOrPersonObject)
will return a good-old-unicode string, like expected in the previous
version of IMDbPY.
GOODIES
=======
To help getting the required information from Movie, Person and
Character objects, in the "helpers" module there's a new factory
function, makeObject2Txt, which can be used to create your
pretty-printing function.
It takes some optional parameters: movieTxt, personTxt, characterTxt;
in these strings %(value)s items are replaced with object['value'] or
with obj.value (if the first is not present).
E.g.:
import imdb
myPrint = imdb.helpers.makeObject2Txt(personTxt=u'%(name)s ... %(currentRole)s')
i = imdb.IMDb()
m = i.get_movie('0057012')
ps = m['cast'][0]
print myPrint(ps)
# The output will be something like:
Peter Sellers ... Group Captain Lionel Mandrake / President Merkin Muffley / Dr. Strangelove
Portions of the formatting string can be stripped conditionally: if
the specified condition is false, they will be cancelled.
E.g.:
myPrint = imdb.helpers.makeObject2Txt(personTxt='<if personID><a href=/person/%(personID)s></if personID>%(long imdb name)s<if personID></a></if personID><if currentRole> ... %(currentRole)s<if notes> %(notes)s</if notes></if currentRole>')
Another useful argument is 'applyToValues': if set to a function,
it will be applied to every value before the substitution; it can
be useful to format strings for html output.
......@@ -18,6 +18,7 @@ imdb (package)
+-> _exceptions
+-> Movie
+-> Person
+-> Character
+-> utils
+-> helpers
+-> parser (package)
......@@ -26,8 +27,10 @@ imdb (package)
| |
| +-> movieParser
| +-> personParser
| +-> characterParser
| +-> searchMovieParser
| +-> searchPersonParser
| +-> searchCharacterParser
| +-> utils
|
+-> local (package)
......@@ -54,6 +57,8 @@ imdb (package): contains the IMDb function, the IMDbBase class and imports
_exceptions: defines the exceptions internally used.
Movie: contains the Movie class, used to describe and manage a movie.
Person: contains the Person class, used to describe and manage a person.
Character: contains the Character class, used to describe and manage
a character.
utils: miscellaneous utilities used by many IMDbPY modules.
parser (package): a package containing a package for every data access system
implemented.
......@@ -66,10 +71,14 @@ http.movieParser: parse html strings from the pages on the IMDb web server about
a movie; returns dictionaries of {key: value}
http.personParser: parse html strings from the pages on the IMDb web server
about a person; returns dictionaries.
http.characterParser: parse html strings from the pages on the IMDb web server
about a character; returns dictionaries.
http.searchMovieParser: parse an html string, result of a query for a movie
title.
http.searchPersonParser: parse an html string, result of a query for a person
name.
http.searchCharacterParser: parse an html string, result of a query for a
character name.
http.utils: miscellaneous utilities used only by the http package.
The modules under the parser.local package are the same of the
......@@ -109,15 +118,16 @@ IMDbPY-based programs.
===================
I wanted to stay independent from the source of the data for a given
movie/person, and so the imdb.IMDb function returns an instance of a class
that provides specific methods to access a given data source (web server,
local installation, SQL database, etc.)
movie/person/character, and so the imdb.IMDb function returns an instance
of a class that provides specific methods to access a given data
source (web server, local installation, SQL database, etc.)
Unfortunately that means that the movieID in the Movie class and the
personID in the Person class are dependent on the data access system
used. So, when a Movie or Person object is instantiated, the accessSystem
instance variable is set to a string used to identify the used data access
system.
Unfortunately that means that the movieID in the Movie class, the
personID in the Person class and the characterID in the Character class
are dependent on the data access system used.
So, when a Movie, a Person or a Character object is instantiated, the
accessSystem instance variable is set to a string used to identify the
used data access system.
HOW TO EXTEND
......@@ -130,6 +140,9 @@ of the imdb.IMDb class which must define at least the following methods:
list of (movieID, {movieData}) tuples.
_search_person(name) - to search for a given name; must return a
list of (movieID, {personData}) tuples.
_search_character(name) - to search for a given character's name; must
return a list of (characterID, {characterData})
tuples.
get_movie_*(movieID) - a set of methods, one for every set of information
defined for a Movie object; should return
a dictionary with the relative information.
......@@ -142,13 +155,21 @@ of the imdb.IMDb class which must define at least the following methods:
get_person_*(personID) - a set of methods, one for every set of information
defined for a Person object; should return
a dictionary with the relative information.
get_character_*(characterID) - a set of methods, one for every set of
information defined for a Character object;
should return a dictionary with the relative
information.
get_imdbMovieID(movieID) - must convert the given movieID to a string
representing the imdbID, as used by the IMDb web
server (e.g.: '0094226' for Brian De Palma's
"The Untouchables").
get_imdbPersonID(personID) - must convert the given personID a string
get_imdbPersonID(personID) - must convert the given personID to a string
representing the imdbID, as used by the IMDb web
server (e.g.: '0000154' for "Mel Gibson").
get_imdbCharacterID(characterID) - must convert the given characterID to a
string representing the imdbID, as used by
the IMDb web server (e.g.: '0000001' for
"Jesse James").
_normalize_movieID(movieID) - must convert the provided movieID in a
format suitable for internal use (e.g.:
convert a string to a long int).
......@@ -161,9 +182,11 @@ of the imdb.IMDb class which must define at least the following methods:
Rationale: a movieID can be passed from the
command line, or from a web browser.
_normalize_personID(personID) - idem.
_normalize_characterID(characterID) - idem.
_get_real_movieID(movieID) - return the true movieID; useful to handle
title aliases.
_get_real_personID(personID) - idem.
_get_real_characterID(characterID) - idem.
The class should raise the appropriate exceptions, when needed;
IMDbDataAccessError must be raised when you cannot access the resource
......
......@@ -10,10 +10,11 @@ Sections in this file:
* GENERAL USAGE
* THE Movie CLASS
* THE Person CLASS
* THE Character CLASS
* INFORMATION SETS
* Person OBJECTS INSIDE A Movie CLASS AND Movie OBJECTS INSIDE A Person OBJECT
* THE (NOT-SO-)"UNIVERSAL" '::' SEPARATOR
* MOVIE TITLES AND PERSON NAMES REFERENCES
* MOVIE TITLES AND PERSON/CHARACTER NAMES REFERENCES
* EXCEPTIONS
* OTHER SOURCES OF INFO
......@@ -42,7 +43,8 @@ where '/dir/with/files' is the directory which contains the *.data,
Now you've the "imdb_access" object, instance of a subclass
of the imdb.IMDbBase class, which can be used to search for a given
title/name and to retrieve information about the referred movie/person.
title/name and to retrieve information about the referred movie,
person or character.
The IMDb function can be called with a 'accessSystem' keyword argument,
that must be a string representing the type of data access you want
......@@ -74,8 +76,9 @@ you can access movie data through the e-mail interface, etc. etc.
| 'htmlThin'| information are gathered; useful
| | for systems with limited bandwidth.
The imdb_access object has five main methods: search_movie(title),
get_movie(movieID), search_person(name), get_person(personID)
The imdb_access object has seven main methods: search_movie(title),
get_movie(movieID), search_person(name), get_person(personID),
search_character(name), get_character(characterID)
and update(MovieOrPersonOrCharacterObject)
Methods description:
......@@ -107,35 +110,38 @@ found in the Movie module; a Movie object presents basically the same
interface of a Python's dictionary, so you can access, for example, the
list of actors and actress using the syntax: movieObject['cast']
The search_person(name) and get_person(personID) methods work the same
way as search_movie(title) and get_movie(movieID).
The search_person(name), get_person(personID), search_character(name)
and get_character(characterID) methods work the same way as
search_movie(title) and get_movie(movieID).
The get_imdbMovieID(movieID) and get_imdbPersonID(personID) take,
respectively, a movieID and a personID and return the relative imdbID;
it's safer to use the get_imdbID(MovieOrPersonObject) method.
The get_imdbMovieID(movieID), get_imdbPersonID(personID) and
get_imdbCharacterID(characterID) take, respectively, a movieID, a personID
and a characterID and return the relative imdbID; it's safer to use the
get_imdbID(MovieOrPersonOrCharacterObject) method.
The title2imdbID(title) and name2imdbID(name) take, respectively,
a movie title (in the plain text data files format) and a person name,
and return the relative imdbID; when possibile it's safer to use
the get_imdbID(MovieOrPersonObject) method.
The title2imdbID(title), name2imdbID(name) and character2imdbID(name) take,
respectively, a movie title (in the plain text data files format), a person
name and a character name, and return the relative imdbID; when possible
it's safer to use the get_imdbID(MovieOrPersonOrCharacterObject) method.
These functions _always_ need to connect to the IMDb's web site.
The get_imdbID(MovieOrPersonObject) method returns the imdbID for
the given Movie or Person object.
The get_imdbID(MovieOrPersonOrCharacterObject) method returns the imdbID
for the given Movie, Person or Character object.
The get_imdbURL(MovieOrPersonObject) method returns a string with the main
IMDb URL for the given Movie or Person object; it tries to do its best
to retrieve the URL.
The get_imdbURL(MovieOrPersonOrCharacterObject) method returns a string
with the main IMDb URL for the given Movie, Person or Character object; it
tries to do its best to retrieve the URL.
The update(MovieOrPersonObject) method takes an instance of a Movie
or Person class and retrieve every available information.
Remember that the search_movie(title) and search_person(name) methods
will return a list Movie or Person objects with only basic information,
like the movie title or the person name, so update() can be used to
retrieve every other information.
The update(MovieOrPersonOrCharacterObject) method takes an instance of
a Movie, Person or Character class and retrieves all the available information.
Remember that the search_movie(title), search_person(name) and
search_character(name) methods will return a list of Movie, Person or
Character objects with only basic information, like the movie title or
the person/character name, so update() can be used to retrieve every other
information.
By default a "reasonable" set of information are retrieved ('main',
'filmography' and 'biography' for a Person object, and 'main' and 'plot'
for a Movie object).
'filmography' and 'biography' for Person/Character objects, and 'main'
and 'plot' for a Movie object).
Example:
i = IMDb()
......@@ -209,7 +215,8 @@ kind; string; one in ('movie', 'tv series', 'tv mini series', 'video game',
imdbIndex; string; the roman number for movies with the same title/year.
director; Person list; a list of director's name (e.g.: ['Brian De Palma'])
cast; Person list; list of actor/actress, with the currentRole instance
variable set to a string which describe his role/duty.
variable set to a Character object which describes his
role/duty.
cover url; string; the link to the image of the poster.
writer; Person list; list of writers ['Oscar Fraley (novel)']
plot; list; list of plots and authors of the plot.
......@@ -272,6 +279,13 @@ th syntax is:
An analogous method is defined for the Movie class, and it's
called isSameTitle(otherMovieObject)
THE Character CLASS
===================
It works mostly like the Person class. :-)
For more information about the "currentRole" attribute, see the
README.currentRole file.
INFORMATION SETS
================
......@@ -295,7 +309,7 @@ this person an so on.
By default only important information are retrieved/updated (i.e.:
for a Movie object, only the 'main' and 'plot' information sets;
for a Person object only 'main', 'filmography', 'biography'.
for a Person/Character object only 'main', 'filmography', 'biography').
Example:
i = imdb.IMDb(accessSystem='http')
......@@ -313,9 +327,9 @@ Another example:
print p['other works']
To see which information sets are available and what are the defaults,
see the all_info and default_info instance variable of Movie and Person
classes. Each object instance of Movie or Person, also have a
current_info instance variable, to remember the information sets
see the all_info and default_info instance variables of the Movie, Person
and Character classes. Each instance of Movie, Person or Character
also has a current_info instance variable, to remember the information sets
already retrieved.
......@@ -328,6 +342,7 @@ and so on.
For people in the cast (actors/actresses), the "currentRole" instance
variable is set to the name of the character they played (e.g.: "Roy Neary"
for the role played by Richard Dreyfuss in Close Encounters of the Third Kind).
In fact, in this case currentRole will be a Character instance.
Another instance variable of a Person object is "notes", used to store
miscellaneous information (like an aka name for the actor, an "uncredited"
......@@ -336,6 +351,18 @@ It's also used, for non-cast people, to describe the specific task of
the person (e.g.: "assistant dialogue staff" for a person of the sound
department).
It's possible to test, with the Python "in" statement, if a person worked
in a given movie, or vice-versa; the following are all valid tests:
movie in person
movie in character
person in movie
person in character
character in movie
character in person
Considerations similar to the above ones, can be done for Character
instances: please read the README.currentRole file for more information.
E.g.:
# retrieve data for Steven Spielberg's "Close Encounters of the Third Kind"
import imdb
......@@ -406,26 +433,26 @@ It's easier to understand if you look at it; look at the output of:
print m['akas']
MOVIE TITLES AND PERSON NAMES REFERENCES
========================================
MOVIE TITLES AND PERSON/CHARACTER NAMES REFERENCES
==================================================
Sometimes in Movie and Person attributes, there're strings with
references to other movies or persons (e.g.: in the plot, in
Sometimes in Movie, Person and Character attributes, there're strings
with references to other movies or persons (e.g.: in the plot, in
the biography, etc.).
These references are stored in the Movie or Person instances;
in the strings you'll find values like _A Movie (2003)_ (qv)
or 'A Person' (qv); accessing these string (like movie['plot']
or person['biography']), these strings are modified using
a provided function, which must take, as arguments, the
These references are stored in the Movie, Person and Character
instances; in the strings you'll find values like _A Movie (2003)_ (qv)
or 'A Person' (qv) or '#A Character#' (qv); when accessing these
strings (like movie['plot'] or person['biography']), they are
modified using a provided function, which must take, as arguments, the
string and two dictionary with titles and names references;
by default the (qv) strings are converted in the "normal"
format ("A Movie (2003)" and "A Person").
format ("A Movie (2003)", "A Person" and "A Character").
You can find some examples of these functions in the
imdb.utils module.
The function used to modify the strings can be set with
the defaultModFunct parameter of the IMDb class or
with the modFunct parameter of the get_movie and get_person
methods.
with the modFunct parameter of the get_movie, get_person
and get_character methods.
E.g.:
import imdb
i = imdb.IMDb(defaultModFunct=imdb.utils.modHtmlLinks)
......
......@@ -9,6 +9,6 @@ Since release 3.0, IMDbPY uses a new account to access the IMDb
web site, parsing the new layout.
Older version still access the old layout, so they are still (more
or less) working; obviously only the new layout is sopport from
or less) working; obviously only the new layout is supported by
now on.
......@@ -5,9 +5,6 @@ Since January 2006, IMDb changed the way it handles TV episodes:
now every episode is treated as full title.
Starting with version 2.5, also IMDbPY supports this new behavior.
The implementation is beta, and still subject to change.
Please try it and comment it.
TITLES
======
......
......@@ -107,9 +107,11 @@ complete plain text data files set (as of 12 Nov 2006, with about
| looks like the creation of the indexes will
| take more than 2 or 3 hours. But see NOTES below.
PostgreSQL 8.1 | 190 (177/13)
SQLite 3.2 | not tested: it seems way too slow: maybe 35 _hours_
| to complete; maybe I've misconfigured or I'm
| misusing it.
SQLite 3.2 | ??? (80/???)
| with the "--sqlite-transactions" command line
| option; otherwise it's _really_ slow: even
| 35 hours or more.
SQL Server | about 3 or 4 hours.
If you have different experiences, please tell me!
......@@ -121,6 +123,7 @@ If you have different experiences, please tell me!
The imdbpy2sql.py will print a lot of debug information on standard output;
you can save it in a file, appending (without quotes) "2>&1 | tee output.txt"
[MySQL InnoDB]
InnoDB is abysmally slow for our purposes: my suggestion is to always
use MyISAM tables and - if you really want to use InnoDB - convert
......@@ -132,12 +135,34 @@ I recommend to set innodb_file_per_table to "true".
Beware that the conversion will be extremely slow (some hours), but
still faster than using InnoDB from the beginning.
You can use the "--mysql-innodb" command line option to force the
creation of a database with MyISAM tables, converted at the end
into InnoDB.
[Microsoft SQL Server/SQLExpress]
If you get an error about how wrong and against nature is the
blasphemous act of inserting identity keys, you can try to fix it
with the new custom queries support; see ADVANCED FEATURES below.
As a shortcut, you can use the "--ms-sqlserver" command line option
to set all the needed options.
You probably need SQLObject 0.10 (in the svn repository, as I'm
writing this).
[SQLite speed-up]
For some reason, SQLite is really slow, except when used with
transactions; you can use the '--sqlite-transactions' command
line option to obtain acceptable performances.
[SQLite failure]
It seems that, with older versions of the python-sqlite package, the first
run may fail; if you get a DatabaseError exception saying "no such table",
try running again the command with the same arguments.
[data truncated]
If you get an insane amount (hundreds or thousands, on various text
columns) of warnings like these lines:
......@@ -203,6 +228,16 @@ database (so it doesn't make much sense to use it with BEGIN, BEFORE_DROP
or BEFORE_CREATE time...), replacing the "%(table)s" text in the QUERY