Commit 0a46e0c0 authored by Richard Curnow

Add -Q and nochecks options to skip integrity checks for performance

Original rev  :
parent 3124a09f
......@@ -2,6 +2,7 @@ NEW IN VERSION 0.16
* Home directory and environment variable expansion in the .mairixrc file
* Add -Q flag to skip integrity checks.
......@@ -402,7 +402,7 @@ static void import_toktable2(char *data, unsigned int hash_key, int n_msgs, stru
struct database *new_database_from_file(char *db_filename)/*{{{*/
struct database *new_database_from_file(char *db_filename, int do_integrity_checks)/*{{{*/
/* Read existing database from file for doing incremental update */
struct database *result;
......@@ -492,7 +492,9 @@ struct database *new_database_from_file(char *db_filename)/*{{{*/
if (do_integrity_checks) {
return result;
......@@ -1130,7 +1132,7 @@ static void recode_toktable2(struct toktable2 *tbl, int *new_idx)/*{{{*/
int cull_dead_messages(struct database *db)/*{{{*/
int cull_dead_messages(struct database *db, int do_integrity_checks)/*{{{*/
/* Return true if any culled */
......@@ -1139,7 +1141,9 @@ int cull_dead_messages(struct database *db)/*{{{*/
/* Check db is OK before we start on this. (Check afterwards is done in the
* writer.c code.) */
if (do_integrity_checks) {
if (verbose) {
fprintf(stderr, "Culling dead messages\n");
......@@ -43,6 +43,7 @@ static char *mboxen = NULL;
static char *mfolder = NULL;
static char *database_path = NULL;
static enum folder_type output_folder_type = FT_MAILDIR;
static int skip_integrity_checks = 0;
static int file_exists(char *name)/*{{{*/
......@@ -183,6 +184,7 @@ static void parse_rc_file(char *name)/*{{{*/
else if (!strncasecmp(p, "mfolder=", 8)) mfolder = copy_value(p);
else if (!strncasecmp(p, "database=", 9)) database_path = copy_value(p);
else if (!strncasecmp(p, "nochecks", 8)) skip_integrity_checks = 1;
else {
if (verbose) {
fprintf(stderr, "Unrecognized option at line %d in %s\n", lineno, name);
......@@ -376,6 +378,7 @@ int main (int argc, char **argv)/*{{{*/
int do_help = 0;
int do_raw_output = 0;
int do_dump = 0;
int do_integrity_checks = 1;
setlocale(LC_CTYPE, "");
......@@ -400,6 +403,8 @@ int main (int argc, char **argv)/*{{{*/
do_dump = 1;
} else if (!strcmp(*argv, "-r") || !strcmp(*argv, "--raw-output")) {
do_raw_output = 1;
} else if (!strcmp(*argv, "-Q") || !strcmp(*argv, "--no-integrity-checks")) {
do_integrity_checks = 0;
} else if (!strcmp(*argv, "-v") || !strcmp(*argv, "--verbose")) {
verbose = 1;
} else if (!strcmp(*argv, "-h") ||
......@@ -460,6 +465,10 @@ int main (int argc, char **argv)/*{{{*/
mfolder = arg_mfolder;
if (skip_integrity_checks) {
do_integrity_checks = 0;
if (!folder_base) {
fprintf(stderr, "No folder_base/MAIRIX_FOLDER_BASE set\n");
......@@ -513,7 +522,7 @@ int main (int argc, char **argv)/*{{{*/
/* Try to open existing database */
if (file_exists(database_path)) {
if (verbose) printf("Reading existing database...\n");
db = new_database_from_file(database_path);
db = new_database_from_file(database_path, do_integrity_checks);
if (verbose) printf("Loaded %d existing messages\n", db->n_msgs);
} else {
if (verbose) printf("Starting new database\n");
......@@ -524,11 +533,11 @@ int main (int argc, char **argv)/*{{{*/
any_updates = update_database(db, msgs->paths, msgs->n);
if (do_purge) {
any_purges = cull_dead_messages(db);
any_purges = cull_dead_messages(db, do_integrity_checks);
if (1 || any_updates || any_purges) {
/* For now write it every time. This is obviously the most reliable method. */
write_database(db, database_path);
write_database(db, database_path, do_integrity_checks);
#if 0
......@@ -274,13 +274,13 @@ void add_token2_in_file(int file_index, unsigned int hash_key, char *tok_text, s
/* In db.c */
struct database *new_database(void);
struct database *new_database_from_file(char *db_filename);
struct database *new_database_from_file(char *db_filename, int do_integrity_checks);
void free_database(struct database *db);
void maybe_grow_message_arrays(struct database *db);
void tokenise_message(int file_index, struct database *db, struct rfc822 *msg);
int update_database(struct database *db, struct msgpath *sorted_paths, int n_paths);
void check_database_integrity(struct database *db);
int cull_dead_messages(struct database *db);
int cull_dead_messages(struct database *db, int do_integrity_checks);
/* In mbox.c */
void build_mbox_lists(struct database *db, const char *folder_base, const char *mboxen_paths);
......@@ -300,7 +300,7 @@ void free_globber(struct globber *old);
int is_glob_match(struct globber *g, const char *s);
/* In writer.c */
void write_database(struct database *db, char *filename);
void write_database(struct database *db, char *filename, int do_integrity_checks);
/* In search.c */
int search_top(int do_threads, int do_augment, char *database_path, char *folder_base, char *mfolder, char **argv, enum folder_type ft, int verbose);
......@@ -398,6 +398,10 @@ this is unlikely to be much of a problem in reality.
@emph{mairix} can support a maximum of 65536 separate mboxes, and a maximum of
65536 messages within any one mbox.
@item nochecks
This takes no arguments. If a line starting with @samp{nochecks} is present,
it is equivalent to specifying the @samp{-Q} flag on every indexing run.
@item mfolder
This defines the name of the @emph{match} folder (within the directory
specified by @samp{base}) into which the search mode writes its output.
......@@ -498,8 +502,17 @@ need to do.
The command line syntax is
For indexing mode:
mairix [-f path] [-p] [-v] [-Q]
@end example
For search mode:
mairix [-f path] [-p] [-v] [-t] [-d] [-a] [-r] [-o mfolder] [expr1] ... [exprn]
mairix [-f path] [-t] [-v] [-a] [-r] [-o mfolder] expr1 [expr2] ... [exprn]
@end example
For database dump mode:
mairix [-f path] -d
@end example
The @samp{-f} or @samp{--rcfile} flag allows a different path to the
......@@ -522,6 +535,13 @@ queries when using @samp{-a}, mairix will try to create the same symlink
multiple times. This prevents the same message being shown multiple times in
the match folder.)
The @samp{-Q} or @samp{--no-integrity-checks} flag is used in indexing mode.
Normally, mairix will do various integrity checks on the database after loading
it in, and before writing the modified database out again. The checking helps
to detect mairix bugs much earlier, but it has a performance penalty. This
flag skips the checks, at the cost of some loss in robustness. See also the
@samp{nochecks} directive in @ref{mairixrc}.
The @samp{-t} or @samp{--threads} option applies to search mode. Normally,
only the messages matching all the specified expressions are included in the
@emph{match folder} that is built. With the @samp{-t} flag, any message in
......@@ -579,7 +599,7 @@ The search mode runs when there is at least one search expression. Search
expressions can take forms such as (in increasing order of complexity):
@itemize @bullet
@item A date expression. The format for specifying the date is described in section @xref{date_syntax}.
@item A date expression. The format for specifying the date is described in section @ref{date_syntax}.
@item A size expression. This matches all messages whose size in bytes is in a
particular range. For example, to match all messages bigger than 1 Megabyte
......@@ -548,14 +548,16 @@ static char *write_toktable2(struct toktable2 *tab, struct write_map_toktable2 *
return cdata;
void write_database(struct database *db, char *filename)/*{{{*/
void write_database(struct database *db, char *filename, int do_integrity_checks)/*{{{*/
int file_len;
char *data, *cdata;
unsigned int *uidata;
struct write_map map;
if (do_integrity_checks) {
if (!verify_mbox_size_constraints(db)) {
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment