Commit 25b17ff5 authored by Richard Curnow's avatar Richard Curnow

Corresponds to CVS V0.1

Original rev  : rc@rc0.org.uk--historical/mairix--history--0--base-0
parents
This diff is collapsed.
See section 2 of the mairix.txt file.
#########################################################################
#
# $Header: /cvs/src/mairix/Attic/Makefile,v 1.1 2002/07/03 22:15:58 richard Exp $
#
# =======================================================================
#
# mairix - message index builder and finder for maildir folders.
#
# Copyright (C) Richard P. Curnow 2002
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of version 2 of the GNU General Public License as
# published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
#
# =======================================================================
#########################################################################
# Edit the following variables as required
# Compiler and flags.  The commented-out CFLAGS lines are alternative
# optimised (-O2) and profiling (-O2 -pg) builds; the active one is a
# debug build with warnings enabled.
CC=gcc
#CFLAGS=-O2
#CFLAGS=-O2 -pg
CFLAGS=-Wall -g
# Installation locations, all derived from $(prefix).
prefix=/usr/local
bindir=$(prefix)/bin
mandir=$(prefix)/man
man1dir=$(mandir)/man1
#########################################################################
# Things below this point shouldn't need to be edited.
# NOTE(review): the recipe lines below carry no leading TAB in this copy;
# make requires one - confirm the indentation in the real file.
# Object files that are linked into the mairix executable.
OBJ = mairix.o db.o rfc822.o tok.o hash.o dirscan.o writer.o \
reader.o search.o stats.o
# Default target: build the executable only.
all : mairix
mairix : $(OBJ)
$(CC) -o mairix $(CFLAGS) $(OBJ)
# Pattern rule: compile each C source into its object file.
%.o : %.c
$(CC) -c $(CFLAGS) $<
# Remove editor backups, objects, the executable and all generated
# documentation / TeX by-products.  The leading '-' ignores rm failures.
clean:
-rm -f *~ *.o mairix *.s core mairix.txt mairix.html mairix.dvi mairix.ps mairix.pdf mairix.info
-rm -f mairix.cp mairix.fn mairix.aux mairix.log mairix.ky mairix.pg mairix.toc mairix.tp mairix.vr
# Create the installation directories if missing, then copy the executable
# and make it read/execute-only (mode 555).  The man directories are created
# but nothing is copied into them by this rule.  This target has no
# prerequisite, so 'mairix' must already have been built.
install:
[ -d $(prefix) ] || mkdir -p $(prefix)
[ -d $(bindir) ] || mkdir -p $(bindir)
[ -d $(mandir) ] || mkdir -p $(mandir)
[ -d $(man1dir) ] || mkdir -p $(man1dir)
cp -f mairix $(bindir)
chmod 555 $(bindir)/mairix
# Documentation in every format except PostScript (mairix.ps has its own
# rule below but is not part of 'docs').
docs : mairix.info mairix.txt mairix.html mairix.dvi mairix.pdf
mairix.info : mairix.texi
makeinfo mairix.texi
mairix.txt : mairix.texi
makeinfo --no-split --number-sections --no-headers mairix.texi > mairix.txt
mairix.html : mairix.texi
makeinfo --no-split --number-sections --html mairix.texi > mairix.html
# tex / pdftex are each run twice on the same input.
mairix.dvi : mairix.texi
tex mairix.texi
tex mairix.texi
mairix.ps : mairix.dvi
dvips mairix.dvi -o
mairix.pdf : mairix.texi
pdftex mairix.texi
pdftex mairix.texi
mairix is a program for indexing and searching email messages stored in maildir
folders.
* Indexing is fast. It runs incrementally on new messages - any particular
message only gets scanned once in the lifetime of the index file.
* The search mode populates a "virtual" maildir folder with symlinks which
point to the real messages. This folder can be opened as usual in your mail
program.
* The search mode is very fast.
* Indexing and searching work on the basis of words. The index file tabulates
which words occur in which parts (particular headers + body) of which
messages.
See also the mairix.txt file.
*********************************************************************
Copyright (C) Richard P. Curnow 2002
This program is free software; you can redistribute it and/or modify
it under the terms of version 2 of the GNU General Public License as
published by the Free Software Foundation.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
*********************************************************************
Suggestions, bug reports, experiences, praise, complaints etc to the author
please, at <rc@rc0.org.uk>
The website for mairix is http://www.rc0.org.uk/mairix
#!/usr/bin/env perl
# Runs "make mairix.txt" through the shell, then deletes the file named
# "build_kit" from the current directory.  The exit status of make is not
# inspected, so the unlink happens even when the build fails.
# NOTE(review): presumably "build_kit" is this script itself, removed once
# the text documentation has been generated - confirm.
system("make mairix.txt");
unlink "build_kit";
This diff is collapsed.
/*
$Header: /cvs/src/mairix/dirscan.c,v 1.1 2002/07/03 22:15:59 richard Exp $
mairix - message index builder and finder for maildir folders.
**********************************************************************
* Copyright (C) Richard P. Curnow 2002
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
*
**********************************************************************
*/
/* Traverse a directory tree and find maildirs, then list files in them. */
#include "mairix.h"
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <dirent.h>
/* Allocate a message-path array that holds no entries yet: no storage is
 * attached and both the used count and the capacity are zero.  Release the
 * result with free_msgpath_array(). */
struct msgpath_array *new_msgpath_array(void)/*{{{*/
{
  struct msgpath_array *arr = new(struct msgpath_array);

  arr->n = 0;
  arr->max = 0;
  arr->paths = NULL;
  return arr;
}
/*}}}*/
/* Release an array created by new_msgpath_array(): every stored path string,
 * then the entry table, then the array header itself. */
void free_msgpath_array(struct msgpath_array *x)/*{{{*/
{
  int idx = 0;

  if (x->paths != NULL) {
    while (idx < x->n) {
      /* free(NULL) is a no-op, so unset paths need no special casing */
      free(x->paths[idx].path);
      idx++;
    }
    free(x->paths);
  }
  free(x);
}
/*}}}*/
/* Append one message to arr.  The path x is duplicated with new_string(), so
 * the caller may reuse its buffer afterwards.  When the table is full its
 * capacity is raised by 1024 entries before the new record is stored. */
static void add_file_to_list(char *x, unsigned long mtime, size_t message_size, struct msgpath_array *arr) {/*{{{*/
  char *copy = new_string(x);

  if (arr->max == arr->n) {
    arr->max += 1024;
    arr->paths = grow_array(struct msgpath, arr->max, arr->paths);
  }

  arr->paths[arr->n].size = message_size;
  arr->paths[arr->n].mtime = mtime;
  arr->paths[arr->n].path = copy;
  arr->n++;
}
/*}}}*/
/* Collect the regular files found in the "new" and "cur" subdirectories of
 * the maildir <folder_base>/<mdir> and append each one (full path, mtime,
 * size) to arr.  A subdirectory that cannot be opened and an entry that
 * cannot be stat()ed are skipped silently. */
static void get_message_paths(char *folder_base, char *mdir, struct msgpath_array *arr)/*{{{*/
{
  static char *leaves[] = {"new", "cur"};
  int prefix_len = strlen(folder_base) + strlen(mdir);
  char *dirpath, *filepath;
  int which;

  /* FIXME : just store mdir-rooted paths in array and have common prefix elsewhere. */
  /* "<base>/<mdir>/" plus a 3-letter leaf and the terminator */
  dirpath = new_array(char, prefix_len + 6);
  /* the directory path plus "/", a name of up to NAME_MAX and the terminator */
  filepath = new_array(char, prefix_len + 8 + NAME_MAX);

  for (which = 0; which < 2; which++) {
    DIR *handle;
    struct dirent *entry;

    sprintf(dirpath, "%s/%s/%s", folder_base, mdir, leaves[which]);
    handle = opendir(dirpath);
    if (!handle) continue;

    while ((entry = readdir(handle)) != NULL) {
      struct stat info;

      sprintf(filepath, "%s/%s", dirpath, entry->d_name);
      if (stat(filepath, &info) < 0) continue;
      if (!S_ISREG(info.st_mode)) continue;
      add_file_to_list(filepath, info.st_mtime, info.st_size, arr);
    }
    closedir(handle);
  }

  free(dirpath);
  free(filepath);
}
/*}}}*/
/* Return 1 when <folder_base>/<parent>/<child> exists and is a directory,
 * 0 otherwise.  The path is assembled in the caller-supplied buffer, which
 * must be large enough for both separators and the terminator; the buffer
 * holds the assembled path on return. */
static int has_child_dir(char *folder_base, char *parent, char *buffer, char *child)/*{{{*/
{
  struct stat info;

  sprintf(buffer, "%s/%s/%s", folder_base, parent, child);
  if (stat(buffer, &info) < 0) {
    return 0;
  }
  return S_ISDIR(info.st_mode) ? 1 : 0;
}
/*}}}*/
/* Return 1 when <folder_base>/<name> is a directory that contains all three
 * maildir subdirectories "new", "cur" and "tmp"; return 0 otherwise
 * (including when the path does not exist).
 *
 * Both scratch buffers are obtained with new_array(), the allocator used by
 * the rest of this file, rather than a bare cast malloc(). */
static int looks_like_maildir(char *folder_base, char *name)/*{{{*/
{
  char *child_name;
  char *full_path;
  struct stat sb;
  int base_len = strlen(folder_base);
  int name_len = strlen(name);
  int result = 0;

  /* "<base>/<name>/" + 3-letter subdirectory + terminator */
  child_name = new_array(char, base_len + name_len + 6);
  /* "<base>/<name>" + terminator */
  full_path = new_array(char, base_len + name_len + 2);

  strcpy(full_path, folder_base);
  strcat(full_path, "/");
  strcat(full_path, name);

  if (stat(full_path, &sb) >= 0) {
    if (S_ISDIR(sb.st_mode)) {
      if (has_child_dir(folder_base, name, child_name, "new") &&
          has_child_dir(folder_base, name, child_name, "cur") &&
          has_child_dir(folder_base, name, child_name, "tmp")) {
        result = 1;
      }
    }
  }

  free(child_name);
  free(full_path);
  return result;
}
/*}}}*/
/* Recursively walk <folder_base>/<this_folder>.  If the folder itself looks
 * like a maildir its messages are appended to arr; every subdirectory
 * (other than "." and "..") is then scanned in the same way, using a path
 * relative to folder_base.
 *
 * A directory that cannot be opened (missing, unreadable, removed while the
 * scan is running) is skipped silently, matching how get_message_paths()
 * treats an unopenable subdirectory.  Previously the NULL handle was passed
 * straight to readdir() and closedir(). */
static void scan_directory(char *folder_base, char *this_folder, struct msgpath_array *arr)/*{{{*/
{
  DIR *d;
  struct dirent *de;
  struct stat sb;
  char *fname, *sname;
  char *name;
  int folder_base_len = strlen(folder_base);
  int this_folder_len = strlen(this_folder);

  /* "<base>/<folder>" + terminator */
  name = new_array(char, folder_base_len + this_folder_len + 2);
  strcpy(name, folder_base);
  strcat(name, "/");
  strcat(name, this_folder);

  if (looks_like_maildir(folder_base, this_folder)) {
    get_message_paths(folder_base, this_folder, arr);
  }

  d = opendir(name);
  if (!d) {
    /* Nothing to recurse into. */
    free(name);
    return;
  }

  /* fname: absolute path of an entry; sname: same entry relative to base */
  fname = new_array(char, strlen(name) + 2 + NAME_MAX);
  sname = new_array(char, this_folder_len + 2 + NAME_MAX);

  while ((de = readdir(d))) {
    if (!strcmp(de->d_name, ".") ||
        !strcmp(de->d_name, "..")) {
      continue;
    }
    strcpy(fname, name);
    strcat(fname, "/");
    strcat(fname, de->d_name);
    strcpy(sname, this_folder);
    strcat(sname, "/");
    strcat(sname, de->d_name);
    if (stat(fname, &sb) >= 0) {
      if (S_ISDIR(sb.st_mode)) {
        scan_directory(folder_base, sname, arr);
      }
    }
  }

  free(fname);
  free(sname);
  closedir(d);
  free(name);
  return;
}
/*}}}*/
static int message_compare(const void *a, const void *b)/*{{{*/
{
struct msgpath *aa = (struct msgpath *) a;
struct msgpath *bb = (struct msgpath *) b;
return strcmp(aa->path, bb->path);
}
/*}}}*/
/* Sort the entries of arr in place into ascending path order. */
static void sort_message_list(struct msgpath_array *arr)/*{{{*/
{
  qsort(arr->paths,
        arr->n,
        sizeof(struct msgpath),
        message_compare);
}
/*}}}*/
/* Build the sorted list of all messages reachable from a colon-separated
 * folder specification.
 *
 * folder_base : directory that every folder name is relative to
 * folders     : "name1:name2:..." ; a name of at least four characters that
 *               ends in "..." has the dots removed and is scanned
 *               recursively, any other name is used only if it is itself a
 *               maildir.
 *
 * Returns a newly allocated array, sorted by path. */
struct msgpath_array *build_message_list(char *folder_base, char *folders)/*{{{*/
{
  struct msgpath_array *list = new_msgpath_array();
  char *cursor = folders;

  do {
    char *sep = strchr(cursor, ':');
    char *entry;
    int entry_len;

    if (sep != NULL) {
      /* Copy out the text in front of the separator and step past it. */
      entry_len = sep - cursor;
      entry = new_array(char, entry_len + 1);
      memcpy(entry, cursor, entry_len);
      entry[entry_len] = '\0';
      cursor = sep + 1;
    } else {
      /* Last entry: take the rest and park the cursor on the terminator. */
      entry = new_string(cursor);
      cursor += strlen(cursor);
    }

    entry_len = strlen(entry);
    if (entry_len >= 4 && strcmp(entry + (entry_len - 3), "...") == 0) {
      /* Multiple folder */
      entry[entry_len - 3] = '\0';
      scan_directory(folder_base, entry, list);
    } else if (looks_like_maildir(folder_base, entry)) {
      /* Single folder */
      get_message_paths(folder_base, entry, list);
    }

    free(entry);
  } while (*cursor != '\0');

  sort_message_list(list);
  return list;
}
/*}}}*/
#ifdef TEST
/* Stand-alone driver, compiled only with -DTEST.
 *
 * Usage: <prog> [folder_base [folders]]
 *
 * Both arguments default to "." and are handed to build_message_list();
 * each message found is printed as "<mtime in hex> <path>".
 *
 * The previous version called build_message_list() with a single argument,
 * which no longer matches the function's two-parameter signature. */
int main (int argc, char **argv)
{
  int i;
  struct msgpath_array *arr;
  char *base = (argc > 1) ? argv[1] : ".";
  char *folder_list = (argc > 2) ? argv[2] : ".";

  arr = build_message_list(base, folder_list);
  for (i=0; i<arr->n; i++) {
    /* cast so the argument always matches the %lx conversion */
    printf("%08lx %s\n", (unsigned long) arr->paths[i].mtime, arr->paths[i].path);
  }
  free_msgpath_array(arr);
  return 0;
}
#endif
# Set this to the directory where your maildir folders live
base=/home/richard/mail
# Set this to a list of folders within 'base'. 3 dots at the end
# means there are sub-folders within this folder.
folders=new-mail:new-chrony:new-lojban:new-jbofihe:recent...:ancient...
# Set this to the folder within 'base' where you want the search mode
# to write its output
vfolder=vfolder
# Set this to the path where the index database file will be kept
database=/home/richard/mail/mairix_database
/* Hash function */
#include "mairix.h"
/*
--------------------------------------------------------------------
lookup2.c, by Bob Jenkins, December 1996, Public Domain.
hash(), hash2(), hash3, and mix() are externally useful functions.
Routines to test the hash are included if SELF_TEST is defined.
You can use this free for any purpose. It has no warranty.
--------------------------------------------------------------------
*/
#include <stdio.h>
#include <stddef.h>
#include <stdlib.h>
/* Number of slots in a table addressed by an n-bit hash: 1 shifted left by n. */
#define hashsize(n) ((unsigned int)1<<(n))
/* Mask that keeps the low n bits of a hash value (hashsize(n) - 1). */
#define hashmask(n) (hashsize(n)-1)
/*
--------------------------------------------------------------------
mix -- mix 3 32-bit values reversibly.
For every delta with one or two bit set, and the deltas of all three
high bits or all three low bits, whether the original value of a,b,c
is almost all zero or is uniformly distributed,
* If mix() is run forward or backward, at least 32 bits in a,b,c
have at least 1/4 probability of changing.
* If mix() is run forward, every bit of c will change between 1/3 and
2/3 of the time. (Well, 22/100 and 78/100 for some 2-bit deltas.)
mix() was built out of 36 single-cycle latency instructions in a
structure that could support 2x parallelism, like so:
a -= b;
a -= c; x = (c>>13);
b -= c; a ^= x;
b -= a; x = (a<<8);
c -= a; b ^= x;
c -= b; x = (b>>13);
...
Unfortunately, superscalar Pentiums and Sparcs can't take advantage
of that parallelism. They've also turned some of those single-cycle
latency instructions into multi-cycle latency instructions. Still,
this is the fastest good hash I could find. There were about 2^^68
to choose from. I only looked at a billion or so.
--------------------------------------------------------------------
*/
/* Usage notes: the expansion is a bare brace block that modifies all three
 * arguments in place, and each argument appears many times in the body, so
 * pass plain variables only (no expressions with side effects).  The nine
 * rows must stay in this order - every row consumes values written by the
 * rows before it. */
#define mix(a,b,c) \
{ \
a -= b; a -= c; a ^= (c>>13); \
b -= c; b -= a; b ^= (a<<8); \
c -= a; c -= b; c ^= (b>>13); \
a -= b; a -= c; a ^= (c>>12); \
b -= c; b -= a; b ^= (a<<16); \
c -= a; c -= b; c ^= (b>>5); \
a -= b; a -= c; a ^= (c>>3); \
b -= c; b -= a; b ^= (a<<10); \
c -= a; c -= b; c ^= (b>>15); \
}
/* same, but slower, works on systems that might have 8 byte ub4's */
/* Variant of mix() that additionally masks values with 0xffffffff: before
 * the right shifts in rows three and four, and on the stored result from
 * row five onwards.  Like mix() it expands to a bare brace block, modifies
 * its arguments in place and evaluates each of them many times.  It is not
 * referenced by hashfn() below. */
#define mix2(a,b,c) \
{ \
a -= b; a -= c; a ^= (c>>13); \
b -= c; b -= a; b ^= (a<< 8); \
c -= a; c -= b; c ^= ((b&0xffffffff)>>13); \
a -= b; a -= c; a ^= ((c&0xffffffff)>>12); \
b -= c; b -= a; b = (b ^ (a<<16)) & 0xffffffff; \
c -= a; c -= b; c = (c ^ (b>> 5)) & 0xffffffff; \
a -= b; a -= c; a = (a ^ (c>> 3)) & 0xffffffff; \
b -= c; b -= a; b = (b ^ (a<<10)) & 0xffffffff; \
c -= a; c -= b; c = (c ^ (b>>15)) & 0xffffffff; \
}
/*
--------------------------------------------------------------------
hash() -- hash a variable-length key into a 32-bit value
k : the key (the unaligned variable-length array of bytes)
len : the length of the key, counting by bytes
level : can be any 4-byte value
Returns a 32-bit value. Every bit of the key affects every bit of
the return value. Every 1-bit and 2-bit delta achieves avalanche.
About 36+6len instructions.
The best hash table sizes are powers of 2. There is no need to do
mod a prime (mod is sooo slow!). If you need less than 32 bits,
use a bitmask. For example, if you need only 10 bits, do
h = (h & hashmask(10));
In which case, the hash table should have hashsize(10) elements.
If you are hashing n strings (ub1 **)k, do it like this:
for (i=0, h=0; i<n; ++i) h = hash( k[i], len[i], h);
By Bob Jenkins, 1996. bob_jenkins@burtleburtle.net. You may use this
code any way you wish, private, educational, or commercial. It's free.
See http://burtleburtle.net/bob/hash/evahash.html
Use for hash table lookup, or anything where one collision in 2^32 is
acceptable. Do NOT use for cryptographic purposes.
--------------------------------------------------------------------
*/
/* Hash `length` bytes starting at k and return the resulting value.
 *
 * k       : key bytes (read only, any alignment)
 * length  : number of bytes in the key
 * initval : seed, e.g. the previous hash when chaining several keys
 *
 * The key is consumed in 12-byte groups, each group being added into the
 * three state words as little-endian 32-bit values and then stirred with
 * mix().  The remaining 0..11 bytes are folded in the same byte positions,
 * except that the lowest byte of c is left free for the key length. */
unsigned int hashfn( unsigned char *k, unsigned int length, unsigned int initval)
{
  unsigned int a = 0x9e3779b9;   /* the golden ratio; an arbitrary value */
  unsigned int b = 0x9e3779b9;
  unsigned int c = initval;      /* the previous hash value */
  unsigned int remaining = length;
  unsigned int i;

  /* Whole 12-byte groups. */
  for (; remaining >= 12; k += 12, remaining -= 12) {
    a += k[0] + ((unsigned int)k[1] << 8) + ((unsigned int)k[2]  << 16) + ((unsigned int)k[3]  << 24);
    b += k[4] + ((unsigned int)k[5] << 8) + ((unsigned int)k[6]  << 16) + ((unsigned int)k[7]  << 24);
    c += k[8] + ((unsigned int)k[9] << 8) + ((unsigned int)k[10] << 16) + ((unsigned int)k[11] << 24);
    mix(a,b,c);
  }

  /* Tail: at most 11 bytes are left. */
  c += length;
  for (i = 0; i < remaining; i++) {
    unsigned int byte = k[i];

    if (i < 4) {
      a += byte << (8 * i);
    } else if (i < 8) {
      b += byte << (8 * (i - 4));
    } else {
      /* bytes 8..10 land in bits 8..31; bits 0..7 of c carry the length */
      c += byte << (8 * (i - 7));
    }
  }
  mix(a,b,c);

  return c;
}
/*
$Header: /cvs/src/mairix/mairix.c,v 1.1 2002/07/03 22:15:59 richard Exp $
mairix - message index builder and finder for maildir folders.
**********************************************************************
* Copyright (C) Richard P. Curnow 2002
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
*
**********************************************************************
*/
#include "mairix.h"
#include <sys/types.h>
#include <sys/stat.h>
#include <pwd.h>
#include <unistd.h>
#include <ctype.h>
/* Externally visible flag, initially 0; not referenced in the code that
 * follows in this part of the file. */
int verbose = 0;
/* Settings filled in by parse_rc_file(); each stays NULL until the matching
 * option has been read. */
static char *folder_base = NULL;   /* value of the "base" option */
static char *folders = NULL;       /* value of the "folders" option */
static char *vfolder = NULL;       /* value of the "vfolder" option */
static char *database_path = NULL; /* value of the "database" option */
/* Return 1 if stat() succeeds on name, 0 if it fails for any reason. */
static int file_exists(char *name)/*{{{*/
{
  struct stat info;

  return (stat(name, &info) >= 0) ? 1 : 0;
}
/*}}}*/
/* Return a newly allocated copy of whatever follows the first '=' in text,
 * or NULL when text contains no '='. */
static char *copy_value(char *text)/*{{{*/
{
  char *eq = strchr(text, '=');

  if (eq == NULL) {
    return NULL;
  }
  return new_string(eq + 1);
}
/*}}}*/
/* Read the run-control file and store its settings in the file-scope
 * variables folder_base, folders, vfolder and database_path.
 *
 * name : path of the file to read, or NULL to use ".mairixrc" in the home
 *        directory of the current user (looked up with getpwuid()).
 *
 * File format, one setting per line:
 *   - everything from the first '#', '!', ';' or '%' onwards is discarded;
 *   - lines that are empty or all white space are skipped;
 *   - after leading white space the line must start (case-insensitively)
 *     with "base", "folders", "vfolder" or "database"; the value is the text
 *     after the first '='.  Anything else produces a warning on stderr.
 *
 * Exits with status 1 if the passwd entry cannot be found, the file cannot
 * be opened, or a line is too long for the buffer.
 *
 * Fixes over the previous version: the passwd entry is no longer
 * dereferenced before it is checked for NULL, a zero-length line can no
 * longer cause a read of line[-1], and characters are converted to
 * unsigned char before being passed to isspace(). */
static void parse_rc_file(char *name)/*{{{*/
{
  FILE *in;
  char line[1024], *p;
  size_t len;
  int lineno;
  int all_blank;
  int used_default_name = 0;

  if (!name) {
    /* open default file */
    struct passwd *pw;
    char *home;
    pw = getpwuid(getuid());
    if (!pw) {
      fprintf(stderr, "Cannot lookup passwd entry for this user\n");
      exit(1);
    }
    home = pw->pw_dir;
    name = new_array(char, strlen(home) + 12);
    strcpy(name, home);
    strcat(name, "/.mairixrc");
    used_default_name = 1;
  }

  in = fopen(name, "r");
  if (!in) {
    fprintf(stderr, "Cannot open %s, exiting\n", name);
    exit(1);
  }

  lineno = 0;
  while(fgets(line, sizeof(line), in)) {
    lineno++;
    len = strlen(line);
    if (len > sizeof(line) - 4) {
      fprintf(stderr, "Line %d in %s too long, exiting\n", lineno, name);
      exit(1);
    }

    /* len is 0 when the line starts with a NUL byte */
    if (len > 0 && line[len-1] == '\n') {
      line[len-1] = '\0';
    }

    /* Strip trailing comments. */
    for (p=line; *p && !strchr("#!;%", *p); p++) ;
    if (*p) *p = '\0';

    /* Discard blank lines; on exit p is at the first non-space character */
    all_blank = 1;
    for (p=line; *p; p++) {
      if (!isspace((unsigned char) *p)) {
        all_blank = 0;
        break;
      }
    }
    if (all_blank) continue;

    /* Now a real line to parse */
    if (!strncasecmp(p, "base", 4)) folder_base = copy_value(p);
    else if (!strncasecmp(p, "folders", 7)) folders = copy_value(p);
    else if (!strncasecmp(p, "vfolder", 7)) vfolder = copy_value(p);
    else if (!strncasecmp(p, "database", 8)) database_path = copy_value(p);
    else {
      fprintf(stderr, "Unrecognized option at line %d in %s\n", lineno, name);
    }
  }

  fclose(in);
  if (used_default_name) free(name);
}
/*}}}*/
/* qsort() comparison callback for an array of C-string pointers: a and b
 * each point at one array element, and the strings they refer to are
 * ordered with strcmp(). */
static int compare_strings(const void *a, const void *b)/*{{{*/
{
  const char *lhs = *(const char * const *) a;
  const char *rhs = *(const char * const *) b;

  return strcmp(lhs, rhs);
}
/*}}}*/
/* Return 1 if any path occurs more than once in msgs, 0 otherwise.
 *
 * The check works on a temporary, separately sorted table of pointers to
 * the existing path strings, so msgs itself is neither reordered nor
 * modified. */
static int check_message_list_for_duplicates(struct msgpath_array *msgs)/*{{{*/
{
  int count = msgs->n;
  char **view = new_array(char *, count);
  int found = 0;
  int k;

  for (k = 0; k < count; k++) {
    view[k] = msgs->paths[k].path;
  }
  qsort(view, count, sizeof(char *), compare_strings);

#if 0
  for (k = 0; k < count; k++) {
    printf("%4d : %s\n", k, view[k]);
  }
#endif

  /* After sorting, equal paths are adjacent. */
  for (k = 1; k < count && !found; k++) {
    if (strcmp(view[k-1], view[k]) == 0) {
      found = 1;
    }
  }

  free(view);
  return found;
}
/*}}}*/
/* Notes on folder management: