/*
  mairix - message index builder and finder for maildir folders.

 **********************************************************************
 * Copyright (C) Richard P. Curnow  2002,2003,2004,2005
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
 *
 **********************************************************************
 */


#ifndef MAIRIX_H
#define MAIRIX_H

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <sys/types.h>
#include <sys/stat.h>

#include "memmac.h"

/* Describes one indexed message: where its data lives (one of two layouts,
 * held in the 'src' union) plus the fields common to both layouts. */
struct msgpath {/*{{{*/
  /* The 'selector' for this union is the corresponding entry of type 'enum
   * message_type' */
  union {
    struct {
      char *path;
      size_t size;  /* size of the message in bytes */
      time_t mtime; /* mtime of message file on disc */
    } mpf; /* message per file */
    struct {
      int file_index; /* index into table of mbox files */
      int msg_index;  /* index of message within the file */
    } mbox; /* for messages in mbox format folders */
  } src;

  /* Now fields that are common to both types of message. */
  time_t date;  /* representation of Date: header in message */
  int tid;      /* thread-id */
  /* + other stuff eventually */
};
/*}}}*/

/* Tag saying how a message is stored; it selects which member of the 'src'
 * union in 'struct msgpath' is meaningful. */
enum message_type {/*{{{*/
  MTY_DEAD,     /* msg no longer exists, i.e. don't report in searches,
                   prune it on a '-p' run. */
  MTY_FILE,     /* msg <-> file in 1-1 correspondence e.g. maildir, MH */
  MTY_MBOX      /* multiple msgs per file : MBOX format file */
};
/*}}}*/
/* An array of message descriptors together with a second array of type tags.
 * NOTE(review): 'type' and 'paths' look like parallel arrays (type[i] tagging
 * paths[i]) and 'n'/'max' like used/allocated counts, by analogy with the
 * commented fields of 'struct database' - confirm in dirscan.c. */
struct msgpath_array {/*{{{*/
  enum message_type *type;
  struct msgpath *paths;
  int n;
  int max;
};
/*}}}*/

/* Growable byte buffer.  The token structures use it to hold the
 * delta-compressed record of which messages match a token.
 * NOTE(review): 'highest' is presumably the largest index encoded so far
 * (needed to compute the next delta) - confirm in tok.c. */
struct matches {/*{{{*/
  unsigned char *msginfo;
  int n; /* bytes in use */
  int max; /* bytes allocated */
  unsigned long highest;
};
/*}}}*/
/* One entry of a 'struct toktable': the token's text, its hash value, and a
 * single encoding chain recording which messages contain it. */
struct token {/*{{{*/
  char *text;
  unsigned long hashval;
  /* to store delta-compressed info of which msgpaths match the token */
  struct matches match0;
};
/*}}}*/
/* One entry of a 'struct toktable2': like 'struct token' but carrying two
 * independent encoding chains (match0 and match1) for the same text. */
struct token2 {/*{{{*/
  char *text;
  unsigned long hashval;
  /* to store delta-compressed info of which msgpaths match the token */
  struct matches match0;
  struct matches match1;
};
/*}}}*/
/* Hash table of 'struct token' pointers. */
struct toktable {/*{{{*/
  struct token **tokens;
  int n; /* # in use */
  int size; /* # allocated */
  unsigned int mask; /* for masking down hash values */
  int hwm; /* number to have before expanding */
};
/*}}}*/
/* Hash table of 'struct token2' pointers; same shape as 'struct toktable' but
 * its entries carry two encoding chains each. */
struct toktable2 {/*{{{*/
  struct token2 **tokens;
  int n; /* # in use */
  int size; /* # allocated */
  unsigned int mask; /* for masking down hash values */
  int hwm; /* number to have before expanding */
};
/*}}}*/

/* Kind of content held by an attachment (stored in 'struct attachment'.ct).
 * The names presumably mirror the MIME types they stand for. */
enum content_type {/*{{{*/
  CT_TEXT_PLAIN     = 0,
  CT_TEXT_HTML      = 1,
  CT_TEXT_OTHER     = 2,
  CT_MESSAGE_RFC822 = 3,
  CT_OTHER          = 4
};
/*}}}*/
/* Forward declaration: an attachment may point at a nested 'struct rfc822',
 * which is only defined further down. */
struct rfc822;
/* One part of a message, linked to its neighbours through next/prev.
 * NOTE(review): 'ct' presumably selects the live member of 'data'
 * (data.rfc822 for CT_MESSAGE_RFC822, data.normal otherwise) - confirm in
 * rfc822.c. */
struct attachment {/*{{{*/
  struct attachment *next;
  struct attachment *prev;
  enum content_type ct;
  union attachment_body {
    /* Raw content: a byte buffer and its length. */
    struct normal_attachment_body {
      int len;
      char *bytes;
    } normal;
    /* Nested message. */
    struct rfc822 *rfc822;
  } data;
};
/*}}}*/
/* Header fields kept for a message: four address/subject strings, the three
 * message-id related strings used for threading, and a date. */
struct headers {/*{{{*/
  char *to;
  char *cc;
  char *from;
  char *subject;

  /* The following are needed to support threading */
  char *message_id;
  char *in_reply_to;
  char *references;

  time_t date;
};
/*}}}*/
/* A message: its headers plus an embedded 'struct attachment'.
 * NOTE(review): 'atts' is held by value and has next/prev links, so it is
 * presumably the head node of the attachment list - confirm in rfc822.c. */
struct rfc822 {/*{{{*/
  struct headers hdrs;
  struct attachment atts;
};
/*}}}*/

/* Fixed-size (16 byte) buffer holding a checksum; filled in by
 * compute_checksum(). */
typedef char checksum_t[16];

/* Bookkeeping for one mbox file: its path, its size/mtime as recorded in the
 * database versus as found on disc now, and per-message arrays giving the
 * offset, length and checksum of each message. */
struct mbox {/*{{{*/
  /* If path==NULL, this indicates that the mbox is dead, i.e. no longer
   * exists. */
  char *path;
  /* As read in from database (i.e. current last time mairix scan was run.) */
  time_t file_mtime;
  size_t file_size;
  /* As found in the filesystem now. */
  time_t current_mtime;
  size_t current_size;
  /* After reconciling a loaded database with what's on the disc, this entry
     stores how many of the msgs that used to be there last time are still
     present at the head of the file.  Thus, all messages beyond that are
     treated as dead, and scanning starts at that point to find 'new' messages
     (which may actually be old ones that have moved, but they're treated as
     new.) */
  int n_old_msgs_valid;

  int n_so_far; /* Used during database load. */

  int n_msgs;   /* Number of entries in 'start' and 'len' */
  int max_msgs; /* Allocated size of 'start' and 'len' */
  /* File offset to the start of each message (first line of real header, not to mbox 'From ' line) */
  off_t *start;
  /* Length of each message */
  size_t *len;
  /* Checksums on whole messages. */
  checksum_t *check_all;

};
/*}}}*/
/* The in-memory index: the list of known messages, the table of mbox files,
 * and one token table per searchable part of a message. */
struct database {/*{{{*/
  /* Used to hold an entire mapping between an array of filenames, each
     containing a single message, and the sets of tokens that occur in various
     parts of those messages */

  enum message_type *type;
  struct msgpath *msgs; /* Paths to messages */
  int n_msgs; /* Number in use */
  int max_msgs; /* Space allocated */

  struct mbox *mboxen;
  int n_mboxen; /* number in use. */
  int max_mboxen; /* space allocated */

  /* Seed for hashing in the token tables.  Randomly created for
   * each new database - avoid DoS attacks through carefully
   * crafted messages. */
  unsigned int hash_key;

  /* Token tables */
  struct toktable *to;
  struct toktable *cc;
  struct toktable *from;
  struct toktable *subject;
  struct toktable *body;

  /* Encoding chain 0 stores all msgids appearing in the following message headers:
   * Message-Id, In-Reply-To, References.  Used for thread reconciliation.
   * Encoding chain 1 stores just the Message-Id.  Used for search by message ID.
  */
  struct toktable2 *msg_ids;
};
/*}}}*/

/* Folder format selector; taken by build_message_list() and search_top().
 * NOTE(review): the per-value meaning is inferred from the names only
 * (maildir, MH, mbox, raw) - confirm in search.c. */
enum folder_type {/*{{{*/
  FT_MAILDIR,
  FT_MH,
  FT_MBOX,
  FT_RAW
};
/*}}}*/

/* Node of a doubly-linked list of strings. */
struct string_list {/*{{{*/
  struct string_list *next;
  struct string_list *prev;
  char *data;
};
/*}}}*/

/* Identifies where a message's bytes come from; passed to data_to_rfc822().
 * NOTE(review): 'start' and 'len' presumably locate the message inside
 * 'filename' when type is MS_MBOX - confirm against the callers. */
struct msg_src {
  enum {MS_FILE, MS_MBOX} type;
  char *filename;
  off_t start;
  size_t len;
};

/* Declared here, defined in exactly one .c file. */
extern int verbose; /* cmd line -v switch */

/* Lame fix for systems where NAME_MAX isn't defined after including the above
 * set of .h files (Solaris, FreeBSD so far).  Probably grossly oversized but
 * it'll do.
 * NOTE(review): POSIX places NAME_MAX in <limits.h>, which this header does
 * not include; a file that includes <limits.h> after this header may see a
 * macro redefinition - confirm whether that matters for the build. */

#if !defined(NAME_MAX)
#define NAME_MAX 4096
#endif

/* In glob.c */
/* Both types are opaque here; only pointers to them cross this interface. */
struct globber;
struct globber_array;

/* Single-pattern matcher: build from a wildcard string, test a string
 * against it, release it.  NOTE(review): is_glob_match() presumably returns
 * non-zero on a match - confirm in glob.c. */
struct globber *make_globber(const char *wildstring);
void free_globber(struct globber *old);
int is_glob_match(struct globber *g, const char *s);
/* Same operations for a set of patterns; going by the name, the set is built
 * from one string with the patterns separated by colons. */
struct globber_array *colon_sep_string_to_globber_array(const char *in);
int is_globber_array_match(struct globber_array *ga, const char *s);
void free_globber_array(struct globber_array *in);

/* In hash.c */
/* Hash function: takes a byte buffer 'k', its 'length', and an initial value.
 * NOTE(review): 'initval' is presumably where the per-database seed
 * (hash_key) is fed in - confirm against the callers in tok.c. */
unsigned int hashfn(unsigned char *k, unsigned int length, unsigned int initval);

/* In dirscan.c */
struct msgpath_array *new_msgpath_array(void);
int is_integer_string(char *x);
void free_msgpath_array(struct msgpath_array *x);
/* NOTE(review): 'n' and 'arr' look like outputs (element count and a newly
 * built array of strings) in both of these - confirm in dirscan.c. */
void string_list_to_array(struct string_list *list, int *n, char ***arr);
void split_on_colons(const char *str, int *n, char ***arr);
void build_message_list(char *folder_base, char *folders, enum folder_type ft,
    struct msgpath_array *msgs, struct globber_array *omit_globs);
/* These two have the signature of the 'filter' callback taken by
 * glob_and_expand_paths(). */
int filter_is_maildir(const char *path, struct stat *sb);
int filter_is_mh(const char *path, struct stat *sb);

/* In rfc822.c */
/* Build a 'struct rfc822' either from a file name or from a buffer already in
 * memory ('data' of 'length' bytes, described by 'src'); free_rfc822()
 * releases the result. */
struct rfc822 *make_rfc822(char *filename);
void free_rfc822(struct rfc822 *msg);
struct rfc822 *data_to_rfc822(struct msg_src *src, char *data, int length);
/* NOTE(review): going by the name, maps 'filename' read-only and hands back
 * the address and size through 'data' and 'len'; how failure is reported is
 * not visible from this header - confirm in rfc822.c. */
void create_ro_mapping(const char *filename, unsigned char **data, int *len);

/* In tok.c */
/* Constructors and destructors for the token tables and their entries. */
struct toktable *new_toktable(void);
struct toktable2 *new_toktable2(void);
void free_token(struct token *x);
void free_token2(struct token2 *x);
void free_toktable(struct toktable *x);
void free_toktable2(struct toktable2 *x);
/* Record that the message with index 'file_index' contains 'tok_text';
 * 'hash_key' is the hashing seed (see 'struct database'.hash_key). */
void add_token_in_file(int file_index, unsigned int hash_key, unsigned char *tok_text, struct toktable *table);
/* Low-level operations on a single encoding chain. */
void check_and_enlarge_encoding(struct matches *m);
void insert_index_on_encoding(struct matches *m, int idx);
/* Two-chain variant of add_token_in_file().  NOTE(review): 'add_to_chain1'
 * presumably controls whether match1 is updated as well as match0 - confirm
 * in tok.c. */
void add_token2_in_file(int file_index, unsigned int hash_key, char *tok_text, struct toktable2 *table, int add_to_chain1);

/* In db.c */
/* Create a database, either empty or loaded from 'db_filename';
 * free_database() releases it. */
struct database *new_database(void);
struct database *new_database_from_file(char *db_filename, int do_integrity_checks);
void free_database(struct database *db);
void maybe_grow_message_arrays(struct database *db);
/* Feed the tokens of 'msg' into the tables of 'db' under message index
 * 'file_index'. */
void tokenise_message(int file_index, struct database *db, struct rfc822 *msg);
/* NOTE(review): the meaning of the int results below is not visible from
 * this header - confirm in db.c. */
int update_database(struct database *db, struct msgpath *sorted_paths, int n_paths);
void check_database_integrity(struct database *db);
int cull_dead_messages(struct database *db, int do_integrity_checks);

/* In mbox.c */
void build_mbox_lists(struct database *db, const char *folder_base,
    const char *mboxen_paths, struct globber_array *omit_globs);
int add_mbox_messages(struct database *db);
/* Checksum 'len' bytes at 'data' into '*csum'. */
void compute_checksum(const unsigned char *data, size_t len, checksum_t *csum);
void cull_dead_mboxen(struct database *db);
/* Pack an (mbox number, message number) pair into one unsigned int, and
 * unpack it again. */
unsigned int encode_mbox_indices(unsigned int mb, unsigned int msg);
void decode_mbox_indices(unsigned int index, unsigned int *mb, unsigned int *msg);
int verify_mbox_size_constraints(struct database *db);
/* Takes 'n_in' input paths plus a 'filter' callback and a set of omit
 * patterns; results come back through 'paths_out' / 'n_out'. */
void glob_and_expand_paths(const char *folder_base, char **paths_in, int n_in, char ***paths_out, int *n_out, int (*filter)(const char *, struct stat *), struct globber_array *omit_globs);
int filter_is_file(const char *x, struct stat *sb);

/* In writer.c */
/* Write 'db' out to 'filename'.  How failure is reported is not visible from
 * this header (the function returns void). */
void write_database(struct database *db, char *filename, int do_integrity_checks);

/* In search.c */
/* Entry point for a search.  Note that the last parameter is named 'verbose'
 * and so hides the global of the same name inside this prototype.
 * NOTE(review): 'argv' presumably carries the search expressions and the int
 * result a status code - confirm in search.c. */
int search_top(int do_threads, int do_augment, char *database_path, char *complete_mfolder, char **argv, enum folder_type ft, int verbose);

/* In stats.c */
/* Statistics over 'db'; nothing is returned, so the results are presumably
 * printed - confirm in stats.c. */
void get_db_stats(struct database *db);

/* In dates.c */
/* Parse the date expression 'in' into an optional start and an optional end
 * time; '*has_start' / '*has_end' say whether each bound was given.
 * NOTE(review): the meaning of the int result is not visible here - confirm
 * in dates.c. */
int scan_date_string(char *in, time_t *start, int *has_start, time_t *end, int *has_end);

/* In dumper.c */
/* Dump the database stored in 'filename'. */
void dump_database(char *filename);

/* In strexpand.c */
/* Returns an expanded form of 'p'.  NOTE(review): what gets expanded, and
 * whether the caller owns the returned string, is not visible from this
 * header - confirm in strexpand.c. */
char *expand_string(const char *p);

/* In dotlock.c */
/* Take and release the lock protecting the database at 'path'.
 * unlock_database() takes no argument, so the lock taken is presumably
 * remembered in dotlock.c.  NOTE(review): 'forced_unlock' presumably asks for
 * an existing lock to be removed first - confirm in dotlock.c. */
void lock_database(char *path, int forced_unlock);
void unlock_database(void);

#endif /* MAIRIX_H */