Commit 5c76c2dd authored by Richard P. Curnow

Transition scanner inputs and drivers to new dfasyn

The dfasyn/ directory and other tidying-up is to follow...
parent 33e46eba
......@@ -59,11 +59,14 @@ mairix : $(OBJ)
%.o : %.c memmac.h mairix.h reader.h Makefile
$(CC) -c $(CFLAGS) $(CPPFLAGS) -o $@ $<
datescan.c : datescan.nfa dfasyn/dfasyn
dfasyn/dfasyn -o datescan.c -v -u datescan.nfa
datescan.c : datescan.nfa ../dfasyn/dfasyn
../dfasyn/dfasyn -o datescan.c -ho datescan_new.h -r datescan.report -v -u datescan.nfa
fromcheck.c : fromcheck.nfa dfasyn/dfasyn fromcheck.h
dfasyn/dfasyn -o fromcheck.c -v -u fromcheck.nfa
fromcheck.c : fromcheck.nfa ../dfasyn/dfasyn fromcheck.h
../dfasyn/dfasyn -o fromcheck.c -ho fromcheck_new.h -r fromcheck.report -v -u fromcheck.nfa
../dfasyn/dfasyn:
echo "Assume dfasyn is up to date"
mbox.o : fromcheck.h
......
......@@ -26,7 +26,7 @@
#include <ctype.h>
#include <assert.h>
#include "mairix.h"
#include "dates.h"
#include "datescan_new.h"
static enum DATESCAN_TYPE discover_type(char *first, char *last)/*{{{*/
{
......@@ -35,19 +35,7 @@ static enum DATESCAN_TYPE discover_type(char *first, char *last)/*{{{*/
char *p;
p = first;
while (p < last) {
switch (*p) {
case '0': token = 0; break;
case '1': token = 1; break;
case '2': token = 2; break;
case '3': token = 3; break;
case '4' ... '9':
token = 4; break;
case 'a' ... 'z':
case 'A' ... 'Z':
token = 5; break;
default:
token = -1; break;
}
token = datescan_char2tok[(int)*(unsigned char*)p];
current_state = datescan_next_state(current_state, token);
if (current_state < 0) break;
p++;
......@@ -56,7 +44,7 @@ static enum DATESCAN_TYPE discover_type(char *first, char *last)/*{{{*/
if (current_state < 0) {
return DS_FAILURE;
} else {
return datescan_exitval[current_state];
return datescan_attr[current_state];
}
}
/*}}}*/
......
......@@ -23,94 +23,88 @@
# Stuff to pass through verbatim
%{
#include "dates.h"
#include "datescan_new.h"
%}
Tokens D0 D1 D2 D3 D49 A
Abbrev D01 = D0 | D1
Abbrev D12 = D1 | D2
Abbrev D29 = D2 | D3 | D49
Abbrev D19 = D1 | D29
Abbrev D09 = D0 | D19
Abbrev D = D09
Abbrev A = [a-zA-Z]
BLOCK day
BLOCK day {
State in
D12 ; D09 -> out
D3 ; D01 -> out
ENDBLOCK
[12] ; [0-9] -> out
[3] ; [01] -> out
}
# Match 2 digit year
BLOCK year
BLOCK year {
State in
D0 ; D09 -> out
D3 ; D29 -> out
D49 ; D09 -> out
ENDBLOCK
[04-9] ; [0-9] -> out
[3] ; [2-9] -> out
}
BLOCK month
BLOCK month {
State in
A ; A ; A -> out
ENDBLOCK
}
BLOCK scaled
BLOCK scaled {
State in
D -> in, after_value
[0-9] -> in, after_value
State after_value
A -> out
ENDBLOCK
}
BLOCK ccyy
BLOCK ccyy {
State in
D19 ; D ; D ; D -> out
ENDBLOCK
[1-9] ; [0-9] ; [0-9] ; [0-9] -> out
}
BLOCK main
BLOCK main {
State in
D19 = DS_D
[1-9] = DS_D
<day:in->out> = DS_D
<year:in->out> = DS_Y
<ccyy:in->out> = DS_Y
D ; D ; D ; D ; D ; D = DS_YYMMDD
D ; D ; D ; D ; D ; D ; D ; D = DS_YYMMDD
[0-9] ; [0-9] ; [0-9] ; [0-9] ; [0-9] ; [0-9] = DS_YYMMDD
[0-9] ; [0-9] ; [0-9] ; [0-9] ;
[0-9] ; [0-9] ; [0-9] ; [0-9] = DS_YYMMDD
<scaled:in->out> = DS_SCALED
<month:in->out> = DS_M
D19 ; <month:in->out> = DS_DM
[1-9] ; <month:in->out> = DS_DM
<day:in->out> ; <month:in->out> = DS_DM
<month:in->out> ; D19 = DS_MD
<month:in->out> ; [1-9] = DS_MD
<month:in->out> ; <day:in->out> = DS_MD
<year:in->out> ; <month:in->out> = DS_YM
<month:in->out> ; <year:in->out> = DS_MY
<ccyy:in->out> ; <month:in->out> = DS_YM
<month:in->out> ; <ccyy:in->out> = DS_MY
<year:in->out> ; <month:in->out> ; D19 = DS_YMD
<year:in->out> ; <month:in->out> ; [1-9] = DS_YMD
<year:in->out> ; <month:in->out> ; <day:in->out> = DS_YMD
D19 ; <month:in->out> ; <year:in->out> = DS_DMY
[1-9] ; <month:in->out> ; <year:in->out> = DS_DMY
<day:in->out> ; <month:in->out> ; <year:in->out> = DS_DMY
<ccyy:in->out> ; <month:in->out> ; D19 = DS_YMD
<ccyy:in->out> ; <month:in->out> ; [1-9] = DS_YMD
<ccyy:in->out> ; <month:in->out> ; <day:in->out> = DS_YMD
D19 ; <month:in->out> ; <ccyy:in->out> = DS_DMY
[1-9] ; <month:in->out> ; <ccyy:in->out> = DS_DMY
<day:in->out> ; <month:in->out> ; <ccyy:in->out> = DS_DMY
ENDBLOCK
RESULT DS_D
RESULT DS_Y
RESULT DS_YYMMDD
RESULT DS_SCALED
RESULT DS_M
RESULT DS_DM
RESULT DS_MD
RESULT DS_YM
RESULT DS_MY
RESULT DS_YMD
RESULT DS_DMY
DEFRESULT DS_FAILURE
TYPE "enum DATESCAN_TYPE"
}
ATTR DS_D
ATTR DS_Y
ATTR DS_YYMMDD
ATTR DS_SCALED
ATTR DS_M
ATTR DS_DM
ATTR DS_MD
ATTR DS_YM
ATTR DS_MY
ATTR DS_YMD
ATTR DS_DMY
DEFATTR DS_FAILURE
TYPE "DATESCAN_TYPE"
PREFIX datescan
......
......@@ -20,7 +20,7 @@
# =======================================================================
%{
#include "fromcheck.h"
#include "fromcheck_new.h"
%}
......@@ -35,11 +35,20 @@
# PLUSMINUS : [+-]
# OTHER_EMAIL : other stuff valid in an address, at least [_.]
Tokens LF CR DIGIT AT COLON WHITE LOWER UPPER PLUSMINUS OTHER_EMAIL
Abbrev LF = [\n]
Abbrev CR = [\r]
Abbrev DIGIT = [0-9]
Abbrev AT = [@]
Abbrev LOWER = [a-z]
Abbrev UPPER = [A-Z]
Abbrev COLON = [:]
Abbrev WHITE = [ \t]
Abbrev PLUSMINUS = [+\-]
Abbrev OTHER_EMAIL = [_.=]
Abbrev EMAIL = LOWER | UPPER | DIGIT | PLUSMINUS | OTHER_EMAIL
BLOCK email
BLOCK email {
STATE in
EMAIL -> in, before_at
......@@ -53,9 +62,9 @@ BLOCK email
STATE after_at
EMAIL -> after_at, out
ENDBLOCK
}
BLOCK zone
BLOCK zone {
# Make this pretty lenient
STATE in
UPPER -> zone2
......@@ -65,9 +74,9 @@ BLOCK zone
STATE zone2
UPPER | LOWER -> zone2, out
DIGIT -> zone2, out
ENDBLOCK
}
BLOCK date
BLOCK date {
STATE in
WHITE -> in, before_weekday
......@@ -125,13 +134,12 @@ BLOCK date
WHITE -> after_year
-> out
ENDBLOCK
}
# Assume the earlier code has identified the '\nFrom ' sequence,
# and the validator starts scanning from the character beyond the space
BLOCK main
BLOCK main {
STATE in
# Real return address.
......@@ -156,12 +164,12 @@ BLOCK main
STATE in
ENDBLOCK
}
RESULT FROMCHECK_PASS
RESULT FROMCHECK_FAIL
DEFRESULT FROMCHECK_FAIL
ATTR FROMCHECK_PASS
ATTR FROMCHECK_FAIL
DEFATTR FROMCHECK_FAIL
PREFIX fromcheck
TYPE "enum fromcheck_result"
TYPE "fromcheck_result"
# vim:ft=txt:et:sw=4:sts=4:ht=4
......@@ -30,7 +30,7 @@
#include <sys/stat.h>
#include <sys/mman.h>
#include "mairix.h"
#include "fromcheck.h"
#include "fromcheck_new.h"
#include "md5.h"
struct extant_mbox {/*{{{*/
......@@ -183,8 +183,6 @@ static void init_fromtab(void)/*{{{*/
}
/*}}}*/
static signed char fromcheck_table[256];
/* REAL CHECKING : need to see if the line looks like this:
* From [ <return-path> ] <weekday> <month> <day> <time> [ <timezone> ] <year>
(from the mutt sources).
......@@ -205,7 +203,7 @@ static int looks_like_from_separator(off_t n, char *va, size_t len, int verbose)
if (verbose) {
printf("current_state=%d, p=%02x (%1c) ", current_state, (int)(unsigned char)p, ((p>=32)&&(p<=126))?p:'.');
}
current_state = fromcheck_next_state(current_state, (int)fromcheck_table[(int)(unsigned char)p]);
current_state = fromcheck_next_state(current_state, (int)fromcheck_char2tok[(int)(unsigned char)p]);
if (verbose) {
printf("next_state=%d\n", current_state);
}
......@@ -213,7 +211,7 @@ static int looks_like_from_separator(off_t n, char *va, size_t len, int verbose)
/* not matched */
break;
}
if (fromcheck_exitval[current_state] == FROMCHECK_PASS) {
if (fromcheck_attr[current_state] == FROMCHECK_PASS) {
result = 1; /* matched good separator */
break;
}
......@@ -227,27 +225,6 @@ static int looks_like_from_separator(off_t n, char *va, size_t len, int verbose)
}
/*}}}*/
/* Populate fromcheck_table[], the 256-entry lookup that maps a byte value to
 * the scanner token class (FS_*) handed to fromcheck_next_state().
 *
 * Every entry is first set to -1; only the bytes listed below are then given
 * a token class, so any other byte keeps the value -1.
 *
 * NOTE(review): the enclosing hunk shrinks from 27 lines to 6, so this
 * function appears to be removed by the commit, with the generated
 * fromcheck_char2tok[] table taking over the lookup -- confirm against the
 * resulting mbox.c. */
static void init_fromcheck_table()/*{{{*/
{
int i;
/* Default for all 256 byte values: no token class. */
for (i=0; i<256; i++) fromcheck_table[i] = -1;
/* Letters and digits, one class per range. */
for (i='A'; i<='Z'; i++) fromcheck_table[i] = FS_UPPER;
for (i='a'; i<='z'; i++) fromcheck_table[i] = FS_LOWER;
for (i='0'; i<='9'; i++) fromcheck_table[i] = FS_DIGIT;
/* '+' and '-' share a single class. */
fromcheck_table['+'] = FS_PLUSMINUS;
fromcheck_table['-'] = FS_PLUSMINUS;
fromcheck_table['@'] = FS_AT;
fromcheck_table[':'] = FS_COLON;
/* Line terminators get separate classes. */
fromcheck_table['\n'] = FS_LF;
fromcheck_table['\r'] = FS_CR;
/* Space and tab are both treated as whitespace. */
fromcheck_table[' '] = FS_WHITE;
fromcheck_table['\t'] = FS_WHITE;
/* Remaining punctuation that shares the FS_OTHEREMAIL class. */
fromcheck_table['_'] = FS_OTHEREMAIL;
fromcheck_table['.'] = FS_OTHEREMAIL;
fromcheck_table['='] = FS_OTHEREMAIL;
}
/*}}}*/
static off_t find_next_from(off_t n, char *va, size_t len)/*{{{*/
{
unsigned char c;
......@@ -819,8 +796,6 @@ void build_mbox_lists(struct database *db, const char *folder_base, /*{{{*/
marry_up_mboxen(db, extant_mboxen, n_extant);
init_fromcheck_table();
/* Now look for new/modified mboxen, find how many of the old messages are
* still valid and scan the remainder. */
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment