#########################################################################
#
# mairix - message index builder and finder for maildir folders.
#
# Copyright (C) Richard P. Curnow  2002-2004,2006
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of version 2 of the GNU General Public License as
# published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
#
# =======================================================================

# Prologue section.  It brings in from.h, which presumably declares the
# "enum fromcheck_result" named by the TYPE directive at the end of this
# file (NOTE(review): confirm against from.h).
%{
#include "from.h"
%}


# Define tokens
# LF : \n
# CR : \r
# DIGIT : [0-9]
# PERIOD : .
# AT : @
# COLON : :
# WHITE : ' ', \t
# LOWER : [a-z]
# UPPER : [A-Z]
# PLUSMINUS : [+-]
# OTHER_EMAIL : other stuff valid in the LHS of an address
# DOMAIN : stuff valid in the RHS of an address

39 40 41
# Character-class abbreviations used by the blocks below.

# Line terminators.  LF alone ends a normal "From " line; CR LF is also
# accepted by the main block (Mozilla mbox files).
Abbrev LF = [\n]
Abbrev CR = [\r]

# Single characters and simple ranges.
Abbrev DIGIT = [0-9]
Abbrev PERIOD = [.]
Abbrev AT = [@]
Abbrev LOWER = [a-z]
Abbrev UPPER = [A-Z]
Abbrev COLON = [:]
Abbrev WHITE = [ \t]
Abbrev PLUSMINUS = [+\-]

# Punctuation allowed in the local part (LHS) of an address.
# Explained clearly at
# http://en.wikipedia.org/wiki/E-mail_address#RFC_specification
Abbrev OTHER_EMAIL = [.!#$%&'*/=?^_`{|}~]
Abbrev LT = [<]
Abbrev GT = [>]

# EMAIL : any character accepted in an unquoted local part.
Abbrev EMAIL = LOWER | UPPER | DIGIT | PLUSMINUS | OTHER_EMAIL

# DOMAIN : any character accepted in the domain part (RHS) of an address.
Abbrev OTHER_DOMAIN = [\-_.]
Abbrev DOMAIN = LOWER | UPPER | DIGIT | OTHER_DOMAIN

# OTHER_QUOTED : extra characters accepted only inside a "quoted" section
# of the local part (in addition to EMAIL and WHITE).
Abbrev DQUOTE = ["]
Abbrev OTHER_QUOTED = [@:<>]

# Square brackets delimit a dotted-quad literal after the '@'.
Abbrev LEFTSQUARE = [[]
Abbrev RIGHTSQUARE = [\]]
61

62
# email : matches a bare return address.
#
# Accepted shapes (entry state "in", exit state "out"):
#   local                   - local part only
#   local@domain            - ordinary address
#   local@[digits.and.dots] - bracketed dotted-quad after the '@'
#   @domain:<address>       - leading "@domain:" prefix, repeatable
# The local part may contain "double-quoted" sections.
BLOCK email {
    STATE in
        EMAIL -> in, before_at
        DQUOTE -> quoted_before_at
        # An '@' seen here starts a "@domain:" prefix rather than the
        # domain of the address itself.
        AT -> domain_route

    # Inside a "@domain:" prefix; the ':' returns to the start state.
    STATE domain_route
        DOMAIN -> domain_route
        COLON -> in

    # Inside a double-quoted section of the local part.  White space and
    # the OTHER_QUOTED characters are accepted here in addition to EMAIL.
    STATE quoted_before_at
        EMAIL | WHITE | OTHER_QUOTED -> quoted_before_at
        DQUOTE -> before_at

    # At least one local-part character or one quoted section has been
    # consumed.
    STATE before_at
        EMAIL -> before_at
        DQUOTE -> quoted_before_at
        # Local part only : >=1 characters will suffice, which we've already
        # matched.
        -> out
        AT -> start_of_domain

    # Just after the '@' : either a bracketed dotted quad or a domain name.
    STATE start_of_domain
        LEFTSQUARE -> dotted_quad
        DOMAIN -> after_at

    # Inside '[' ... ']' : any mix of digits and periods, then ']'.
    STATE dotted_quad
        DIGIT | PERIOD -> dotted_quad
        RIGHTSQUARE -> out

    # Inside the domain name : the first character was consumed on entry and
    # at least one more is required before the block can exit.
    STATE after_at
        DOMAIN -> after_at, out

}

# angled_email : an address as matched by the email block, enclosed in
# angle brackets, i.e. '<' email '>'.
BLOCK angled_email {
    STATE in
        LT -> in_angles

    # Delegate the address itself to an instance of the email block.
    STATE in_angles
        <email:in->out> -> before_gt

    STATE before_gt
        GT -> out
}
107

108
# zone : one word of timezone.
#
# Either an upper-case letter followed by zero or more letters/digits, or a
# '+' / '-' followed by one or more letters/digits.
BLOCK zone {
    # Make this pretty lenient
    STATE in
        # A single upper-case letter is already a complete zone, so it may
        # either continue in zone2 or exit directly.
        UPPER -> zone2, out
        # A sign must be followed by at least one more character.
        PLUSMINUS -> zone2

    STATE zone2
        UPPER | LOWER -> zone2, out
        DIGIT         -> zone2, out
}
119

120
# date : matches the date part of a "From " line, including the white
# space that precedes it.
#
# Two layouts are accepted (each field followed by white space as shown by
# the transitions below):
#   Www Mmm D[D] HH:MM[:SS] [zone [zone]] YYYY
#   Www Mmm D[D] HH:MM[:SS] YYYY zone [zone]      (zone after the year)
# Weekday and month are only checked for shape: one upper-case letter
# followed by two lower-case letters.
BLOCK date {
    # At least one white-space character must precede the weekday.
    STATE in
        WHITE -> in, before_weekday

    STATE before_weekday
        UPPER ; LOWER ; LOWER ; WHITE -> after_weekday

    STATE after_weekday
        WHITE -> after_weekday
        UPPER ; LOWER ; LOWER ; WHITE -> after_month

    # Day of month : one or two digits.
    STATE after_month
        WHITE -> after_month
        DIGIT ; WHITE -> after_day
        DIGIT ; DIGIT ; WHITE -> after_day

    STATE after_day
        WHITE -> after_day
        # Accept HH:MM:SS
        DIGIT ; DIGIT ; COLON ; DIGIT ; DIGIT ; COLON ; DIGIT ; DIGIT ; WHITE -> after_time
        # Accept HH:MM
        DIGIT ; DIGIT ; COLON ; DIGIT ; DIGIT ; WHITE -> after_time

    # Allow either 1 or 2 words of timezone
    STATE after_time
        WHITE -> after_time
        # No timezone at all : go straight on to the year.
        -> after_timezone
        # One word of timezone.
        <zone:in->out> ; WHITE -> after_timezone
        # First of two words of timezone.
        <zone:in->out> ; WHITE -> after_timezone_1

        # It appears that Pine puts the timezone after the year
        DIGIT ; DIGIT ; DIGIT ; DIGIT -> after_year_before_zone

    # Year already seen : one or two words of timezone must follow.
    STATE after_year_before_zone
        WHITE -> after_year_before_zone
        <zone:in->out> -> after_timezone_after_year
        <zone:in->out> ; WHITE -> after_timezone_after_year_1

    # Second word of timezone in the zone-after-year layout.
    STATE after_timezone_after_year_1
        WHITE -> after_timezone_after_year_1
        <zone:in->out> -> after_timezone_after_year

    # End of the zone-after-year layout; trailing white space is allowed.
    STATE after_timezone_after_year
        WHITE -> after_timezone_after_year
        -> out

    # Second word of timezone in the zone-before-year layout.
    STATE after_timezone_1
        WHITE -> after_timezone_1
        <zone:in->out> ; WHITE -> after_timezone

    # Four-digit year.
    STATE after_timezone
        WHITE -> after_timezone
        DIGIT ; DIGIT ; DIGIT ; DIGIT -> after_year

    # End of the zone-before-year layout; trailing white space is allowed.
    STATE after_year
        WHITE -> after_year
        -> out

}

# Assume the earlier code has identified the '\nFrom ' sequence,
# and the validator starts scanning from the character beyond the space

# main : top-level scanner block.
#
# Matches   [white space] [return address] date end-of-line
# and tags the accepting state with FROMCHECK_PASS.
BLOCK main {

    STATE in
        # Skip white space before the return address.
        WHITE -> in

        # Real return address.
        <email:in->out> -> before_date
        <angled_email:in->out> -> before_date

        # Cope with Mozilla mbox folder format which just uses a '-' as
        # the return address field.
        PLUSMINUS       -> before_date

        # Empty return address
                        -> before_date

    STATE before_date
        # Normal case : the date is followed directly by LF.
        <date:in->out> ; LF = FROMCHECK_PASS

        # Cope with mozilla mbox format
        <date:in->out> ; CR ; LF = FROMCHECK_PASS

    # Mention this state last : the last mentioned state in the last defined
    # block becomes the entry state of the scanner.

    STATE in

}
210

211 212 213
# Result attributes.  FROMCHECK_PASS is attached explicitly to the accepting
# states of the main block; FROMCHECK_FAIL is declared as the default
# attribute (DEFATTR).
ATTR FROMCHECK_PASS
ATTR FROMCHECK_FAIL
DEFATTR FROMCHECK_FAIL

# Naming of the generated scanner : "fromcheck" prefix, with results typed
# as "enum fromcheck_result" (presumably declared in from.h -- confirm).
PREFIX fromcheck
TYPE "enum fromcheck_result"
216 217

# vim:ft=txt:et:sw=4:sts=4:ts=4