#########################################################################
#
# mairix - message index builder and finder for maildir folders.
#
# Copyright (C) Richard P. Curnow  2002-2004,2006
# Copyright (C) Jonathan Kamens 2010
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of version 2 of the GNU General Public License as
# published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
#
# =======================================================================

# Verbatim section: pulls in from.h.  Presumably this text is copied into
# the generated scanner source and from.h declares the result type named
# by the TYPE directive at the end of this file -- TODO confirm.
%{
#include "from.h"
%}


# Define tokens
# LF : \n
# CR : \r
# DIGIT : [0-9]
# AT : @
# COLON : :
# WHITE : ' ', \t
# LOWER : [a-z]
# UPPER : [A-Z]
# PLUSMINUS : [+-]
# OTHER_EMAIL : other stuff valid in the LHS of an address
# DOMAIN : stuff valid in the RHS of an address

# Character classes.  Each Abbrev names a set of input characters, or a
# union of previously named sets, for use on the transitions below.
Abbrev LF = [\n]
Abbrev CR = [\r]
Abbrev DIGIT = [0-9]
Abbrev PERIOD = [.]
Abbrev AT = [@]
Abbrev LOWER = [a-z]
Abbrev UPPER = [A-Z]
Abbrev COLON = [:]
Abbrev WHITE = [ \t]
Abbrev PLUSMINUS = [+\-]
# Explained clearly at
# http://en.wikipedia.org/wiki/E-mail_address#RFC_specification
Abbrev OTHER_EMAIL = [.!#$%&'*/=?^_`{|}~]
Abbrev LT = [<]
Abbrev GT = [>]
# EMAIL : any single character accepted in an unquoted local part.
Abbrev EMAIL = LOWER | UPPER | DIGIT | PLUSMINUS | OTHER_EMAIL
Abbrev OTHER_DOMAIN = [\-_.]
# DOMAIN : any single character accepted in a domain name.
Abbrev DOMAIN = LOWER | UPPER | DIGIT | OTHER_DOMAIN
Abbrev DQUOTE = ["]
# OTHER_QUOTED : extra characters accepted only between double quotes in
# the local part (see quoted_before_at in the email block).
Abbrev OTHER_QUOTED = [@:<>]
Abbrev LEFTSQUARE = [[]
Abbrev RIGHTSQUARE = [\]]

# Recognise an e-mail address.  Accepted shapes, read off the transitions:
#   - an optional route prefix "@domain:" (possibly repeated),
#   - a local part built from EMAIL characters and/or double-quoted runs,
#   - optionally followed by "@domain" or "@[digits-and-periods]".
BLOCK email {
    STATE in
        # An unquoted local-part character: stay here or move on.
        EMAIL -> in, before_at
        # Opening quote of a quoted section of the local part.
        DQUOTE -> quoted_before_at
        # A leading '@' introduces a route.
        AT -> domain_route

    # Route domain; the ':' that ends it restarts the address.
    STATE domain_route
        DOMAIN -> domain_route
        COLON -> in

    # Between double quotes: whitespace and OTHER_QUOTED are allowed too.
    STATE quoted_before_at
        EMAIL | WHITE | OTHER_QUOTED -> quoted_before_at
        DQUOTE -> before_at

    STATE before_at
        EMAIL -> before_at
        DQUOTE -> quoted_before_at
        # Local part only : >=1 characters will suffice, which we've already
        # matched.
        -> out
        AT -> start_of_domain

    # First character after '@' : '[' starts a bracketed numeric address,
    # any DOMAIN character starts a domain name.
    STATE start_of_domain
        LEFTSQUARE -> dotted_quad
        DOMAIN -> after_at

    # Digits and periods up to the closing ']'.  Neither the number of
    # fields nor their values are checked.
    STATE dotted_quad
        DIGIT | PERIOD -> dotted_quad
        RIGHTSQUARE -> out

    # Rest of the domain name; every character may be the last one.
    STATE after_at
        DOMAIN -> after_at, out

}

# An e-mail address enclosed in angle brackets : '<' email '>'.
BLOCK angled_email {
    STATE in
        LT -> in_angles

    # Traverse the email block from its 'in' state to its 'out' state.
    STATE in_angles
        <email:in->out> -> before_gt

    STATE before_gt
        GT -> out
}

# One timezone word.  Accepted, as written below:
#   - a single upper-case letter, or
#   - an upper-case letter or a sign, followed by one or more letters
#     and/or digits (covers forms such as "GMT", "+0100", "-0500").
BLOCK zone {
    # Make this pretty lenient
    STATE in
        UPPER -> zone2
        UPPER -> out
        PLUSMINUS -> zone2

    # At least one further character is needed to reach 'out' from here.
    STATE zone2
        UPPER | LOWER -> zone2, out
        DIGIT         -> zone2, out
}

# The date part of a 'From ' separator line.  Shape accepted (each field
# is followed by whitespace, and extra whitespace between fields is
# skipped):
#
#   <white> Www Mmm D[D] HH:MM[:SS] [zone [zone]] YYYY
#   <white> Www Mmm D[D] HH:MM[:SS] YYYY zone [zone]
#
# Weekday and month are only checked for shape (one upper-case letter
# followed by two lower-case letters), not against a list of names.
BLOCK date {
    # At least one whitespace character must precede the weekday.
    STATE in
        WHITE -> in, before_weekday

    STATE before_weekday
        UPPER ; LOWER ; LOWER ; WHITE -> after_weekday

    STATE after_weekday
        WHITE -> after_weekday
        UPPER ; LOWER ; LOWER ; WHITE -> after_month

    # Day of month : one or two digits.
    STATE after_month
        WHITE -> after_month
        DIGIT ; WHITE -> after_day
        DIGIT ; DIGIT ; WHITE -> after_day

    STATE after_day
        WHITE -> after_day
        # Accept HH:MM:SS
        DIGIT ; DIGIT ; COLON ; DIGIT ; DIGIT ; COLON ; DIGIT ; DIGIT ; WHITE -> after_time
        # Accept HH:MM
        DIGIT ; DIGIT ; COLON ; DIGIT ; DIGIT ; WHITE -> after_time

    # Allow either 1 or 2 words of timezone
    STATE after_time
        WHITE -> after_time
        # No timezone at all : go straight on to the year.
        -> after_timezone
        <zone:in->out> ; WHITE -> after_timezone
        <zone:in->out> ; WHITE -> after_timezone_1

        # It appears that Pine puts the timezone after the year
        DIGIT ; DIGIT ; DIGIT ; DIGIT -> after_year_before_zone

    # Year-first form : a timezone of one or two words must follow.
    STATE after_year_before_zone
        WHITE -> after_year_before_zone
        <zone:in->out> -> after_timezone_after_year
        <zone:in->out> ; WHITE -> after_timezone_after_year_1

    # Second timezone word of the year-first form.
    STATE after_timezone_after_year_1
        WHITE -> after_timezone_after_year_1
        <zone:in->out> -> after_timezone_after_year

    # Trailing whitespace is tolerated before leaving the block.
    STATE after_timezone_after_year
        WHITE -> after_timezone_after_year
        -> out

    # Second timezone word of the zone-first form.
    STATE after_timezone_1
        WHITE -> after_timezone_1
        <zone:in->out> ; WHITE -> after_timezone

    # Four-digit year.
    STATE after_timezone
        WHITE -> after_timezone
        DIGIT ; DIGIT ; DIGIT ; DIGIT -> after_year

    # Trailing whitespace is tolerated before leaving the block.
    STATE after_year
        WHITE -> after_year
        -> out

}

# Assume the earlier code has identified the '\nFrom ' sequence,
# and the validator starts scanning from the character beyond the space

# Top-level machine : an optional return address, then a date, then the
# end of the line.  Only that full sequence is tagged FROMCHECK_PASS.
BLOCK main {

    STATE in
        # Real return address.
        WHITE -> in
        <email:in->out> -> before_date
        <angled_email:in->out> -> before_date

        # Cope with Mozilla mbox folder format which just uses a '-' as
        # the return address field.
        # (PLUSMINUS also admits a lone '+' here.)
        PLUSMINUS       -> before_date

        # Empty return address
                        -> before_date

    STATE before_date
        # Date terminated by a bare LF.
        <date:in->out> ; LF = FROMCHECK_PASS

        # Cope with mozilla mbox format
        # (date terminated by CR LF).
        <date:in->out> ; CR ; LF = FROMCHECK_PASS

    # Mention this state last : the last mentioned state in the last defined
    # block becomes the entry state of the scanner.

    STATE in

}

# Result attributes.  FROMCHECK_PASS is attached explicitly in the
# before_date state of the main block; FROMCHECK_FAIL is declared as the
# default attribute.
ATTR FROMCHECK_PASS
ATTR FROMCHECK_FAIL
DEFATTR FROMCHECK_FAIL
# Presumably the prefix for the names in the generated scanner -- TODO
# confirm against the dfasyn documentation.
PREFIX fromcheck
# Type given for the attribute values; presumably declared in from.h.
TYPE "enum fromcheck_result"


# vim:ft=txt:et:sw=4:sts=4:ts=4