/*
 * Copyright (C) 1996-2000,2012-2013 Michael R. Elkins <me@mutt.org>
 *
 *     This program is free software; you can redistribute it and/or modify
 *     it under the terms of the GNU General Public License as published by
 *     the Free Software Foundation; either version 2 of the License, or
 *     (at your option) any later version.
 *
 *     This program is distributed in the hope that it will be useful,
 *     but WITHOUT ANY WARRANTY; without even the implied warranty of
 *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *     GNU General Public License for more details.
 *
 *     You should have received a copy of the GNU General Public License
 *     along with this program; if not, write to the Free Software
 *     Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
 */

#if HAVE_CONFIG_H
# include "config.h"
#endif
22 23

#include "mutt.h"
24
#include "mutt_regex.h"
25 26 27 28
#include "mailbox.h"
#include "mime.h"
#include "rfc2047.h"
#include "rfc2231.h"
29
#include "mutt_crypt.h"
30 31
#include "url.h"

32 33 34 35
#include <string.h>
#include <ctype.h>
#include <sys/stat.h>
#include <stdlib.h>
36 37 38 39 40

/* Reads an arbitrarily long header field, and looks ahead for continuation
 * lines.  ``line'' must point to a dynamically allocated string; it is
 * increased if more space is required to fit the whole line.
 */
41
char *mutt_read_rfc822_line (FILE *f, char *line, size_t *linelen)
42 43
{
  char *buf = line;
44
  int ch;
45
  size_t offset = 0;
46
  size_t len = 0;
47

48
  FOREVER
49
  {
50 51
    if (fgets (buf, *linelen - offset, f) == NULL ||	/* end of file or */
	(ISSPACE (*line) && !offset))			/* end of headers */ 
52 53
    {
      *line = 0;
54
      return (line);
55 56
    }

57 58 59
    len = mutt_strlen (buf);
    if (! len)
      return (line);
60 61

    buf += len - 1;
62 63 64
    if (*buf == '\n')
    {
      /* we did get a full line. remove trailing space */
65 66 67
      while (ISSPACE (*buf))
	*buf-- = 0;	/* we cannot come beyond line's beginning because
			 * it begins with a non-space */
68 69

      /* check to see if the next line is a continuation line */
70
      if ((ch = fgetc (f)) != ' ' && ch != '\t')
71
      {
72 73
	ungetc (ch, f);
	return (line); /* next line is a separate header field or EOH */
74 75 76
      }

      /* eat tabs and spaces from the beginning of the continuation line */
77 78 79
      while ((ch = fgetc (f)) == ' ' || ch == '\t')
	;
      ungetc (ch, f);
80
      *++buf = ' '; /* string is still terminated because we removed
81
		       at least one whitespace char above */
82 83 84 85 86 87 88 89
    }

    buf++;
    offset = buf - line;
    if (*linelen < offset + STRING)
    {
      /* grow the buffer */
      *linelen += STRING;
90
      safe_realloc (&line, *linelen);
91 92 93 94 95 96
      buf = line + offset;
    }
  }
  /* not reached */
}

97
static LIST *mutt_parse_references (char *s, int in_reply_to)
98
{
99 100 101
  LIST *t, *lst = NULL;
  char *m;
  const char *sp;
102

103
  m = mutt_extract_message_id (s, &sp);
104
  while (m)
105
  {
106
    t = safe_malloc (sizeof (LIST));
107 108 109
    t->data = m;
    t->next = lst;
    lst = t;
110

111
    m = mutt_extract_message_id (NULL, &sp);
112 113
  }

114
  return lst;
115 116
}

117
int mutt_check_encoding (const char *c)
118
{
119 120 121 122 123 124 125 126 127 128 129 130
  if (ascii_strncasecmp ("7bit", c, sizeof ("7bit")-1) == 0)
    return (ENC7BIT);
  else if (ascii_strncasecmp ("8bit", c, sizeof ("8bit")-1) == 0)
    return (ENC8BIT);
  else if (ascii_strncasecmp ("binary", c, sizeof ("binary")-1) == 0)
    return (ENCBINARY);
  else if (ascii_strncasecmp ("quoted-printable", c, sizeof ("quoted-printable")-1) == 0)
    return (ENCQUOTEDPRINTABLE);
  else if (ascii_strncasecmp ("base64", c, sizeof("base64")-1) == 0)
    return (ENCBASE64);
  else if (ascii_strncasecmp ("x-uuencode", c, sizeof("x-uuencode")-1) == 0)
    return (ENCUUENCODED);
131
#ifdef SUN_ATTACHMENT
132 133
  else if (ascii_strncasecmp ("uuencode", c, sizeof("uuencode")-1) == 0)
    return (ENCUUENCODED);
134 135
#endif
  else
136
    return (ENCOTHER);
137 138
}

139
static PARAMETER *parse_parameters (const char *s)
140
{
141
  PARAMETER *head = 0, *cur = 0, *new;
142
  char buffer[LONG_STRING];
143
  const char *p;
144 145
  size_t i;

146 147
  dprint (2, (debugfile, "parse_parameters: `%s'\n", s));
  
148 149
  while (*s)
  {
150
    if ((p = strpbrk (s, "=;")) == NULL)
151
    {
152
      dprint(1, (debugfile, "parse_parameters: malformed parameter: %s\n", s));
153 154 155 156 157 158 159 160
      goto bail;
    }

    /* if we hit a ; now the parameter has no value, just skip it */
    if (*p != ';')
    {
      i = p - s;
      /* remove whitespace from the end of the attribute name */
161 162
      while (i > 0 && is_email_wsp(s[i-1]))
	--i;
163

164 165 166 167 168
      /* the check for the missing parameter token is here so that we can skip
       * over any quoted value that may be present.
       */
      if (i == 0)
      {
169 170
	dprint(1, (debugfile, "parse_parameters: missing attribute: %s\n", s));
	new = NULL;
171 172 173
      }
      else
      {
174 175
	new = mutt_new_parameter ();
	new->attribute = mutt_substrdup(s, s + i);
176 177 178
      }

      s = skip_email_wsp(p + 1); /* skip over the = */
179 180 181 182

      if (*s == '"')
      {
        int state_ascii = 1;
183 184 185 186
	s++;
	for (i=0; *s && i < sizeof (buffer) - 1; i++, s++)
	{
	  if (AssumedCharset && *AssumedCharset) {
187
            /* As iso-2022-* has a character of '"' with non-ascii state,
188 189
	     * ignore it. */
            if (*s == 0x1b && i < sizeof (buffer) - 2)
190 191 192 193 194 195 196 197 198
            {
              if (s[1] == '(' && (s[2] == 'B' || s[2] == 'J'))
                state_ascii = 1;
              else
                state_ascii = 0;
            }
          }
          if (state_ascii && *s == '"')
            break;
199 200 201 202 203 204 205 206 207 208 209 210 211
	  if (*s == '\\')
	  {
	    /* Quote the next character */
	    buffer[i] = s[1];
	    if (!*++s)
	      break;
	  }
	  else
	    buffer[i] = *s;
	}
	buffer[i] = 0;
	if (*s)
	  s++; /* skip over the " */
212 213 214
      }
      else
      {
215 216 217
	for (i=0; *s && *s != ' ' && *s != ';' && i < sizeof (buffer) - 1; i++, s++)
	  buffer[i] = *s;
	buffer[i] = 0;
218 219
      }

220 221
      /* if the attribute token was missing, 'new' will be NULL */
      if (new)
222
      {
223 224 225 226 227 228 229 230 231 232 233 234 235 236
	new->value = safe_strdup (buffer);

	dprint (2, (debugfile, "parse_parameter: `%s' = `%s'\n",
	      new->attribute ? new->attribute : "",
	      new->value ? new->value : ""));

	/* Add this parameter to the list */
	if (head)
	{
	  cur->next = new;
	  cur = cur->next;
	}
	else
	  head = cur = new;
237 238 239 240
      }
    }
    else
    {
241
      dprint (1, (debugfile, "parse_parameters(): parameter with no value: %s\n", s));
242 243 244 245
      s = p;
    }

    /* Find the next parameter */
246
    if (*s != ';' && (s = strchr (s, ';')) == NULL)
247
      break; /* no more parameters */
248 249 250

    do
    {
251 252
      /* Move past any leading whitespace. the +1 skips over the semicolon */
      s = skip_email_wsp(s + 1);
253 254 255
    }
    while (*s == ';'); /* skip empty parameters */
  }    
256

257
bail:
258

259 260
  rfc2231_decode_parameters (&head);
  return (head);
261 262
}

263
int mutt_check_mime_type (const char *s)
264
{
265
  if (ascii_strcasecmp ("text", s) == 0)
266
    return TYPETEXT;
267
  else if (ascii_strcasecmp ("multipart", s) == 0)
268
    return TYPEMULTIPART;
269 270
#ifdef SUN_ATTACHMENT 
  else if (ascii_strcasecmp ("x-sun-attachment", s) == 0)
271 272
    return TYPEMULTIPART;
#endif
273
  else if (ascii_strcasecmp ("application", s) == 0)
274
    return TYPEAPPLICATION;
275
  else if (ascii_strcasecmp ("message", s) == 0)
276
    return TYPEMESSAGE;
277
  else if (ascii_strcasecmp ("image", s) == 0)
278
    return TYPEIMAGE;
279
  else if (ascii_strcasecmp ("audio", s) == 0)
280
    return TYPEAUDIO;
281
  else if (ascii_strcasecmp ("video", s) == 0)
282
    return TYPEVIDEO;
283
  else if (ascii_strcasecmp ("model", s) == 0)
284
    return TYPEMODEL;
285
  else if (ascii_strcasecmp ("*", s) == 0)
286
    return TYPEANY;
287
  else if (ascii_strcasecmp (".*", s) == 0)
288 289 290 291 292
    return TYPEANY;
  else
    return TYPEOTHER;
}

293
/* Parse the value of a Content-Type header field into ``ct''.
 *
 * ``s'' is modified in place: the ``;'' that starts the parameter list,
 * the ``/'' between type and subtype and the end of the subtype are
 * overwritten with NUL bytes.
 *
 * Any subtype and parameter list already stored in ``ct'' are released
 * first, and a previously stored xtype is released before a new one is
 * set, so the function may be applied to the same BODY more than once
 * without leaking.  ``ct->filename'' is only set from the "name"
 * parameter when no filename is present yet.
 */
void mutt_parse_content_type (char *s, BODY *ct)
{
  char *pc;
  char *subtype;

  FREE (&ct->subtype);
  mutt_free_parameter(&ct->parameter);

  /* First extract any existing parameters */
  if ((pc = strchr(s, ';')) != NULL)
  {
    *pc++ = 0;
    while (*pc && ISSPACE (*pc))
      pc++;
    ct->parameter = parse_parameters(pc);

    /* Some pre-RFC1521 gateways still use the "name=filename" convention,
     * but if a filename has already been set in the content-disposition,
     * let that take precedence, and don't set it here */
    if ((pc = mutt_get_parameter( "name", ct->parameter)) && !ct->filename)
      ct->filename = safe_strdup(pc);

#ifdef SUN_ATTACHMENT
    /* with Sun attachments the encoding is taken from the "conversions"
     * parameter of the content type */
    if ((pc = mutt_get_parameter ("conversions", ct->parameter)))
      ct->encoding = mutt_check_encoding (pc);
#endif
  }

  /* Now get the subtype */
  if ((subtype = strchr(s, '/')))
  {
    *subtype++ = '\0';
    for(pc = subtype; *pc && !ISSPACE(*pc) && *pc != ';'; pc++)
      ;
    *pc = '\0';
    ct->subtype = safe_strdup (subtype);
  }

  /* Finally, get the major type */
  ct->type = mutt_check_mime_type (s);

#ifdef SUN_ATTACHMENT
  if (ascii_strcasecmp ("x-sun-attachment", s) == 0)
  {
    /* drop a subtype parsed above instead of leaking it */
    FREE (&ct->subtype);
    ct->subtype = safe_strdup ("x-sun-attachment");
  }
#endif

  if (ct->type == TYPEOTHER)
  {
    /* release the xtype left behind by an earlier call on this BODY */
    FREE (&ct->xtype);
    ct->xtype = safe_strdup (s);
  }

  if (ct->subtype == NULL)
  {
    /* Some older non-MIME mailers (i.e., mailtool, elm) have a content-type
     * field, so we can attempt to convert the type to BODY here.
     */
    if (ct->type == TYPETEXT)
      ct->subtype = safe_strdup ("plain");
    else if (ct->type == TYPEAUDIO)
      ct->subtype = safe_strdup ("basic");
    else if (ct->type == TYPEMESSAGE)
      ct->subtype = safe_strdup ("rfc822");
    else if (ct->type == TYPEOTHER)
    {
      char buffer[SHORT_STRING];

      /* an unknown bare type ``foo'' becomes application/x-foo */
      ct->type = TYPEAPPLICATION;
      snprintf (buffer, sizeof (buffer), "x-%s", s);
      ct->subtype = safe_strdup (buffer);
    }
    else
      ct->subtype = safe_strdup ("x-unknown");
  }

  /* Default character set for text types. */
  if (ct->type == TYPETEXT)
  {
    if (!(pc = mutt_get_parameter ("charset", ct->parameter)))
      mutt_set_parameter ("charset", (AssumedCharset && *AssumedCharset) ?
                         (const char *) mutt_get_default_charset ()
                         : "us-ascii", &ct->parameter);
  }
}

380
/* Parse the value of a Content-Disposition header field into ``ct''.
 *
 * The disposition is DISPINLINE when ``s'' starts with "inline",
 * DISPFORMDATA when it starts with "form-data" and DISPATTACH otherwise.
 * If a parameter list follows, its "filename" parameter replaces
 * ``ct->filename'' and its "name" parameter replaces ``ct->form_name''.
 */
static void parse_content_disposition (const char *s, BODY *ct)
{
  PARAMETER *parms;

  if (!ascii_strncasecmp ("inline", s, 6))
    ct->disposition = DISPINLINE;
  else if (!ascii_strncasecmp ("form-data", s, 9))
    ct->disposition = DISPFORMDATA;
  else
    ct->disposition = DISPATTACH;

  /* Check to see if a default filename was given */
  if ((s = strchr (s, ';')) != NULL)
  {
    s = skip_email_wsp(s + 1);
    parms = parse_parameters (s);

    if ((s = mutt_get_parameter ("filename", parms)))
      mutt_str_replace (&ct->filename, s);

    /* replace rather than overwrite, so that a form name stored by an
     * earlier Content-Disposition field on the same BODY is not leaked */
    if ((s = mutt_get_parameter ("name", parms)))
      mutt_str_replace (&ct->form_name, s);

    mutt_free_parameter (&parms);
  }
}

/* args:
404
 *	fp	stream to read from
405
 *
406 407
 *	digest	1 if reading subparts of a multipart/digest, 0
 *		otherwise
408
 */
409 410

BODY *mutt_read_mime_header (FILE *fp, int digest)
411
{
412 413 414
  BODY *p = mutt_new_body();
  char *c;
  char *line = safe_malloc (LONG_STRING);
415
  size_t linelen = LONG_STRING;
416 417
  
  p->hdr_offset  = ftello (fp);
418

419 420
  p->encoding    = ENC7BIT; /* default from RFC1521 */
  p->type        = digest ? TYPEMESSAGE : TYPETEXT;
421
  p->disposition = DISPINLINE;
422 423
  
  while (*(line = mutt_read_rfc822_line (fp, line, &linelen)) != 0)
424 425
  {
    /* Find the value of the current header */
426
    if ((c = strchr (line, ':')))
427 428
    {
      *c = 0;
429
      c = skip_email_wsp(c + 1);
430 431
      if (!*c)
      {
432 433
	dprint (1, (debugfile, "mutt_read_mime_header(): skipping empty header field: %s\n", line));
	continue;
434 435 436 437
      }
    }
    else
    {
438
      dprint (1, (debugfile, "read_mime_header: bogus MIME header: %s\n", line));
439 440 441
      break;
    }

442
    if (!ascii_strncasecmp ("content-", line, 8))
443
    {
444 445 446 447 448 449 450
      if (!ascii_strcasecmp ("type", line + 8))
	mutt_parse_content_type (c, p);
      else if (!ascii_strcasecmp ("transfer-encoding", line + 8))
	p->encoding = mutt_check_encoding (c);
      else if (!ascii_strcasecmp ("disposition", line + 8))
	parse_content_disposition (c, p);
      else if (!ascii_strcasecmp ("description", line + 8))
451
      {
452 453
	mutt_str_replace (&p->description, c);
	rfc2047_decode (&p->description);
454
      }
455
    } 
456
#ifdef SUN_ATTACHMENT
457
    else if (!ascii_strncasecmp ("x-sun-", line, 6))
458
    {
459 460 461 462 463 464 465
      if (!ascii_strcasecmp ("data-type", line + 6))
        mutt_parse_content_type (c, p);
      else if (!ascii_strcasecmp ("encoding-info", line + 6))
        p->encoding = mutt_check_encoding (c);
      else if (!ascii_strcasecmp ("content-lines", line + 6))
        mutt_set_parameter ("content-lines", c, &(p->parameter));
      else if (!ascii_strcasecmp ("data-description", line + 6))
466
      {
467 468
	mutt_str_replace (&p->description, c);
        rfc2047_decode (&p->description);
469 470 471 472
      }
    }
#endif
  }
473
  p->offset = ftello (fp); /* Mark the start of the real data */
474
  if (p->type == TYPETEXT && !p->subtype)
475
    p->subtype = safe_strdup ("plain");
476
  else if (p->type == TYPEMESSAGE && !p->subtype)
477
    p->subtype = safe_strdup ("rfc822");
478

479
  FREE (&line);
480

481
  return (p);
482 483
}

484
/* Parse the sub-structure of body part ``b'', reading from ``fp''.
 *
 * Multipart bodies are split at their boundary, and message bodies with a
 * subtype are parsed either as an embedded message or (external-body) as
 * a MIME header.  All other types, and message subtypes that are neither
 * of these, are left untouched.  When an attempted parse yields no parts,
 * ``b'' is turned into text/plain.
 */
void mutt_parse_part (FILE *fp, BODY *b)
{
  char *boundary = 0;

  if (b->type == TYPEMULTIPART)
  {
#ifdef SUN_ATTACHMENT
    if ( !ascii_strcasecmp (b->subtype, "x-sun-attachment") )
      boundary = "--------";
    else
#endif
      boundary = mutt_get_parameter ("boundary", b->parameter);

    fseeko (fp, b->offset, SEEK_SET);
    b->parts = mutt_parse_multipart (fp, boundary,
				     b->offset + b->length,
				     ascii_strcasecmp ("digest", b->subtype) == 0);
  }
  else if (b->type == TYPEMESSAGE)
  {
    if (b->subtype)
    {
      fseeko (fp, b->offset, SEEK_SET);
      if (mutt_is_message_type(b->type, b->subtype))
	b->parts = mutt_parse_messageRFC822 (fp, b);
      else if (ascii_strcasecmp (b->subtype, "external-body") == 0)
	b->parts = mutt_read_mime_header (fp, 0);
      else
	return;
    }
  }
  else
    return;

  /* try to recover from parsing error */
  if (!b->parts)
  {
    b->type = TYPETEXT;
    mutt_str_replace (&b->subtype, "plain");
  }
}

529
/* parse a MESSAGE/RFC822 body
530 531
 *
 * args:
532
 *	fp		stream to read from
533
 *
534 535
 *	parent		structure which contains info about the message/rfc822
 *			body part
536 537 538
 *
 * NOTE: this assumes that `parent->length' has been set!
 */
539 540

BODY *mutt_parse_messageRFC822 (FILE *fp, BODY *parent)
541
{
542
  BODY *msg;
543

544 545 546
  parent->hdr = mutt_new_header ();
  parent->hdr->offset = ftello (fp);
  parent->hdr->env = mutt_read_rfc822_header (fp, parent->hdr, 0, 0);
547 548 549 550 551 552 553 554 555 556 557 558
  msg = parent->hdr->content;

  /* ignore the length given in the content-length since it could be wrong
     and we already have the info to calculate the correct length */
  /* if (msg->length == -1) */
  msg->length = parent->length - (msg->offset - parent->offset);

  /* if body of this message is empty, we can end up with a negative length */
  if (msg->length < 0)
    msg->length = 0;

  mutt_parse_part(fp, msg);
559
  return (msg);
560 561 562 563 564
}

/* parse a multipart structure
 *
 * args:
565
 *	fp		stream to read from
566
 *
567
 *	boundary	body separator
568
 *
569 570
 *	end_off		length of the multipart body (used when the final
 *			boundary is missing to avoid reading too far)
571
 *
572
 *	digest		1 if reading a multipart/digest, 0 otherwise
573
 */
574 575

BODY *mutt_parse_multipart (FILE *fp, const char *boundary, LOFF_T end_off, int digest)
576 577 578 579 580 581
{
#ifdef SUN_ATTACHMENT
  int lines;
#endif
  int blen, len, crlf = 0;
  char buffer[LONG_STRING];
582
  BODY *head = 0, *last = 0, *new = 0;
583 584 585 586 587
  int i;
  int final = 0; /* did we see the ending boundary? */

  if (!boundary)
  {
588 589
    mutt_error _("multipart message has no boundary parameter!");
    return (NULL);
590 591
  }

592 593
  blen = mutt_strlen (boundary);
  while (ftello (fp) < end_off && fgets (buffer, LONG_STRING, fp) != NULL)
594
  {
595
    len = mutt_strlen (buffer);
596

597
    crlf =  (len > 1 && buffer[len - 2] == '\r') ? 1 : 0;
598 599

    if (buffer[0] == '-' && buffer[1] == '-' &&
600
	mutt_strncmp (buffer + 2, boundary, blen) == 0)
601 602 603
    {
      if (last)
      {
604 605 606 607 608 609
	last->length = ftello (fp) - last->offset - len - 1 - crlf;
	if (last->parts && last->parts->length == 0)
	  last->parts->length = ftello (fp) - last->parts->offset - len - 1 - crlf;
	/* if the body is empty, we can end up with a -1 length */
	if (last->length < 0)
	  last->length = 0;
610 611 612
      }

      /* Remove any trailing whitespace, up to the length of the boundary */
613
      for (i = len - 1; ISSPACE (buffer[i]) && i >= blen + 2; i--)
614 615 616
        buffer[i] = 0;

      /* Check for the end boundary */
617
      if (mutt_strcmp (buffer + blen + 2, "--") == 0)
618
      {
619 620
	final = 1;
	break; /* done parsing */
621 622 623
      }
      else if (buffer[2 + blen] == 0)
      {
624
	new = mutt_read_mime_header (fp, digest);
625 626

#ifdef SUN_ATTACHMENT
627 628 629 630 631 632
        if (mutt_get_parameter ("content-lines", new->parameter)) {
	  mutt_atoi (mutt_get_parameter ("content-lines", new->parameter), &lines);
	  for ( ; lines; lines-- )
	     if (ftello (fp) >= end_off || fgets (buffer, LONG_STRING, fp) == NULL)
	       break;
	}
633
#endif
634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651
	
	/*
	 * Consistency checking - catch
	 * bad attachment end boundaries
	 */
	
	if(new->offset > end_off)
	{
	  mutt_free_body(&new);
	  break;
	}
	if (head)
	{
	  last->next = new;
	  last = new;
	}
	else
	  last = head = new;
652 653 654 655 656 657 658 659 660
      }
    }
  }

  /* in case of missing end boundary, set the length to something reasonable */
  if (last && last->length == 0 && !final)
    last->length = end_off - last->offset;

  /* parse recursive MIME parts */
661
  for(last = head; last; last = last->next)
662
    mutt_parse_part(fp, last);
663 664
  
  return (head);
665 666
}

667
/* Extract a timezone that was written as a comment, e.g. "(MST)".
 *
 * When ``tz'' does not start with ``('' it is returned unchanged.
 * Otherwise the text after the parenthesis and any following whitespace,
 * up to the first blank or ``)'', is copied into ``buf'' (cut off to fit
 * ``buflen'' bytes including the terminator) and ``buf'' is returned.  If
 * neither a blank nor a ``)'' follows, the pointer past the parenthesis
 * and whitespace is returned and ``buf'' is left untouched.
 */
static const char *uncomment_timezone (char *buf, size_t buflen, const char *tz)
{
  const char *end;
  size_t n;

  if (*tz != '(')
    return tz; /* no need to do anything */

  tz = skip_email_wsp(tz + 1);

  end = strpbrk (tz, " )");
  if (end == NULL)
    return tz;

  n = end - tz;
  if (n > buflen - 1)
    n = buflen - 1;

  memcpy (buf, tz, n);
  buf[n] = 0;
  return buf;
}

685
/* One symbolic timezone name together with its offset from UTC. */
struct tz_t
{
  char tzname[5];
  unsigned char zhours;
  unsigned char zminutes;
  unsigned char zoccident; /* west of UTC? */
};

/* Table of known timezone names.  mutt_parse_date() searches it with
 * bsearch(), so the entries must stay sorted by name.  Names with more
 * than one meaning keep a single entry; the alternatives are listed in
 * comments.
 */
static const struct tz_t TimeZones[] =
{
  { "aat",   1,  0, 1 }, /* Atlantic Africa Time */
  { "adt",   4,  0, 0 }, /* Arabia DST */
  { "ast",   3,  0, 0 }, /* Arabia */
/*{ "ast",   4,  0, 1 },*/ /* Atlantic */
  { "bst",   1,  0, 0 }, /* British DST */
  { "cat",   1,  0, 0 }, /* Central Africa */
  { "cdt",   5,  0, 1 },
  { "cest",  2,  0, 0 }, /* Central Europe DST */
  { "cet",   1,  0, 0 }, /* Central Europe */
  { "cst",   6,  0, 1 },
/*{ "cst",   8,  0, 0 },*/ /* China */
/*{ "cst",   9, 30, 0 },*/ /* Australian Central Standard Time */
  { "eat",   3,  0, 0 }, /* East Africa */
  { "edt",   4,  0, 1 },
  { "eest",  3,  0, 0 }, /* Eastern Europe DST */
  { "eet",   2,  0, 0 }, /* Eastern Europe */
  { "egst",  0,  0, 0 }, /* Eastern Greenland DST */
  { "egt",   1,  0, 1 }, /* Eastern Greenland */
  { "est",   5,  0, 1 },
  { "gmt",   0,  0, 0 },
  { "gst",   4,  0, 0 }, /* Persian Gulf */
  { "hkt",   8,  0, 0 }, /* Hong Kong */
  { "ict",   7,  0, 0 }, /* Indochina */
  { "idt",   3,  0, 0 }, /* Israel DST */
  { "ist",   2,  0, 0 }, /* Israel */
/*{ "ist",   5, 30, 0 },*/ /* India */
  { "jst",   9,  0, 0 }, /* Japan */
  { "kst",   9,  0, 0 }, /* Korea */
  { "mdt",   6,  0, 1 },
  { "met",   1,  0, 0 }, /* this is now officially CET */
  { "msd",   4,  0, 0 }, /* Moscow DST */
  { "msk",   3,  0, 0 }, /* Moscow */
  { "mst",   7,  0, 1 },
  { "nzdt", 13,  0, 0 }, /* New Zealand DST */
  { "nzst", 12,  0, 0 }, /* New Zealand */
  { "pdt",   7,  0, 1 },
  { "pst",   8,  0, 1 },
  { "sat",   2,  0, 0 }, /* South Africa */
  { "smt",   4,  0, 0 }, /* Seychelles */
  { "sst",  11,  0, 1 }, /* Samoa */
/*{ "sst",   8,  0, 0 },*/ /* Singapore */
  { "utc",   0,  0, 0 },
  { "wat",   0,  0, 0 }, /* West Africa */
  { "west",  1,  0, 0 }, /* Western Europe DST */
  { "wet",   0,  0, 0 }, /* Western Europe */
  { "wgst",  2,  0, 1 }, /* Western Greenland DST */
  { "wgt",   3,  0, 1 }, /* Western Greenland */
  { "wst",   8,  0, 0 }, /* Western Australia */
};

/* parses a date string in RFC822 format:
 *
 * Date: [ weekday , ] day-of-month month year hour:minute:second timezone
 *
 * This routine assumes that `h' has been initialized to 0.  the `timezone'
 * field is optional, defaulting to +0000 if missing.
 */
751
time_t mutt_parse_date (const char *s, HEADER *h)
752 753
{
  int count = 0;
754
  char *t;
755 756 757 758 759 760
  int hour, min, sec;
  struct tm tm;
  int i;
  int tz_offset = 0;
  int zhours = 0;
  int zminutes = 0;
761 762
  int zoccident = 0;
  const char *ptz;
763 764 765 766
  char tzstr[SHORT_STRING];
  char scratch[SHORT_STRING];

  /* Don't modify our argument. Fixed-size buffer is ok here since
767
   * the date format imposes a natural limit. 
768 769
   */

770 771
  strfcpy (scratch, s, sizeof (scratch));
  
772
  /* kill the day of the week, if it exists. */
773
  if ((t = strchr (scratch, ',')))
774 775 776
    t++;
  else
    t = scratch;
777
  t = skip_email_wsp(t);
778

779
  memset (&tm, 0, sizeof (tm));
780

781
  while ((t = strtok (t, " \t")) != NULL)
782 783 784 785
  {
    switch (count)
    {
      case 0: /* day of the month */
786 787 788 789 790
	if (mutt_atoi (t, &tm.tm_mday) < 0 || tm.tm_mday < 0)
	  return (-1);
	if (tm.tm_mday > 31)
	  return (-1);
	break;
791 792

      case 1: /* month of the year */
793 794 795 796
	if ((i = mutt_check_month (t)) < 0)
	  return (-1);
	tm.tm_mon = i;
	break;
797 798

      case 2: /* year */
799 800
	if (mutt_atoi (t, &tm.tm_year) < 0 || tm.tm_year < 0)
	  return (-1);
801
        if (tm.tm_year < 50)
802
	  tm.tm_year += 100;
803
        else if (tm.tm_year >= 1900)
804 805
	  tm.tm_year -= 1900;
	break;
806 807

      case 3: /* time of day */
808 809 810 811 812 813 814 815 816 817 818 819 820
	if (sscanf (t, "%d:%d:%d", &hour, &min, &sec) == 3)
	  ;
	else if (sscanf (t, "%d:%d", &hour, &min) == 2)
	  sec = 0;
	else
	{
	  dprint(1, (debugfile, "parse_date: could not process time format: %s\n", t));
	  return(-1);
	}
	tm.tm_hour = hour;
	tm.tm_min = min;
	tm.tm_sec = sec;
	break;
821 822

      case 4: /* timezone */
823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871
	/* sometimes we see things like (MST) or (-0700) so attempt to
	 * compensate by uncommenting the string if non-RFC822 compliant
	 */
	ptz = uncomment_timezone (tzstr, sizeof (tzstr), t);

	if (*ptz == '+' || *ptz == '-')
	{
	  if (ptz[1] && ptz[2] && ptz[3] && ptz[4]
	      && isdigit ((unsigned char) ptz[1]) && isdigit ((unsigned char) ptz[2])
	      && isdigit ((unsigned char) ptz[3]) && isdigit ((unsigned char) ptz[4]))
	  {
	    zhours = (ptz[1] - '0') * 10 + (ptz[2] - '0');
	    zminutes = (ptz[3] - '0') * 10 + (ptz[4] - '0');

	    if (ptz[0] == '-')
	      zoccident = 1;
	  }
	}
	else
	{
	  struct tz_t *tz;

	  tz = bsearch (ptz, TimeZones, sizeof TimeZones/sizeof (struct tz_t),
			sizeof (struct tz_t),
			(int (*)(const void *, const void *)) ascii_strcasecmp
			/* This is safe to do: A pointer to a struct equals
			 * a pointer to its first element*/);

	  if (tz)
	  {
	    zhours = tz->zhours;
	    zminutes = tz->zminutes;
	    zoccident = tz->zoccident;
	  }

	  /* ad hoc support for the European MET (now officially CET) TZ */
	  if (ascii_strcasecmp (t, "MET") == 0)
	  {
	    if ((t = strtok (NULL, " \t")) != NULL)
	    {
	      if (!ascii_strcasecmp (t, "DST"))
		zhours++;
	    }
	  }
	}
	tz_offset = zhours * 3600 + zminutes * 60;
	if (!zoccident)
	  tz_offset = -tz_offset;
	break;
872 873 874 875 876 877 878
    }
    count++;
    t = 0;
  }

  if (count < 4) /* don't check for missing timezone */
  {
879 880
    dprint(1,(debugfile, "parse_date(): error parsing date format, using received time\n"));
    return (-1);
881 882 883 884 885 886 887 888 889
  }

  if (h)
  {
    h->zhours = zhours;
    h->zminutes = zminutes;
    h->zoccident = zoccident;
  }

890
  return (mutt_mktime (&tm, 0) + tz_offset);
891 892
}

893 894 895
/* extract the first substring that looks like a message-id.
 * call back with NULL for more (like strtok).
 *
 * args:
 *	s		string to search, or NULL to continue after the
 *			id returned by the previous call
 *
 *	saveptr		when not NULL, receives the position just after the
 *			id that is returned; it is read as the starting
 *			point when ``s'' is NULL
 *
 * Returns a newly allocated string holding the id from ``<'' through
 * ``>'', or NULL when no further id is found or there is nothing to
 * search.  An id that is broken once by whitespace or ``;'' is returned
 * with its two pieces joined; an id broken more often is discarded.
 */
char *mutt_extract_message_id (const char *s, const char **saveptr)
{
  const char *o, *onull, *p;
  char *ret = NULL;

  if (s)
    p = s;
  else if (saveptr)
    p = *saveptr;
  else
    return NULL;

  /* nothing to resume from, e.g. no id has been returned yet */
  if (!p)
    return NULL;

  /* s:     start of the piece of the id currently being scanned
   * o:     start of the first piece when the id was broken in two
   * onull: end of that first piece (exclusive) */
  for (s = NULL, o = NULL, onull = NULL;
       (p = strpbrk (p, "<> \t;")) != NULL; ++p)
  {
    if (*p == '<')
    {
      s = p;
      o = onull = NULL;
      continue;
    }

    if (!s)
      continue;

    if (*p == '>')
    {
      /* the first piece only contributes when there is one */
      size_t olen = o ? (size_t) (onull - o) : 0;
      size_t slen = p - s + 1;

      ret = safe_malloc (olen + slen + 1);
      if (o)
	memcpy (ret, o, olen);
      memcpy (ret + olen, s, slen);
      ret[olen + slen] = '\0';
      if (saveptr)
	*saveptr = p + 1; /* next call starts after '>' */
      return ret;
    }

    /* some idiotic clients break their message-ids between lines */
    if (s == p)
      /* step past another whitespace */
      s = p + 1;
    else if (o)
      /* more than two lines, give up */
      s = o = onull = NULL;
    else
    {
      /* remember the first line, start looking for the second */
      o = s;
      onull = p;
      s = p + 1;
    }
  }

  return NULL;
}

953
/* Parse the MIME structure of message ``cur'' in mailbox ``ctx''.
 *
 * Only message and multipart bodies that have not been parsed before
 * (cur->content->parts is still empty) are read from the mailbox.
 * ``cur->attach_valid'' is reset in every case.
 */
void mutt_parse_mime_message (CONTEXT *ctx, HEADER *cur)
{
  MESSAGE *msg;
  int has_parts = cur->content->type == TYPEMESSAGE ||
		  cur->content->type == TYPEMULTIPART;

  /* nothing to do for other types, or when it was parsed earlier */
  if (has_parts && !cur->content->parts)
  {
    msg = mx_open_message (ctx, cur->msgno);
    if (msg)
    {
      mutt_parse_part (msg->fp, cur->content);

      if (WithCrypto)
        cur->security = crypt_query (cur->content);

      mx_close_message (ctx, &msg);
    }
  }

  cur->attach_valid = 0;
}

979 980
/*
 * Parses a single header field and stores the result in the envelope
 * and/or the message header.
 *
 * e          envelope that receives the parsed fields
 * hdr        message header; optional.  Fields that live in the header
 *            (content info, dates, flags, ...) are skipped when it is NULL.
 * line       field name.  The ':' that followed the name must have been
 *            overwritten with NUL by the caller; it is put back here when
 *            the field is kept as a user-defined header.
 * p          field body
 * user_hdrs  if set, fields that were not recognized are appended to
 *            e->userhdrs
 * weed       if set and the OPTWEED option is on, unrecognized fields that
 *            match Ignore (and not UnIgnore) are not stored
 * do_2047    if set, stored user headers are passed to rfc2047_decode()
 * lastp      in/out pointer to the tail of the user header list; may be
 *            NULL, in which case the tail is not carried between calls
 *
 * Returns 1 if the field was recognized, 0 otherwise.  Note that a few
 * fields (Expires, Received, Supersedes) are acted upon without being
 * reported as matched, so they can still be kept as user headers.
 */
int mutt_parse_rfc822_line (ENVELOPE *e, HEADER *hdr, char *line, char *p, short user_hdrs, short weed,
                            short do_2047, LIST **lastp)
{
  int matched = 0;
  LIST *last = NULL;

  if (lastp)
    last = *lastp;

  /* dispatch on the first letter, then compare the rest of the name */
  switch (ascii_tolower (line[0]))
  {
    case 'a':
    if (ascii_strcasecmp (line+1, "pparently-to") == 0)
    {
      e->to = rfc822_parse_adrlist (e->to, p);
      matched = 1;
    }
    else if (ascii_strcasecmp (line+1, "pparently-from") == 0)
    {
      e->from = rfc822_parse_adrlist (e->from, p);
      matched = 1;
    }
    break;

    case 'b':
    if (ascii_strcasecmp (line+1, "cc") == 0)
    {
      e->bcc = rfc822_parse_adrlist (e->bcc, p);
      matched = 1;
    }
    break;

    case 'c':
    if (ascii_strcasecmp (line+1, "c") == 0)
    {
      e->cc = rfc822_parse_adrlist (e->cc, p);
      matched = 1;
    }
    else if (ascii_strncasecmp (line + 1, "ontent-", 7) == 0)
    {
      /* Content-* fields are matched even without hdr, just not stored */
      if (ascii_strcasecmp (line+8, "type") == 0)
      {
        if (hdr)
          mutt_parse_content_type (p, hdr->content);
        matched = 1;
      }
      else if (ascii_strcasecmp (line+8, "transfer-encoding") == 0)
      {
        if (hdr)
          hdr->content->encoding = mutt_check_encoding (p);
        matched = 1;
      }
      else if (ascii_strcasecmp (line+8, "length") == 0)
      {
        if (hdr)
        {
          /* negative lengths are normalized to -1 */
          if ((hdr->content->length = atol (p)) < 0)
            hdr->content->length = -1;
        }
        matched = 1;
      }
      else if (ascii_strcasecmp (line+8, "description") == 0)
      {
        if (hdr)
        {
          mutt_str_replace (&hdr->content->description, p);
          rfc2047_decode (&hdr->content->description);
        }
        matched = 1;
      }
      else if (ascii_strcasecmp (line+8, "disposition") == 0)
      {
        if (hdr)
          parse_content_disposition (p, hdr->content);
        matched = 1;
      }
    }
    break;

    case 'd':
    if (!ascii_strcasecmp ("ate", line + 1))
    {
      mutt_str_replace (&e->date, p);
      if (hdr)
        hdr->date_sent = mutt_parse_date (p, hdr);
      matched = 1;
    }
    break;

    case 'e':
    /* not reported as matched */
    if (!ascii_strcasecmp ("xpires", line + 1) &&
        hdr && mutt_parse_date (p, NULL) < time (NULL))
      hdr->expired = 1;
    break;

    case 'f':
    if (!ascii_strcasecmp ("rom", line + 1))
    {
      e->from = rfc822_parse_adrlist (e->from, p);
      matched = 1;
    }
    break;

    case 'i':
    if (!ascii_strcasecmp (line+1, "n-reply-to"))
    {
      mutt_free_list (&e->in_reply_to);
      e->in_reply_to = mutt_parse_references (p, 1);
      matched = 1;
    }
    break;

    case 'l':
    if (!ascii_strcasecmp (line + 1, "ines"))
    {
      if (hdr)
      {
        /*
         * HACK - mutt has, for a very short time, produced negative
         * Lines header values.  Ignore them.
         */
        if (mutt_atoi (p, &hdr->lines) < 0 || hdr->lines < 0)
          hdr->lines = 0;
      }

      matched = 1;
    }
    else if (!ascii_strcasecmp (line + 1, "ist-Post"))
    {
      /* RFC 2369.  FIXME: We should ignore whitespace, but don't. */
      if (strncmp (p, "NO", 2))
      {
        char *beg = strchr (p, '<');

        while (beg)
        {
          char *end = strchr (++beg, '>');
          char *sep;

          if (!end)
            break;

          /* Take the first mailto URL */
          if (url_check_scheme (beg) == U_MAILTO)
          {
            FREE (&e->list_post);
            e->list_post = mutt_substrdup (beg, end);
            break;
          }

          /* move to the '<' of the next comma-separated entry, so that the
           * scheme check above always starts on the URL itself */
          sep = strchr (end, ',');
          beg = sep ? strchr (sep, '<') : NULL;
        }
      }
      matched = 1;
    }
    break;

    case 'm':
    if (!ascii_strcasecmp (line + 1, "ime-version"))
    {
      if (hdr)
        hdr->mime = 1;
      matched = 1;
    }
    else if (!ascii_strcasecmp (line + 1, "essage-id"))
    {
      /* We add a new "Message-ID:" when building a message */
      FREE (&e->message_id);
      e->message_id = mutt_extract_message_id (p, NULL);
      matched = 1;
    }
    else if (!ascii_strncasecmp (line + 1, "ail-", 4))
    {
      if (!ascii_strcasecmp (line + 5, "reply-to"))
      {
        /* override the Reply-To: field */
        rfc822_free_address (&e->reply_to);
        e->reply_to = rfc822_parse_adrlist (e->reply_to, p);
        matched = 1;
      }
      else if (!ascii_strcasecmp (line + 5, "followup-to"))
      {
        e->mail_followup_to = rfc822_parse_adrlist (e->mail_followup_to, p);
        matched = 1;
      }
    }
    break;

    case 'r':
    if (!ascii_strcasecmp (line + 1, "eferences"))
    {
      mutt_free_list (&e->references);
      e->references = mutt_parse_references (p, 0);
      matched = 1;
    }
    else if (!ascii_strcasecmp (line + 1, "eply-to"))
    {
      e->reply_to = rfc822_parse_adrlist (e->reply_to, p);
      matched = 1;
    }
    else if (!ascii_strcasecmp (line + 1, "eturn-path"))
    {
      e->return_path = rfc822_parse_adrlist (e->return_path, p);
      matched = 1;
    }
    else if (!ascii_strcasecmp (line + 1, "eceived"))
    {
      /* only the first Received: field seen sets the date; not reported
       * as matched */
      if (hdr && !hdr->received)
      {
        char *d = strrchr (p, ';');

        if (d)
          hdr->received = mutt_parse_date (d + 1, NULL);
      }
    }
    break;

    case 's':
    if (!ascii_strcasecmp (line + 1, "ubject"))
    {
      /* the first Subject: wins */
      if (!e->subject)
        e->subject = safe_strdup (p);
      matched = 1;
    }
    else if (!ascii_strcasecmp (line + 1, "ender"))
    {
      e->sender = rfc822_parse_adrlist (e->sender, p);
      matched = 1;
    }
    else if (!ascii_strcasecmp (line + 1, "tatus"))
    {
      if (hdr)
      {
        /* every character of the value is an independent flag */
        while (*p)
        {
          switch (*p)
          {
            case 'r':
              hdr->replied = 1;
              break;
            case 'O':
              hdr->old = 1;
              break;
            case 'R':
              hdr->read = 1;
              break;
          }
          p++;
        }
      }
      matched = 1;
    }
    else if ((!ascii_strcasecmp ("upersedes", line + 1) ||
              !ascii_strcasecmp ("upercedes", line + 1)) && hdr)
    {
      /* both spellings are accepted; not reported as matched */
      FREE (&e->supersedes);
      e->supersedes = safe_strdup (p);
    }
    break;

    case 't':
    if (ascii_strcasecmp (line+1, "o") == 0)
    {
      e->to = rfc822_parse_adrlist (e->to, p);
      matched = 1;
    }
    break;

    case 'x':
    if (ascii_strcasecmp (line+1, "-status") == 0)
    {
      if (hdr)
      {
        while (*p)
        {
          switch (*p)
          {
            case 'A':
              hdr->replied = 1;
              break;
            case 'D':
              hdr->deleted = 1;
              break;
            case 'F':
              hdr->flagged = 1;
              break;
            default:
              break;
          }
          p++;
        }
      }
      matched = 1;
    }
    else if (ascii_strcasecmp (line+1, "-label") == 0)
    {
      FREE (&e->x_label);
      e->x_label = safe_strdup (p);
      matched = 1;
    }
    break;

    default:
    break;
  }

  /* Keep track of the user-defined headers */
  if (!matched && user_hdrs)
  {
    /* restore the original line */
    line[strlen (line)] = ':';

    if (weed && option (OPTWEED) && mutt_matches_ignore (line, Ignore)
        && !mutt_matches_ignore (line, UnIgnore))
      goto done;

    /* append to the list, creating it if this is the first entry */
    if (last)
    {
      last->next = mutt_new_list ();
      last = last->next;
    }
    else
      last = e->userhdrs = mutt_new_list ();
    last->data = safe_strdup (line);
    if (do_2047)
      rfc2047_decode (&last->data);
  }

  done:

  /* lastp is optional on entry, so it has to be optional here as well */
  if (lastp)
    *lastp = last;
  return matched;
}
1306 1307
  
  
1308 1309 1310 1311
/* mutt_read_rfc822_header() -- parses an RFC822 header
 *
 * Args:
 *
 * f		stream to read from
 *
 * hdr		header structure of current message (optional).
 * 
 * user_hdrs	If set, store user headers.  Used for recall-message and
 * 		postpone modes.
 * 
 * weed		If this parameter is set and the user has activated the
 * 		$weed option, honor the header weed list for user headers.
 * 	        Used for recall-message.
 * 
 * Returns:     newly allocated envelope structure.  Free it with
 *              mutt_free_envelope() when it is no longer needed.
 */
1326 1327
ENVELOPE *mutt_read_rfc822_header (FILE *f, HEADER *hdr, short user_hdrs,
				   short weed)
1328
{
1329 1330 1331 1332
  ENVELOPE *e = mutt_new_envelope();
  LIST *last = NULL;
  char *line = safe_malloc (LONG_STRING);
  char *p;
1333 1334
  LOFF_T loc;
  size_t linelen = LONG_STRING;
1335
  char buf[LONG_STRING+1];
1336 1337 1338

  if (hdr)
  {
1339
    if (hdr->content == NULL)
1340
    {
1341
      hdr->content = mutt_new_body ();
1342 1343

      /* set the defaults from RFC1521 */
1344 1345 1346 1347
      hdr->content->type        = TYPETEXT;
      hdr->content->subtype     = safe_strdup ("plain");
      hdr->content->encoding    = ENC7BIT;
      hdr->content->length      = -1;
1348 1349 1350 1351 1352 1353

      /* RFC 2183 says this is arbitrary */
      hdr->content->disposition = DISPINLINE;
    }
  }

1354 1355
  while ((loc = ftello (f)),
	  *(line = mutt_read_rfc822_line (f, line, &linelen)) != 0)
1356
  {
1357
    if ((p = strpbrk (line, ": \t")) == NULL || *p != ':')
1358 1359 1360 1361 1362
    {
      char return_path[LONG_STRING];
      time_t t;

      /* some bogus MTAs will quote the original "From " line */
1363 1364 1365
      if (mutt_strncmp (">From ", line, 6) == 0)
	continue; /* just ignore */
      else if (is_from (line, return_path, sizeof (return_path), &t))
1366
      {
1367 1368 1369 1370
	/* MH sometimes has the From_ line in the middle of the header! */
	if (hdr && !hdr->received)
	  hdr->received = t - mutt_local_tz (t);
	continue;
1371 1372
      }

1373
      fseeko (f, loc, 0);
1374 1375 1376 1377 1378 1379 1380 1381 1382 1383
      break; /* end of header */
    }

    *buf = '\0';

    if (mutt_match_spam_list(line, SpamList, buf, sizeof(buf)))
    {
      if (!mutt_match_rx_list(line, NoSpamList))
      {

1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416
	/* if spam tag already exists, figure out how to amend it */
	if (e->spam && *buf)
	{
	  /* If SpamSep defined, append with separator */
	  if (SpamSep)
	  {
	    mutt_buffer_addstr(e->spam, SpamSep);
	    mutt_buffer_addstr(e->spam, buf);
	  }

	  /* else overwrite */
	  else
	  {
	    e->spam->dptr = e->spam->data;
	    *e->spam->dptr = '\0';
	    mutt_buffer_addstr(e->spam, buf);
	  }
	}

	/* spam tag is new, and match expr is non-empty; copy */
	else if (!e->spam && *buf)
	{
	  e->spam = mutt_buffer_from (buf);
	}

	/* match expr is empty; plug in null string if no existing tag */
	else if (!e->spam)
	{
	  e->spam = mutt_buffer_from("");
	}

	if (e->spam && e->spam->data)
          dprint(5, (debugfile, "p822: spam = %s\n", e->spam->data));
1417 1418 1419 1420
      }
    }

    *p = 0;
1421
    p = skip_email_wsp(p + 1);
1422 1423 1424
    if (!*p)
      continue; /* skip empty header fields */

1425
    mutt_parse_rfc822_line (e, hdr, line, p, user_hdrs, weed, 1, &last);