rfc2047.c 20.9 KB
Newer Older
1
/*
2
 * Copyright (C) 1996-2000,2010 Michael R. Elkins <me@mutt.org>
3
 * Copyright (C) 2000-2002 Edmund Grimley Evans <edmundo@rano.org>
4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27
 * 
 *     This program is free software; you can redistribute it and/or modify
 *     it under the terms of the GNU General Public License as published by
 *     the Free Software Foundation; either version 2 of the License, or
 *     (at your option) any later version.
 * 
 *     This program is distributed in the hope that it will be useful,
 *     but WITHOUT ANY WARRANTY; without even the implied warranty of
 *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *     GNU General Public License for more details.
 * 
 *     You should have received a copy of the GNU General Public License
 *     along with this program; if not, write to the Free Software
 *     Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
 */ 

#if HAVE_CONFIG_H
# include "config.h"
#endif

#include "mutt.h"
#include "mime.h"
#include "charset.h"
#include "rfc2047.h"
28 29 30

#include <ctype.h>
#include <errno.h>
31 32
#include <stdio.h>
#include <stdlib.h>
33 34 35
#include <string.h>

/*
 * Assertions are active in this file by default.  Enabling the NDEBUG
 * definition below makes assert() expand to nothing instead.
 */
/*#define NDEBUG*/

#ifndef NDEBUG
# include <assert.h>
#else
# define assert(x)
#endif

/* Shortest and longest encoded-word handled here, in bytes. */
#define ENCWORD_LEN_MIN 9 /* strlen ("=?.?.?.?=") */
#define ENCWORD_LEN_MAX 75

/* True for NUL, space and horizontal tab. */
#define HSPACE(x) ((x) == '\0' || (x) == ' ' || (x) == '\t')

/* True when the two most significant bits of c are "10". */
#define CONTINUATION_BYTE(c) (((c) & 0xc0) == 0x80)

/* Defined in another translation unit. */
extern char RFC822Specials[];

53 54
/* Signature shared by b_encoder and q_encoder: the arguments are the
 * output buffer, the data to encode, the data length and the charset name
 * to place in the encoded-word; the result is the number of bytes written. */
typedef size_t (*encoder_t) (char *, ICONV_CONST char *, size_t,
			     const char *);
55

56 57 58
/*
 * Convert the flen bytes at f from charset `from' to charset `to'.
 *
 * On success the NUL-terminated result is stored in *t (allocated here,
 * owned by the caller), its length without the terminator in *tlen, and
 * the value of the first iconv() call is returned (per POSIX, the number
 * of non-reversible conversions).
 *
 * On failure (size_t)(-1) is returned and *t / *tlen are left untouched.
 * If an iconv() call failed, errno holds the value seen right after it.
 */
static size_t convert_string (ICONV_CONST char *f, size_t flen,
                              const char *from, const char *to,
                              char **t, size_t *tlen)
{
  iconv_t cd;
  char *buf, *ob;
  size_t obl, n;
  int e;

  cd = mutt_iconv_open (to, from, 0);
  if (cd == (iconv_t)(-1))
    return (size_t)(-1);

  /* iconv() is allowed to fill all obl bytes it is offered, so the
   * terminator written below needs one extra byte in the allocation. */
  obl = 4 * flen + 1;
  ob = buf = safe_malloc (obl + 1);

  n = iconv (cd, &f, &flen, &ob, &obl);
  if (n == (size_t)(-1) || iconv (cd, 0, 0, &ob, &obl) == (size_t)(-1))
  {
    /* Preserve errno across the cleanup calls. */
    e = errno;
    FREE (&buf);
    iconv_close (cd);
    errno = e;
    return (size_t)(-1);
  }
  *ob = '\0';

  *tlen = ob - buf;

  /* Shrink the buffer to the converted text plus its terminator. */
  safe_realloc (&buf, *tlen + 1);
  *t = buf;
  iconv_close (cd);

  return n;
}

90
int convert_nonmime_string (char **ps)
91
{
92
  const char *c, *c1;
93 94 95 96

  for (c = AssumedCharset; c; c = c1 ? c1 + 1 : 0)
  {
    char *u = *ps;
97 98
    char *s;
    char *fromcode;
99
    size_t m, n;
100
    size_t ulen = mutt_strlen (*ps);
101 102 103 104 105
    size_t slen;

    if (!u || !*u)
      return 0;

106 107
    c1 = strchr (c, ':');
    n = c1 ? c1 - c : mutt_strlen (c);
108 109
    if (!n)
      return 0;
110 111 112 113
    fromcode = safe_malloc (n + 1);
    strfcpy (fromcode, c, n + 1);
    m = convert_string (u, ulen, fromcode, Charset, &s, &slen);
    FREE (&fromcode);
114 115
    if (m != (size_t)(-1))
    {
116
      FREE (ps); /* __FREE_CHECKED__ */
117 118 119 120
      *ps = s;
      return 0;
    }
  }
121 122 123
  mutt_convert_string (ps,
      (const char *)mutt_get_default_charset (),
      Charset, MUTT_ICONV_HOOK_FROM);
124 125 126
  return -1;
}

127 128
char *mutt_choose_charset (const char *fromcode, const char *charsets,
		      char *u, size_t ulen, char **d, size_t *dlen)
129 130
{
  char canonical_buff[LONG_STRING];
131
  char *e = 0, *tocode = 0;
132
  size_t elen = 0, bestn = 0;
133
  const char *p, *q;
134 135 136

  for (p = charsets; p; p = q ? q + 1 : 0)
  {
137
    char *s, *t;
138 139
    size_t slen, n;

140
    q = strchr (p, ':');
141

142
    n = q ? q - p : strlen (p);
143
    if (!n)
144 145
      continue;

146 147
    t = safe_malloc (n + 1);
    memcpy (t, p, n);
148 149
    t[n] = '\0';

150
    n = convert_string (u, ulen, fromcode, t, &s, &slen);
151
    if (n == (size_t)(-1))
152
    {
153
      FREE (&t);
154
      continue;
155
    }
156 157 158 159

    if (!tocode || n < bestn)
    {
      bestn = n;
160
      FREE (&tocode);
161 162 163
      tocode = t;
      if (d)
      {
164 165
	FREE (&e);
	e = s;
166 167
      }
      else
168
	FREE (&s);
169 170
      elen = slen;
      if (!bestn)
171
	break;
172 173 174
    }
    else
    {
175 176
      FREE (&t);
      FREE (&s);
177 178 179 180 181 182 183 184
    }
  }
  if (tocode)
  {
    if (d)
      *d = e;
    if (dlen)
      *dlen = elen;
185 186 187
    
    mutt_canonical_charset (canonical_buff, sizeof (canonical_buff), tocode);
    mutt_str_replace (&tocode, canonical_buff);
188 189 190 191
  }
  return tocode;
}

192 193
/*
 * Write the encoded-word "=?tocode?B?...?=" for the dlen bytes at d into s,
 * looking up output characters in B64Chars.  Returns the number of bytes
 * written; no terminating NUL is stored and the output size is not checked.
 */
static size_t b_encoder (char *s, ICONV_CONST char *d, size_t dlen,
                         const char *tocode)
{
  char *out = s;
  const size_t codelen = strlen (tocode);

  *out++ = '=';
  *out++ = '?';
  memcpy (out, tocode, codelen);
  out += codelen;
  *out++ = '?';
  *out++ = 'B';
  *out++ = '?';

  /* Each complete group of three input bytes yields four characters. */
  while (dlen >= 3)
  {
    *out++ = B64Chars[(d[0] >> 2) & 0x3f];
    *out++ = B64Chars[((d[0] & 0x03) << 4) | ((d[1] >> 4) & 0x0f)];
    *out++ = B64Chars[((d[1] & 0x0f) << 2) | ((d[2] >> 6) & 0x03)];
    *out++ = B64Chars[d[2] & 0x3f];
    d += 3;
    dlen -= 3;
  }

  /* A trailing group of one or two bytes is padded with '='. */
  if (dlen == 2)
  {
    *out++ = B64Chars[(d[0] >> 2) & 0x3f];
    *out++ = B64Chars[((d[0] & 0x03) << 4) | ((d[1] >> 4) & 0x0f)];
    *out++ = B64Chars[(d[1] & 0x0f) << 2];
    *out++ = '=';
  }
  else if (dlen == 1)
  {
    *out++ = B64Chars[(d[0] >> 2) & 0x3f];
    *out++ = B64Chars[(d[0] & 0x03) << 4];
    *out++ = '=';
    *out++ = '=';
  }

  *out++ = '?';
  *out++ = '=';
  return out - s;
}

233 234
/*
 * Write the encoded-word "=?tocode?Q?...?=" for the dlen bytes at d into s.
 * A space becomes '_'; bytes >= 0x7f or < 0x20, '_' itself and anything in
 * MimeSpecials become "=XX" (upper-case hex); all other bytes are copied.
 * Returns the number of bytes written; no terminating NUL is stored and the
 * output size is not checked.
 */
static size_t q_encoder (char *s, ICONV_CONST char *d, size_t dlen,
                         const char *tocode)
{
  static const char hex[] = "0123456789ABCDEF";
  const size_t codelen = strlen (tocode);
  char *out = s;
  size_t i;

  *out++ = '=';
  *out++ = '?';
  memcpy (out, tocode, codelen);
  out += codelen;
  *out++ = '?';
  *out++ = 'Q';
  *out++ = '?';

  for (i = 0; i < dlen; i++)
  {
    const unsigned char c = d[i];

    if (c == ' ')
    {
      *out++ = '_';
      continue;
    }

    if (c >= 0x7f || c < 0x20 || c == '_' || strchr (MimeSpecials, c))
    {
      *out++ = '=';
      *out++ = hex[c >> 4];
      *out++ = hex[c & 0x0f];
    }
    else
      *out++ = c;
  }

  *out++ = '?';
  *out++ = '=';
  return out - s;
}

/*
 * Return 0 if and set *encoder and *wlen if the data (d, dlen) could
 * be converted to an encoded word of length *wlen using *encoder.
 * Otherwise return an upper bound on the maximum length of the data
 * which could be converted.
 * The data is converted from fromcode (which must be stateless) to
 * tocode, unless fromcode is 0, in which case the data is assumed to
 * be already in tocode, which should be 8-bit and stateless.
 */
269 270 271
static size_t try_block (ICONV_CONST char *d, size_t dlen,
			 const char *fromcode, const char *tocode,
			 encoder_t *encoder, size_t *wlen)
272 273 274
{
  char buf1[ENCWORD_LEN_MAX - ENCWORD_LEN_MIN + 1];
  iconv_t cd;
275 276
  ICONV_CONST char *ib;
  char *ob, *p;
277 278 279 280 281
  size_t ibl, obl;
  int count, len, len_b, len_q;

  if (fromcode)
  {
282 283 284 285 286
    cd = mutt_iconv_open (tocode, fromcode, 0);
    assert (cd != (iconv_t)(-1));
    ib = d, ibl = dlen, ob = buf1, obl = sizeof (buf1) - strlen (tocode);
    if (iconv (cd, &ib, &ibl, &ob, &obl) == (size_t)(-1) ||
	iconv (cd, 0, 0, &ob, &obl) == (size_t)(-1))
287
    {
288 289 290
      assert (errno == E2BIG);
      iconv_close (cd);
      assert (ib > d);
291 292
      return (ib - d == dlen) ? dlen : ib - d + 1;
    }
293
    iconv_close (cd);
294 295 296
  }
  else
  {
297 298 299
    if (dlen > sizeof (buf1) - strlen (tocode))
      return sizeof (buf1) - strlen (tocode) + 1;
    memcpy (buf1, d, dlen);
300 301 302 303 304 305 306
    ob = buf1 + dlen;
  }

  count = 0;
  for (p = buf1; p < ob; p++)
  {
    unsigned char c = *p;
307 308 309
    assert (strchr (MimeSpecials, '?'));
    if (c >= 0x7f || c < 0x20 || *p == '_' ||
	(c != ' ' && strchr (MimeSpecials, *p)))
310 311 312
      ++count;
  }

313
  len = ENCWORD_LEN_MIN - 2 + strlen (tocode);
314 315 316 317
  len_b = len + (((ob - buf1) + 2) / 3) * 4;
  len_q = len + (ob - buf1) + 2 * count;

  /* Apparently RFC 1468 says to use B encoding for iso-2022-jp. */
318
  if (!ascii_strcasecmp (tocode, "ISO-2022-JP"))
319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340
    len_q = ENCWORD_LEN_MAX + 1;

  if (len_b < len_q && len_b <= ENCWORD_LEN_MAX)
  {
    *encoder = b_encoder;
    *wlen = len_b;
    return 0;
  }
  else if (len_q <= ENCWORD_LEN_MAX)
  {
    *encoder = q_encoder;
    *wlen = len_q;
    return 0;
  }
  else
    return dlen;
}

/*
 * Encode the data (d, dlen) into s using the encoder.
 * Return the length of the encoded word.
 */
341 342 343
static size_t encode_block (char *s, char *d, size_t dlen,
			    const char *fromcode, const char *tocode,
			    encoder_t encoder)
344 345 346
{
  char buf1[ENCWORD_LEN_MAX - ENCWORD_LEN_MIN + 1];
  iconv_t cd;
347 348
  ICONV_CONST char *ib;
  char *ob;
349 350 351 352
  size_t ibl, obl, n1, n2;

  if (fromcode)
  {
353 354 355 356 357 358 359 360
    cd = mutt_iconv_open (tocode, fromcode, 0);
    assert (cd != (iconv_t)(-1));
    ib = d, ibl = dlen, ob = buf1, obl = sizeof (buf1) - strlen (tocode);
    n1 = iconv (cd, &ib, &ibl, &ob, &obl);
    n2 = iconv (cd, 0, 0, &ob, &obl);
    assert (n1 != (size_t)(-1) && n2 != (size_t)(-1));
    iconv_close (cd);
    return (*encoder) (s, buf1, ob - buf1, tocode);
361 362
  }
  else
363
    return (*encoder) (s, d, dlen, tocode);
364 365 366 367 368 369 370 371
}

/*
 * Discover how much of the data (d, dlen) can be converted into
 * a single encoded word. Return how much data can be converted,
 * and set the length *wlen of the encoded word and *encoder.
 * We start in column col, which limits the length of the word.
 */
372 373 374
static size_t choose_block (char *d, size_t dlen, int col,
			    const char *fromcode, const char *tocode,
			    encoder_t *encoder, size_t *wlen)
375 376
{
  size_t n, nn;
377
  int utf8 = fromcode && !ascii_strcasecmp (fromcode, "utf-8");
378 379 380 381

  n = dlen;
  for (;;)
  {
382 383
    assert (d + n > d);
    nn = try_block (d, n, fromcode, tocode, encoder, wlen);
384 385 386
    if (!nn && (col + *wlen <= ENCWORD_LEN_MAX + 1 || n <= 1))
      break;
    n = (nn ? nn : n) - 1;
387
    assert (n > 0);
388 389
    if (utf8)
      while (n > 1 && CONTINUATION_BYTE(d[n]))
390
	--n;
391 392 393 394 395 396 397 398 399 400 401 402 403 404
  }
  return n;
}

/*
 * Place the result of RFC-2047-encoding (d, dlen) into the dynamically
 * allocated buffer (e, elen). The input data is in charset fromcode
 * and is converted into a charset chosen from charsets.
 * Return 1 if the conversion to UTF-8 failed, 2 if conversion from UTF-8
 * failed, otherwise 0. If conversion failed, fromcode is assumed to be
 * compatible with us-ascii and the original data is used.
 * The input data is assumed to be a single line starting at column col;
 * if col is non-zero, the preceding character was a space.
 */
405 406 407
static int rfc2047_encode (ICONV_CONST char *d, size_t dlen, int col,
			   const char *fromcode, const char *charsets,
			   char **e, size_t *elen, char *specials)
408 409
{
  int ret = 0;
410
  char *buf;
411
  size_t bufpos, buflen;
412 413
  char *u = NULL, *t0, *t1, *t;
  char *s0, *s1;
414 415
  size_t ulen, r, n, wlen;
  encoder_t encoder;
416 417
  char *tocode1 = 0;
  const char *tocode;
418 419 420
  char *icode = "utf-8";

  /* Try to convert to UTF-8. */
421
  if (convert_string (d, dlen, fromcode, icode, &u, &ulen))
422
  {
423
    ret = 1; 
424
    icode = 0;
425 426
    safe_realloc (&u, (ulen = dlen) + 1);
    memcpy (u, d, dlen);
427 428 429 430 431 432 433
    u[ulen] = 0;
  }

  /* Find earliest and latest things we must encode. */
  s0 = s1 = t0 = t1 = 0;
  for (t = u; t < u + ulen; t++)
  {
434 435
    if ((*t & 0x80) || 
	(*t == '=' && t[1] == '?' && (t == u || HSPACE(*(t-1)))))
436
    {
437
      if (!t0) t0 = t;
438 439
      t1 = t;
    }
440
    else if (specials && *t && strchr (specials, *t))
441
    {
442
      if (!s0) s0 = t;
443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464
      s1 = t;
    }
  }

  /* If we have something to encode, include RFC822 specials */
  if (t0 && s0 && s0 < t0)
    t0 = s0;
  if (t1 && s1 && s1 > t1)
    t1 = s1;

  if (!t0)
  {
    /* No encoding is required. */
    *e = u;
    *elen = ulen;
    return ret;
  }

  /* Choose target charset. */
  tocode = fromcode;
  if (icode)
  {
465
    if ((tocode1 = mutt_choose_charset (icode, charsets, u, ulen, 0, 0)))
466 467 468 469 470 471
      tocode = tocode1;
    else
      ret = 2, icode = 0;
  }

  /* Hack to avoid labelling 8-bit data as us-ascii. */
472
  if (!icode && mutt_is_us_ascii (tocode))
473
    tocode = "unknown-8bit";
474
  
475 476
  /* Adjust t0 for maximum length of line. */
  t = u + (ENCWORD_LEN_MAX + 1) - col - ENCWORD_LEN_MIN;
477 478 479
  if (t < u)  t = u;
  if (t < t0) t0 = t;
  
480 481 482 483

  /* Adjust t0 until we can encode a character after a space. */
  for (; t0 > u; t0--)
  {
484
    if (!HSPACE(*(t0-1)))
485 486 487 488
      continue;
    t = t0 + 1;
    if (icode)
      while (t < u + ulen && CONTINUATION_BYTE(*t))
489 490 491
	++t;
    if (!try_block (t0, t - t0, icode, tocode, &encoder, &wlen) &&
	col + (t0 - u) + wlen <= ENCWORD_LEN_MAX + 1)
492 493 494 495 496 497 498 499 500 501 502
      break;
  }

  /* Adjust t1 until we can encode a character before a space. */
  for (; t1 < u + ulen; t1++)
  {
    if (!HSPACE(*t1))
      continue;
    t = t1 - 1;
    if (icode)
      while (CONTINUATION_BYTE(*t))
503 504 505
	--t;
    if (!try_block (t, t1 - t, icode, tocode, &encoder, &wlen) &&
	1 + wlen + (u + ulen - t1) <= ENCWORD_LEN_MAX + 1)
506 507 508 509 510 511 512
      break;
  }

  /* We shall encode the region [t0,t1). */

  /* Initialise the output buffer with the us-ascii prefix. */
  buflen = 2 * ulen;
513
  buf = safe_malloc (buflen);
514
  bufpos = t0 - u;
515
  memcpy (buf, u, t0 - u);
516 517 518 519 520 521 522

  col += t0 - u;

  t = t0;
  for (;;)
  {
    /* Find how much we can encode. */
523
    n = choose_block (t, t1 - t, col, icode, tocode, &encoder, &wlen);
524 525 526 527
    if (n == t1 - t)
    {
      /* See if we can fit the us-ascii suffix, too. */
      if (col + wlen + (u + ulen - t1) <= ENCWORD_LEN_MAX + 1)
528
	break;
529 530
      n = t1 - t - 1;
      if (icode)
531 532 533
	while (CONTINUATION_BYTE(t[n]))
	  --n;
      assert (t + n >= t);
534 535
      if (!n)
      {
536 537 538 539 540 541 542 543 544
	/* This should only happen in the really stupid case where the
	   only word that needs encoding is one character long, but
	   there is too much us-ascii stuff after it to use a single
	   encoded word. We add the next word to the encoded region
	   and try again. */
	assert (t1 < u + ulen);
	for (t1++; t1 < u + ulen && !HSPACE(*t1); t1++)
	  ;
	continue;
545
      }
546
      n = choose_block (t, n, col, icode, tocode, &encoder, &wlen);
547 548
    }

549
    /* Add to output buffer. */
550
#define LINEBREAK "\n\t"
551
    if (bufpos + wlen + strlen (LINEBREAK) > buflen)
552
    {
553 554
      buflen = bufpos + wlen + strlen (LINEBREAK);
      safe_realloc (&buf, buflen);
555
    }
556 557
    r = encode_block (buf + bufpos, t, n, icode, tocode, encoder);
    assert (r == wlen);
558
    bufpos += wlen;
559 560
    memcpy (buf + bufpos, LINEBREAK, strlen (LINEBREAK));
    bufpos += strlen (LINEBREAK);
561 562 563 564 565 566 567 568 569
#undef LINEBREAK

    col = 1;

    t += n;
  }

  /* Add last encoded word and us-ascii suffix to buffer. */
  buflen = bufpos + wlen + (u + ulen - t1);
570 571 572
  safe_realloc (&buf, buflen + 1);
  r = encode_block (buf + bufpos, t, t1 - t, icode, tocode, encoder);
  assert (r == wlen);
573
  bufpos += wlen;
574
  memcpy (buf + bufpos, t1, u + ulen - t1);
575

576 577
  FREE (&tocode1);
  FREE (&u);
578 579

  buf[buflen] = '\0';
580
  
581 582 583 584 585
  *e = buf;
  *elen = buflen + 1;
  return ret;
}

586
void _rfc2047_encode_string (char **pd, int encode_specials, int col)
587
{
588
  char *e;
589
  size_t elen;
590
  char *charsets;
591 592 593 594 595 596 597 598

  if (!Charset || !*pd)
    return;

  charsets = SendCharset;
  if (!charsets || !*charsets)
    charsets = "utf-8";

599 600 601
  rfc2047_encode (*pd, strlen (*pd), col,
		  Charset, charsets, &e, &elen,
		  encode_specials ? RFC822Specials : NULL);
602

603
  FREE (pd);		/* __FREE_CHECKED__ */
604 605 606
  *pd = e;
}

607
/*
 * RFC 2047 encode every entry of the address list: the personal name when
 * there is one, otherwise the mailbox of a group entry, and (with
 * EXACT_ADDRESS) the val field.  The starting column is strlen (tag) + 2,
 * or 32 when tag is NULL.
 */
void rfc2047_encode_adrlist (ADDRESS *addr, const char *tag)
{
  ADDRESS *cur;
  int col = 32;

  if (tag)
    col = strlen (tag) + 2;

  for (cur = addr; cur; cur = cur->next)
  {
    if (cur->personal)
      _rfc2047_encode_string (&cur->personal, 1, col);
    else if (cur->group && cur->mailbox)
      _rfc2047_encode_string (&cur->mailbox, 1, col);
#ifdef EXACT_ADDRESS
    if (cur->val)
      _rfc2047_encode_string (&cur->val, 1, col);
#endif
  }
}

626
static int rfc2047_decode_word (char *d, const char *s, size_t len)
627
{
628 629 630
  const char *pp, *pp1;
  char *pd, *d0;
  const char *t, *t1;
631 632
  int enc = 0, count = 0;
  char *charset = NULL;
633
  int rv = -1;
634

635
  pd = d0 = safe_malloc (strlen (s));
636

637
  for (pp = s; (pp1 = strchr (pp, '?')); pp = pp1 + 1)
638 639
  {
    count++;
640 641 642 643 644

    /* hack for non-compliant MUAs that allow unquoted question marks in encoded-text */
    if (count == 4)
    {
      while (pp1 && *(pp1 + 1) != '=')
645
	pp1 = strchr(pp1 + 1, '?');
646
      if (!pp1)
647
	  goto error_out_0;
648 649
    }

650 651 652
    switch (count)
    {
      case 2:
653 654 655 656 657 658
	/* ignore language specification a la RFC 2231 */        
	t = pp1;
        if ((t1 = memchr (pp, '*', t - pp)))
	  t = t1;
	charset = mutt_substrdup (pp, t);
	break;
659
      case 3:
660 661 662 663 664 665 666
	if (toupper ((unsigned char) *pp) == 'Q')
	  enc = ENCQUOTEDPRINTABLE;
	else if (toupper ((unsigned char) *pp) == 'B')
	  enc = ENCBASE64;
	else
	  goto error_out_0;
	break;
667
      case 4:
668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710
	if (enc == ENCQUOTEDPRINTABLE)
	{
	  for (; pp < pp1; pp++)
	  {
	    if (*pp == '_')
	      *pd++ = ' ';
	    else if (*pp == '=' &&
		     (!(pp[1] & ~127) && hexval(pp[1]) != -1) &&
		     (!(pp[2] & ~127) && hexval(pp[2]) != -1))
	    {
	      *pd++ = (hexval(pp[1]) << 4) | hexval(pp[2]);
	      pp += 2;
	    }
	    else
	      *pd++ = *pp;
	  }
	  *pd = 0;
	}
	else if (enc == ENCBASE64)
	{
	  int c, b = 0, k = 0;

	  for (; pp < pp1; pp++)
	  {
	    if (*pp == '=')
	      break;
	    if ((*pp & ~127) || (c = base64val(*pp)) == -1)
	      continue;
	    if (k + 6 >= 8)
	    {
	      k -= 2;
	      *pd++ = b | (c >> k);
	      b = c << (8 - k);
	    }
	    else
	    {
	      b |= c << (k + 2);
	      k += 6;
	    }
	  }
	  *pd = 0;
	}
	break;
711 712
    }
  }
713
  
714
  if (charset)
715 716 717
    mutt_convert_string (&d0, charset, Charset, MUTT_ICONV_HOOK_FROM);
  mutt_filter_unprintable (&d0);
  strfcpy (d, d0, len);
718 719
  rv = 0;
error_out_0:
720 721
  FREE (&charset);
  FREE (&d0);
722
  return rv;
723 724 725 726 727 728 729 730
}

/*
 * Find the start and end of the first encoded word in the string.
 * We use the grammar in section 2 of RFC 2047, but the "encoding"
 * must be B or Q. Also, we don't require the encoded word to be
 * separated by linear-white-space (section 5(1)).
 *
 * Returns a pointer to the leading "=?" and stores in *x a pointer to the
 * character after the closing "?=".  Returns 0, leaving *x untouched, when
 * the string contains no encoded word.
 */
static const char *find_encoded_word (const char *s, const char **x)
{
  const char *scan = s;
  const char *start;

  for (;;)
  {
    start = strstr (scan, "=?");
    if (!start)
      return 0;

    /* charset: printable characters other than the listed specials */
    scan = start + 2;
    while (0x20 < *scan && *scan < 0x7f &&
           !strchr ("()<>@,;:\"/[]?.=", *scan))
      scan++;

    /* "?B?" or "?Q?", in either case */
    if (scan[0] != '?' || scan[1] == '\0' ||
        !strchr ("BbQq", scan[1]) || scan[2] != '?')
      continue;

    /* non-strict check since many MUAs will not encode spaces and question marks */
    scan += 3;
    while (0x20 <= *scan && *scan < 0x7f &&
           (*scan != '?' || scan[1] != '='))
      scan++;

    if (scan[0] == '?' && scan[1] == '=')
    {
      *x = scan + 2;
      return start;
    }

    /* Not terminated properly: resume one character earlier. */
    --scan;
  }
}

/*
 * Return the length of the linear-white-space (space, tab, CR, LF) at the
 * start of the n bytes at s.  The result is 0 when n is 0, when s does not
 * start with white space, or when the white-space run ends in CR or LF.
 */
static size_t lwslen (const char *s, size_t n)
{
  size_t len = 0;

  if (n == 0)
    return 0;

  while (len < n && strchr (" \t\r\n", s[len]))
    len++;

  /* No leading white space: answer right away rather than inspecting the
   * byte in front of s, which is outside the caller's data. */
  if (len == 0)
    return 0;

  if (strchr ("\r\n", s[len - 1])) /* LWS doesn't end with CRLF */
    return 0;
  return len;
}

/*
 * Return the length of the linear-white-space (space, tab, CR, LF) at the
 * end of the n bytes at s.  The result is 0 when n is 0 or when the last
 * byte is CR or LF, and n when every byte is white space.
 */
static size_t lwsrlen (const char *s, size_t n)
{
  size_t len = 0;

  if (n == 0)
    return 0;

  if (strchr ("\r\n", s[n - 1])) /* LWS doesn't end with CRLF */
    return 0;

  /* Count backwards by index so that no pointer before s is ever formed. */
  while (len < n && strchr (" \t\r\n", s[n - 1 - len]))
    len++;
  return len;
}

/* try to decode anything that looks like a valid RFC2047 encoded
 * header field, ignoring RFC822 parsing rules
 */
804
void rfc2047_decode (char **pd)
805
{
806
  const char *p, *q;
807 808
  size_t m, n;
  int found_encoded = 0;
809
  char *d0, *d;
810 811 812 813 814 815
  const char *s = *pd;
  size_t dlen;

  if (!s || !*s)
    return;

816 817
  dlen = 4 * strlen (s); /* should be enough */
  d = d0 = safe_malloc (dlen + 1);
818 819 820

  while (*s && dlen > 0)
  {
821
    if (!(p = find_encoded_word (s, &q)))
822 823
    {
      /* no encoded words */
824
      if (option (OPTIGNORELWS))
825
      {
826 827
        n = mutt_strlen (s);
        if (found_encoded && (m = lwslen (s, n)) != 0)
828 829 830 831 832 833 834 835
        {
          if (m != n)
            *d = ' ', d++, dlen--;
          s += m;
        }
      }
      if (AssumedCharset && *AssumedCharset)
      {
836 837 838 839 840 841 842 843 844 845 846 847
	char *t;
	size_t tlen;

	n = mutt_strlen (s);
	t = safe_malloc (n + 1);
	strfcpy (t, s, n + 1);
	convert_nonmime_string (&t);
	tlen = mutt_strlen (t);
	strncpy (d, t, tlen);
	d += tlen;
	FREE (&t);
	break;
848
      }
849
      strncpy (d, s, dlen);
850 851 852 853 854 855
      d += dlen;
      break;
    }

    if (p != s)
    {
856
      n = (size_t) (p - s);
857 858
      /* ignore spaces between encoded word
       * and linear-white-space between encoded word and *text */
859
      if (option (OPTIGNORELWS))
860
      {
861
        if (found_encoded && (m = lwslen (s, n)) != 0)
862 863 864 865 866 867
        {
          if (m != n)
            *d = ' ', d++, dlen--;
          n -= m, s += m;
        }

868
        if ((m = n - lwsrlen (s, n)) != 0)
869 870 871
        {
          if (m > dlen)
            m = dlen;
872
          memcpy (d, s, m);
873 874 875 876 877 878
          d += m;
          dlen -= m;
          if (m != n)
            *d = ' ', d++, dlen--;
        }
      }
879
      else if (!found_encoded || strspn (s, " \t\r\n") != n)
880
      {
881 882 883 884 885
	if (n > dlen)
	  n = dlen;
	memcpy (d, s, n);
	d += n;
	dlen -= n;
886 887 888
      }
    }

889
    if (rfc2047_decode_word (d, p, dlen) == -1)
890 891 892 893
    {
      /* could not decode word, fall back to displaying the raw string */
      strfcpy(d, p, dlen);
    }
894 895
    found_encoded = 1;
    s = q;
896
    n = mutt_strlen (d);
897 898 899 900 901
    dlen -= n;
    d += n;
  }
  *d = 0;

902
  FREE (pd);		/* __FREE_CHECKED__ */
903
  *pd = d0;
904
  mutt_str_adjust (pd);
905 906
}

907
/*
 * Run rfc2047_decode() over the address list.  A personal name is decoded
 * when it contains "=?" or when AssumedCharset is set and non-empty; for an
 * entry without a personal name, the mailbox of a group entry is decoded
 * when it contains "=?".  With EXACT_ADDRESS the val field is handled the
 * same way.
 */
void rfc2047_decode_adrlist (ADDRESS *a)
{
  for (; a; a = a->next)
  {
    if (a->personal && ((strstr (a->personal, "=?") != NULL) ||
                        (AssumedCharset && *AssumedCharset)))
    {
      rfc2047_decode (&a->personal);
    }
    else if (a->group && a->mailbox && (strstr (a->mailbox, "=?") != NULL))
    {
      rfc2047_decode (&a->mailbox);
    }
#ifdef EXACT_ADDRESS
    if (a->val && strstr (a->val, "=?") != NULL)
      rfc2047_decode (&a->val);
#endif
  }
}