Commit f52795ea authored by Peter Stephenson

36415: remap bytes from invalid multibyte characters.

These now go to 0xdc00 + index.  If wchar_t is a Unicode code point,
this is by construction an invalid character within the Unicode range.
If it isn't, we would hope the result was no worse than the current
fudge.
parent 32f5d3d8
2015-09-04 Peter Stephenson <p.stephenson@samsung.com>
* 36415: Src/pattern.c, Test/D07multibyte.ztst: remap bytes from
invalid multibyte characters to 0xDC00 + index which is invalid
in Unicode. Strictly this only works if wchar_t is
ISO-10646-compliant, however it ought to be at least as good as
the current fudge in any case.
2015-09-03 Peter Stephenson <p.stephenson@samsung.com>
* 36416: Src/Zle/zle_refresh.c, Src/Zle/zle_utils.c: If
......
......@@ -224,6 +224,22 @@ typedef zlong zrange_t;
typedef unsigned long zrange_t;
#endif
#ifdef MULTIBYTE_SUPPORT
/*
* WCHAR_INVALID(ch): wide-character stand-in for a byte that is not
* part of a valid multibyte character.
*
* The result is 0xDC00 plus the value of the byte as produced by
* STOUC() (defined elsewhere; presumably a conversion to unsigned
* char, which would keep the result within 0xDC00..0xDCFF).
*
* Values from 0xDC00 lie in the Unicode surrogate range, which does
* not correspond to valid characters; the range is recommended for
* purposes of this kind.  The intent is that the stand-in for an
* invalid byte does not compare equal to any properly converted
* character.
*
* Note that this strictly only works if wchar_t represents
* Unicode code points, which isn't necessarily true; however,
* there is no obvious way to pick an invalid value in an unknown
* wide-character format.
*
* ch appears only once in the expansion, so an argument with side
* effects such as *(*x)++ is evaluated a single time (assuming STOUC()
* also evaluates its argument only once).
*/
#define WCHAR_INVALID(ch) \
((wchar_t) (0xDC00 + STOUC(ch)))
#endif /* MULTIBYTE_SUPPORT */
/*
* Array of characters corresponding to zpc_chars enum, which it must match.
*/
......@@ -353,10 +369,10 @@ metacharinc(char **x)
return wc;
}
/* Error. Treat as single byte. */
/* Error. */
/* Reset the shift state for next time. */
memset(&shiftstate, 0, sizeof(shiftstate));
return (wchar_t) STOUC(*(*x)++);
return WCHAR_INVALID(*(*x)++);
}
#else
......@@ -1867,10 +1883,10 @@ charref(char *x, char *y)
ret = mbrtowc(&wc, x, y-x, &shiftstate);
if (ret == MB_INVALID || ret == MB_INCOMPLETE) {
/* Error. Treat as single byte. */
/* Error. */
/* Reset the shift state for next time. */
memset(&shiftstate, 0, sizeof(shiftstate));
return (wchar_t) STOUC(*x);
return WCHAR_INVALID(*x);
}
return wc;
......@@ -1913,7 +1929,7 @@ charrefinc(char **x, char *y, int *z)
size_t ret;
if (!(patglobflags & GF_MULTIBYTE) || !(STOUC(**x) & 0x80))
return (wchar_t) STOUC(*(*x)++);
return WCHAR_INVALID(*(*x)++);
ret = mbrtowc(&wc, *x, y-*x, &shiftstate);
......@@ -1922,7 +1938,7 @@ charrefinc(char **x, char *y, int *z)
*z = 1;
/* Reset the shift state for next time. */
memset(&shiftstate, 0, sizeof(shiftstate));
return (wchar_t) STOUC(*(*x)++);
return WCHAR_INVALID(*(*x)++);
}
/* Nulls here are normal characters */
......
......@@ -508,3 +508,20 @@
cd ..
}
0:cd with special characters
# Each entry is a [[ ... ]] condition that is expected to succeed.
# The entries are single-quoted so the escapes stay literal here; they
# are only expanded by the (g::) flag in the loop below.  After
# expansion \xcc is the single byte 0xCC (presumably not a valid
# character on its own in the locale the test runs in) and \ucc is the
# Unicode character U+00CC.
test_array=(
'[[ \xcc = \xcc ]]'
'[[ \xcc != \xcd ]]'
'[[ \xcc != \ucc ]]'
'[[ \ucc = \ucc ]]'
'[[ \ucc = [\ucc] ]]'
'[[ \xcc != [\ucc] ]]'
# Not clear how useful the following is...
'[[ \xcc = [\xcc] ]]'
)
# Expand the escape sequences in each entry, evaluate the resulting
# condition, and report any entry that does not succeed on stderr.
for test in $test_array; do
if ! eval ${(g::)test} ; then
print -rl "Test $test failed" >&2
fi
done
0:Invalid characters in pattern matching
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment