/* make_charset_table.c
 * Sample program that generates the tables for charsets.c using iconv.
 *
 * Public domain.
 */

#include <stdio.h>
#include <stdint.h>
#include <errno.h>
#include <iconv.h>

/* U+FFFD: stored in the table for bytes that iconv rejects with EILSEQ;
 * such entries are printed as the symbol UNREPL in the generated output. */
#define UNREPL 0xFFFD

/*
 * Print a C table that maps the bytes of a charset to UCS-2 code units.
 *
 * Usage: make_charset_table <charset>
 *
 * Each byte 0x00..0xFF is converted on its own from <charset> to big-endian
 * UCS-2 with iconv. Bytes that iconv rejects with EILSEQ are recorded as
 * UNREPL. If every converted byte in 0x00..0x7F maps to itself, only the
 * entries for 0x80..0xFF are printed; otherwise all 256 entries are printed.
 *
 * Exit status:
 *   0  table written to stdout
 *   1  wrong number of arguments
 *   2  iconv_open() failed (reported via perror)
 *   3  a byte did not convert to exactly one UCS-2 code unit
 *   4  iconv() failed with an error other than EILSEQ
 */
int main(int argc, char **argv) {
	/* for now only UCS-2 */
	uint16_t table[0x100];

	iconv_t conv;
	const char *charset;
	int i, j;

	/* 0x00 ... 0x7F same as ASCII? */
	int ascii_based = 1;
	/* 0x00 ... 0x9F same as ISO? */
	int iso_based = 1;

	if (argc != 2) {
		/* argv[0] may be missing when argc is 0; never hand NULL to %s */
		fprintf(stderr, "usage: %s <charset>\n",
			argc > 0 ? argv[0] : "make_charset_table");
		return 1;
	}

	charset = argv[1];

	/*
	 * One descriptor serves all 256 conversions. The big-endian target
	 * matches the way the two output bytes are combined below.
	 */
	conv = iconv_open("UCS-2BE", charset);
	if (conv == (iconv_t) -1) {
		perror("iconv_open");
		return 2;
	}

	for (i = 0x00; i < 0x100; i++) {
		unsigned char in[1], out[2];
		size_t inlen = 1, outlen = 2;

		char *inbuf = (char *) in;
		char *outbuf = (char *) out;

		size_t ret;

		in[0] = (unsigned char) i;

		/*
		 * Put the descriptor back into its initial state so that this
		 * byte is converted independently of the previous one (and of
		 * any earlier failed conversion).
		 */
		(void) iconv(conv, NULL, NULL, NULL, NULL);

		ret = iconv(conv, &inbuf, &inlen, &outbuf, &outlen);

		if (ret == (size_t) -1) {
			if (errno == EILSEQ) {
				/* byte is not valid in this charset */
				table[i] = UNREPL;
				continue;
			}

			perror("iconv");
			iconv_close(conv);
			return 4;
		}

		/* expect exactly: 1 byte consumed, 2 bytes produced, reversible */
		if (ret != 0 || inlen != 0 || outlen != 0) {
			fprintf(stderr, "%d: smth went wrong: %zu %zu %zu\n", i, ret, inlen, outlen);
			iconv_close(conv);
			return 3;
		}

		if (i < 0x80 && (out[0] != 0 || out[1] != i))
			ascii_based = 0;

		if (i < 0xA0 && (out[0] != 0 || out[1] != i))
			iso_based = 0;

		table[i] = (uint16_t) ((out[0] << 8) | out[1]);
	}

	iconv_close(conv);

	/* iso_based not supported */
	iso_based = 0;

	printf("/* generated by %s %s */\n", argv[0], charset);

	/* first byte value that actually needs a table entry */
	if (iso_based)
		i = 0xA0;
	else if (ascii_based)
		i = 0x80;
	else
		i = 0;

	/*
	 * The charset name is inserted verbatim; names containing characters
	 * such as '-' must be edited by hand to form a valid C identifier.
	 */
	printf("const gunichar2 charset_table_%s[0x%x] = {\n", charset, 0x100 - i);

	/*
	 * Eight entries per row. Every possible start value (0, 0x80, 0xA0) is
	 * a multiple of 8, so the rows end exactly at 0x100.
	 */
	while (i < 0x100) {
		int start = i;

		printf("    ");

		for (j = 0; j < 8; j++, i++) {
			if (table[i] == UNREPL)
				printf("UNREPL, ");
			else
				printf("0x%.4x, ", table[i]);
		}

		/*
		 * Two consecutive rows cover one 16-byte range: the first row is
		 * annotated with the range start, the second with the range end.
		 */
		if ((start & 0xf) == 0)
			printf("       /* 0x%.2X -      */", start);
		else
			printf("       /*      - 0x%.2X */", i - 1);

		printf("\n");
	}
	printf("};\n");

	return 0;
}