Commit 432e9521 authored by Christian Persch's avatar Christian Persch

Add unicode version info. Bug #347941, based on a patch by Petr Tomasek.

svn path=/trunk/; revision=1686
parent b0565329
......@@ -25,22 +25,40 @@ BUILT_SOURCES = gucharmap-marshal.c gucharmap-marshal.h
lib_LTLIBRARIES = libgucharmap.la
libgucharmap_la_SOURCES = \
gucharmap-block-chapters-model.c gucharmap-block-chapters-model.h \
gucharmap-chapters-model.c gucharmap-chapters-model.h \
gucharmap-chapters-view.c gucharmap-chapters-view.h \
gucharmap-charmap.c gucharmap-charmap.h \
gucharmap-chartable-accessible.c gucharmap-chartable-accessible.h \
gucharmap-chartable-cell-accessible.c gucharmap-chartable-cell-accessible.h \
gucharmap-chartable.c gucharmap-chartable.h gucharmap-chartable-private.h \
gucharmap-codepoint-list.c gucharmap-codepoint-list.h \
gucharmap-init.c gucharmap-init.h \
gucharmap-marshal.c gucharmap-marshal.h \
gucharmap-block-chapters-model.c \
gucharmap-block-chapters-model.h \
gucharmap-chapters-model.c \
gucharmap-chapters-model.h \
gucharmap-chapters-view.c \
gucharmap-chapters-view.h \
gucharmap-charmap.c \
gucharmap-charmap.h \
gucharmap-chartable-accessible.c \
gucharmap-chartable-accessible.h \
gucharmap-chartable-cell-accessible.c \
gucharmap-chartable-cell-accessible.h \
gucharmap-chartable.c \
gucharmap-chartable.h \
gucharmap-codepoint-list.c \
gucharmap-codepoint-list.h \
gucharmap-init.c \
gucharmap-init.h \
gucharmap-marshal.c \
gucharmap-marshal.h \
gucharmap-private.h \
gucharmap-script-chapters-model.c gucharmap-script-chapters-model.h \
gucharmap-script-codepoint-list.c gucharmap-script-codepoint-list.h \
gucharmap-unicode-info.c gucharmap-unicode-info.h \
unicode-blocks.h unicode-categories.h unicode-names.h \
unicode-nameslist.h unicode-scripts.h unicode-unihan.h \
gucharmap-script-chapters-model.c \
gucharmap-script-chapters-model.h \
gucharmap-script-codepoint-list.c \
gucharmap-script-codepoint-list.h \
gucharmap-unicode-info.c \
gucharmap-unicode-info.h \
unicode-blocks.h \
unicode-categories.h \
unicode-names.h \
unicode-nameslist.h \
unicode-scripts.h \
unicode-unihan.h \
unicode-versions.h \
$(NULL)
libgucharmap_la_CPPFLAGS = \
......
......@@ -8,9 +8,10 @@
# - NamesList.txt
# - Blocks.txt
# - Scripts.txt
# - DerivedAge.txt
#
FILES='UnicodeData.txt Unihan.zip NamesList.txt Blocks.txt Scripts.txt'
FILES='UnicodeData.txt Unihan.zip NamesList.txt Blocks.txt Scripts.txt DerivedAge.txt'
mkdir -p ${1:-unicode}
......
......@@ -9,6 +9,7 @@
# - unicode-unihan.h
# - unicode-categories.h
# - unicode-scripts.h
# - unicode-versions.h
#
# usage: ./gen-guch-unicode-tables.pl UNICODE-VERSION DIRECTORY
# where DIRECTORY contains UnicodeData.txt Unihan.zip NamesList.txt Blocks.txt Scripts.txt DerivedAge.txt
......@@ -26,6 +27,7 @@ sub process_unihan_zip ($);
sub process_nameslist_txt ($);
sub process_blocks_txt ($);
sub process_scripts_txt ($);
sub process_versions_txt ($);
$| = 1; # flush stdout buffer
......@@ -44,7 +46,7 @@ which can be found at http://www.unicode.org/Public/UNIDATA/
EOF
}
my ($unicodedata_txt, $unihan_zip, $nameslist_txt, $blocks_txt, $scripts_txt);
my ($unicodedata_txt, $unihan_zip, $nameslist_txt, $blocks_txt, $scripts_txt, $versions_txt);
my $v = $ARGV[0];
my $d = $ARGV[1];
......@@ -56,6 +58,7 @@ for my $f (readdir ($dir))
$nameslist_txt = "$d/$f" if ($f =~ /NamesList.*\.txt/);
$blocks_txt = "$d/$f" if ($f =~ /Blocks.*\.txt/);
$scripts_txt = "$d/$f" if ($f =~ /Scripts.*\.txt/);
$versions_txt = "$d/$f" if ($f =~ /DerivedAge.*\.txt/);
}
defined $unicodedata_txt or die "Did not find $d/UnicodeData.txt";
......@@ -63,11 +66,13 @@ defined $unihan_zip or die "Did not find $d/Unihan.zip";
defined $nameslist_txt or die "Did not find $d/NamesList.txt";
defined $blocks_txt or die "Did not find $d/Blocks.txt";
defined $scripts_txt or die "Did not find $d/Scripts.txt";
defined $versions_txt or die "Did not find $d/DerivedAge.txt";
process_unicode_data_txt ($unicodedata_txt);
process_nameslist_txt ($nameslist_txt);
process_blocks_txt ($blocks_txt);
process_scripts_txt ($scripts_txt);
process_versions_txt ($versions_txt);
process_unihan_zip ($unihan_zip);
exit;
......@@ -858,3 +863,102 @@ sub process_scripts_txt ($)
close ($out);
print " done.\n";
}
#------------------------#
# Generates unicode-versions.h in the current directory from DerivedAge.txt.
#
# Argument: path of the DerivedAge.txt file to read.
#
# Data lines look like "START..END ; VERSION" or "CODEPOINT ; VERSION", with
# hexadecimal code points; every other line is skipped.  The header contains:
#   - unicode_versions[]: the ranges ordered by start code point, each tagged
#     with GUCHARMAP_UNICODE_VERSION_<version with dots turned into underscores>;
#   - unicode_version_strings[]: the distinct version strings, each terminated
#     by NUL, in ascending version order;
#   - unicode_version_string_offsets[]: the offset of each of those strings,
#     in the same order.
#
# Dies if a file cannot be opened, if the input holds no version data, or if
# the output cannot be flushed on close.
sub process_versions_txt ($)
{
    my ($versions_txt) = @_;

    my %version_hash;   # start code point => { end, version (enum suffix) }
    my %versions;       # set of distinct version display strings

    open (my $in, '<', $versions_txt)
        or die "Cannot open $versions_txt for reading: $!";

    print "processing $versions_txt...";

    while (my $line = <$in>)
    {
        my ($start, $end, $raw_version);

        if ($line =~ /^([0-9A-F]+)\.\.([0-9A-F]+)\s+;\s+(\S+)/)
        {
            $start = hex ($1);
            $end = hex ($2);
            $raw_version = $3;
        }
        elsif ($line =~ /^([0-9A-F]+)\s+;\s+(\S+)/)
        {
            $start = hex ($1);
            $end = $start;
            $raw_version = $2;
        }
        else
        {
            next;
        }

        my $version = $raw_version;
        $version =~ tr/_/ /;
        $version =~ s/(\w+)/\u\L$1/g;

        $versions{$version} = 1;

        # The enum constant suffix cannot contain dots.
        $version =~ s/\./_/g;

        $version_hash{$start} = { 'end' => $end, 'version' => $version };
    }

    close ($in);

    # An empty table would yield a header that does not compile; fail here
    # instead of leaving that for the C compiler to report.
    %version_hash
        or die "No version data found in $versions_txt";

    # Compare versions component by component so that "10.0" sorts after
    # "9.0"; a plain string sort would place it before "2.0".
    my $by_version = sub {
        my @left = split (/\./, $a);
        my @right = split (/\./, $b);

        while (@left and @right)
        {
            my ($x, $y) = (shift @left, shift @right);
            my $order = ($x =~ /^\d+$/ && $y =~ /^\d+$/) ? $x <=> $y : $x cmp $y;
            return $order if $order;
        }

        return scalar (@left) <=> scalar (@right);
    };
    my @sorted_versions = sort $by_version keys %versions;

    open (my $out, '>', "unicode-versions.h")
        or die "Cannot open unicode-versions.h for writing: $!";

    print $out "/* unicode-versions.h */\n";
    print $out "/* THIS IS A GENERATED FILE. CHANGES WILL BE OVERWRITTEN. */\n";
    print $out "/* Generated by $0 */\n";
    print $out "/* Generated from UCD version $v */\n\n";

    print $out "#ifndef UNICODE_VERSIONS_H\n";
    print $out "#define UNICODE_VERSIONS_H\n\n";

    print $out "#include <glib/gunicode.h>\n";
    print $out "#include <glib/gi18n-lib.h>\n\n";

    print $out "typedef struct {\n";
    print $out " gunichar start;\n";
    print $out " gunichar end;\n";
    print $out " GucharmapUnicodeVersion version;\n";
    print $out "} UnicodeVersion;\n\n";

    print $out "static const UnicodeVersion unicode_versions[] =\n";
    print $out "{\n";
    for my $start (sort { $a <=> $b } keys %version_hash)
    {
        printf $out (qq/ { 0x%04X, 0x%04X, GUCHARMAP_UNICODE_VERSION_\%s },\n/,
                     $start, $version_hash{$start}->{'end'}, $version_hash{$start}->{'version'});
    }
    print $out "};\n\n";

    # All version strings live in one NUL-separated blob; remember where each
    # one starts so the offsets table can be written afterwards.
    print $out "static const gchar unicode_version_strings[] =\n";
    my $offset = 0;
    my %version_offsets;
    for my $version (@sorted_versions)
    {
        printf $out (qq/ "\%s\\0"\n/, $version);
        $version_offsets{$version} = $offset;
        $offset += length ($version) + 1;
    }
    print $out " ;\n\n";

    print $out "static const guint16 unicode_version_string_offsets[] =\n";
    print $out "{\n";
    for my $version (@sorted_versions)
    {
        printf $out (qq/ \%d,\n/, $version_offsets{$version});
    }
    print $out "};\n\n";

    print $out "#endif /* #ifndef UNICODE_VERSIONS_H */\n";

    # Buffered write errors only surface when the handle is closed.
    close ($out)
        or die "Cannot write unicode-versions.h: $!";
    print " done.\n";
}
......@@ -422,6 +422,7 @@ set_details (GucharmapCharmap *charmap,
const gchar **csarr;
gunichar *ucs;
gunichar2 *utf16;
GucharmapUnicodeVersion version;
buffer = gtk_text_view_get_buffer (charmap->details_view);
gtk_text_buffer_set_text (buffer, "", 0);
......@@ -449,6 +450,13 @@ set_details (GucharmapCharmap *charmap,
insert_heading (charmap, buffer, &iter, _("General Character Properties"));
/* Unicode version */
version = gucharmap_get_unicode_version (uc);
if (version)
insert_vanilla_detail (charmap, buffer, &iter,
_("In Unicode since:"),
gucharmap_unicode_version_to_string (version));
/* character category */
insert_vanilla_detail (charmap, buffer, &iter, _("Unicode category:"),
gucharmap_get_unicode_category_name (uc));
......
......@@ -27,6 +27,7 @@
#include "unicode-blocks.h"
#include "unicode-nameslist.h"
#include "unicode-categories.h"
#include "unicode-versions.h"
#if ENABLE_UNIHAN
# include "unicode-unihan.h"
#endif
......@@ -178,6 +179,41 @@ gucharmap_get_unicode_data_name_count (void)
return G_N_ELEMENTS (unicode_names);
}
/*
 * Looks up the range of unicode_versions that contains uc, using a binary
 * search, and returns that range's version field.  Returns
 * GUCHARMAP_UNICODE_VERSION_UNASSIGNED when uc is below the first range,
 * above the last range, or falls between two ranges.
 */
GucharmapUnicodeVersion
gucharmap_get_unicode_version (gunichar uc)
{
  gint low = 0;
  gint high = G_N_ELEMENTS (unicode_versions) - 1;

  /* Reject anything outside the span covered by the table up front. */
  if (uc < unicode_versions[0].start || uc > unicode_versions[high].end)
    return GUCHARMAP_UNICODE_VERSION_UNASSIGNED;

  while (low <= high)
    {
      gint probe = (low + high) / 2;

      if (uc > unicode_versions[probe].end)
        {
          low = probe + 1;
        }
      else if (uc < unicode_versions[probe].start)
        {
          high = probe - 1;
        }
      else
        {
          /* start <= uc <= end: this is the containing range. */
          return unicode_versions[probe].version;
        }
    }

  /* The search interval became empty: uc sits in a gap between ranges. */
  return GUCHARMAP_UNICODE_VERSION_UNASSIGNED;
}
/*
 * Returns the display string for version, e.g. the text of the Unicode
 * release in which a character first appeared.  The result is read-only.
 *
 * Returns NULL for GUCHARMAP_UNICODE_VERSION_UNASSIGNED, and (after a
 * g_return_val_if_fail warning) for values outside the enum range.
 */
G_CONST_RETURN gchar *
gucharmap_unicode_version_to_string (GucharmapUnicodeVersion version)
{
  g_return_val_if_fail (version >= GUCHARMAP_UNICODE_VERSION_UNASSIGNED &&
                        version <= GUCHARMAP_UNICODE_VERSION_LATEST, NULL);

  /* There is no string for "unassigned". */
  if (version == GUCHARMAP_UNICODE_VERSION_UNASSIGNED)
    return NULL;

  /* unicode_version_string_offsets is generated with one entry per version
   * found in DerivedAge.txt, oldest first, and no slot for UNASSIGNED
   * (enum value 0).  The first real version therefore lives at index 0,
   * hence the - 1; indexing by version directly would return the next newer
   * version's string and read past the table for the latest one. */
  return unicode_version_strings + unicode_version_string_offsets[version - 1];
}
#if ENABLE_UNIHAN
gint
......
......@@ -27,10 +27,25 @@
G_BEGIN_DECLS
/*
 * Unicode version in which a character was introduced, as returned by
 * gucharmap_get_unicode_version().
 *
 * The generated unicode-versions.h refers to these constants by name: the
 * generator appends the version string from DerivedAge.txt, with dots replaced
 * by underscores, to "GUCHARMAP_UNICODE_VERSION_".  Every version present in
 * that data file therefore needs a matching enumerator here.
 *
 * NOTE(review): the enumerators after UNASSIGNED appear to be listed oldest
 * first so they line up with the generated, sorted version-string table;
 * keep that order when adding a new version.
 */
typedef enum {
/* Value 0: the code point is not covered by any range in the version table. */
GUCHARMAP_UNICODE_VERSION_UNASSIGNED,
GUCHARMAP_UNICODE_VERSION_1_1,
GUCHARMAP_UNICODE_VERSION_2_0,
GUCHARMAP_UNICODE_VERSION_2_1,
GUCHARMAP_UNICODE_VERSION_3_0,
GUCHARMAP_UNICODE_VERSION_3_1,
GUCHARMAP_UNICODE_VERSION_3_2,
GUCHARMAP_UNICODE_VERSION_4_0,
GUCHARMAP_UNICODE_VERSION_4_1,
GUCHARMAP_UNICODE_VERSION_5_0,
/* Alias for the newest enumerator; used as the upper bound when validating a version value. */
GUCHARMAP_UNICODE_VERSION_LATEST = GUCHARMAP_UNICODE_VERSION_5_0 /* private, will move forward with each revision */
} GucharmapUnicodeVersion;
/* return values are read-only */
G_CONST_RETURN gchar * gucharmap_get_unicode_name (gunichar uc);
G_CONST_RETURN gchar * gucharmap_get_unicode_data_name (gunichar uc);
gint gucharmap_get_unicode_data_name_count (void);
GucharmapUnicodeVersion gucharmap_get_unicode_version (gunichar wc);
G_CONST_RETURN gchar * gucharmap_get_unicode_category_name (gunichar uc);
gint gucharmap_get_unihan_count (void);
G_CONST_RETURN gchar * gucharmap_get_unicode_kDefinition (gunichar uc);
......@@ -58,6 +73,8 @@ gboolean gucharmap_unichar_isgraph (gunichar wc)
G_CONST_RETURN gchar ** gucharmap_unicode_list_scripts (void);
G_CONST_RETURN gchar * gucharmap_unicode_get_script_for_char (gunichar wc);
G_CONST_RETURN gchar * gucharmap_unicode_version_to_string (GucharmapUnicodeVersion version);
/* doesn't really belong here, but no better place was available */
gunichar gucharmap_unicode_get_locale_character (void);
......
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment