/*
 * Copyright © 2004 Noah Levitt
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the
 * Free Software Foundation; either version 2 of the License, or (at your
 * option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 59 Temple Place, Suite 330, Boston, MA 02110-1301  USA
 */

Christian Persch's avatar
Christian Persch committed
19
#include <config.h>
20

21
#include <gtk/gtk.h>
22
#include <string.h>
Noah Levitt's avatar
Noah Levitt committed
23
#include "gucharmap-unicode-info.h"
24
#include "gucharmap-private.h"
25

Noah Levitt's avatar
Noah Levitt committed
26 27 28 29
#include "unicode-names.h"
#include "unicode-blocks.h"
#include "unicode-nameslist.h"
#include "unicode-categories.h"
30
#include "unicode-versions.h"
Noah Levitt's avatar
Noah Levitt committed
31 32 33 34
#if ENABLE_UNIHAN
# include "unicode-unihan.h"
#endif

35 36 37 38 39 40 41 42
/* constants for hangul (de)composition, see UAX #15 */
#define SBase 0xAC00
#define LCount 19
#define VCount 21
#define TCount 28
#define NCount (VCount * TCount)
#define SCount (LCount * NCount)

43
static const gchar JAMO_L_TABLE[][4] = {
44
  "G", "GG", "N", "D", "DD", "R", "M", "B", "BB",
45
  "S", "SS", "", "J", "JJ", "C", "K", "T", "P", "H"
46 47
};

48
static const gchar JAMO_V_TABLE[][4] = {
49 50 51 52 53
  "A", "AE", "YA", "YAE", "EO", "E", "YEO", "YE", "O",
  "WA", "WAE", "OE", "YO", "U", "WEO", "WE", "WI",
  "YU", "EU", "YI", "I"
};

54
static const gchar JAMO_T_TABLE[][4] = {
55 56 57
  "", "G", "GG", "GS", "N", "NJ", "NH", "D", "L", "LG", "LM",
  "LB", "LS", "LT", "LP", "LH", "M", "B", "BS",
  "S", "SS", "NG", "J", "C", "K", "T", "P", "H"
58 59
};

60
/**
 * gucharmap_get_unicode_name:
 * @wc: a unicode character
 *
 * Returns the name of @wc.  CJK unified ideographs and Hangul syllables
 * get a generated name, surrogates and private use characters get a
 * translated placeholder, and everything else is looked up with
 * gucharmap_get_unicode_data_name().
 *
 * Generated names are formatted into a static buffer which is overwritten
 * by the next call that generates a name; copy the result if it has to
 * outlive that call.
 *
 * Return value: the character name, or the translated string
 * "<not assigned>" when no name is known.  Must not be freed.
 **/
G_CONST_RETURN gchar *
gucharmap_get_unicode_name (gunichar wc)
{
  static gchar buf[32];
  const gchar *name;

  _gucharmap_intl_ensure_initialized ();

  if ((wc >= 0x3400 && wc <= 0x4DB5)
      || (wc >= 0x4e00 && wc <= 0x9fa5)
      || (wc >= 0x20000 && wc <= 0x2A6D6))
    {
      g_snprintf (buf, sizeof (buf), "CJK UNIFIED IDEOGRAPH-%04X", wc);
      return buf;
    }

  /* Only SCount code points starting at SBase are syllables.  Anything
   * after them falls through to the table lookup below instead of being
   * reported with an empty name. */
  if (wc >= SBase && wc < SBase + SCount)
    {
      /* compute hangul syllable name as per UAX #15 */
      gint SIndex = wc - SBase;
      gint LIndex = SIndex / NCount;
      gint VIndex = (SIndex % NCount) / TCount;
      gint TIndex = SIndex % TCount;

      g_snprintf (buf, sizeof (buf), "HANGUL SYLLABLE %s%s%s",
                  JAMO_L_TABLE[LIndex], JAMO_V_TABLE[VIndex],
                  JAMO_T_TABLE[TIndex]);
      return buf;
    }

  if (wc >= 0xD800 && wc <= 0xDB7F)
    return _("<Non Private Use High Surrogate>");
  if (wc >= 0xDB80 && wc <= 0xDBFF)
    return _("<Private Use High Surrogate>");
  if (wc >= 0xDC00 && wc <= 0xDFFF)
    return _("<Low Surrogate>");
  if (wc >= 0xE000 && wc <= 0xF8FF)
    return _("<Private Use>");
  if (wc >= 0xF0000 && wc <= 0xFFFFD)
    return _("<Plane 15 Private Use>");
  if (wc >= 0x100000 && wc <= 0x10FFFD)
    return _("<Plane 16 Private Use>");

  /* The lookup yields NULL for a code point missing from the table and an
   * empty string for one outside the table's range; neither is a name. */
  name = gucharmap_get_unicode_data_name (wc);
  if (name == NULL || name[0] == '\0')
    return _("<not assigned>");

  return name;
}

114
/**
 * gucharmap_get_unicode_category_name:
 * @wc: a unicode character
 *
 * Maps the general category reported by gucharmap_unichar_type() for @wc
 * to a translated, human readable description.
 *
 * Return value: the translated category description, or an empty string
 * for a category not listed here.  Must not be freed.
 **/
G_CONST_RETURN gchar *
gucharmap_get_unicode_category_name (gunichar wc)
{
  GUnicodeType category;

  _gucharmap_intl_ensure_initialized ();

  category = gucharmap_unichar_type (wc);

  switch (category)
    {
    /* Other */
    case G_UNICODE_CONTROL:
      return _("Other, Control");
    case G_UNICODE_FORMAT:
      return _("Other, Format");
    case G_UNICODE_UNASSIGNED:
      return _("Other, Not Assigned");
    case G_UNICODE_PRIVATE_USE:
      return _("Other, Private Use");
    case G_UNICODE_SURROGATE:
      return _("Other, Surrogate");

    /* Letter */
    case G_UNICODE_LOWERCASE_LETTER:
      return _("Letter, Lowercase");
    case G_UNICODE_MODIFIER_LETTER:
      return _("Letter, Modifier");
    case G_UNICODE_OTHER_LETTER:
      return _("Letter, Other");
    case G_UNICODE_TITLECASE_LETTER:
      return _("Letter, Titlecase");
    case G_UNICODE_UPPERCASE_LETTER:
      return _("Letter, Uppercase");

    /* Mark */
    case G_UNICODE_COMBINING_MARK:
      return _("Mark, Spacing Combining");
    case G_UNICODE_ENCLOSING_MARK:
      return _("Mark, Enclosing");
    case G_UNICODE_NON_SPACING_MARK:
      return _("Mark, Non-Spacing");

    /* Number */
    case G_UNICODE_DECIMAL_NUMBER:
      return _("Number, Decimal Digit");
    case G_UNICODE_LETTER_NUMBER:
      return _("Number, Letter");
    case G_UNICODE_OTHER_NUMBER:
      return _("Number, Other");

    /* Punctuation */
    case G_UNICODE_CONNECT_PUNCTUATION:
      return _("Punctuation, Connector");
    case G_UNICODE_DASH_PUNCTUATION:
      return _("Punctuation, Dash");
    case G_UNICODE_CLOSE_PUNCTUATION:
      return _("Punctuation, Close");
    case G_UNICODE_FINAL_PUNCTUATION:
      return _("Punctuation, Final Quote");
    case G_UNICODE_INITIAL_PUNCTUATION:
      return _("Punctuation, Initial Quote");
    case G_UNICODE_OTHER_PUNCTUATION:
      return _("Punctuation, Other");
    case G_UNICODE_OPEN_PUNCTUATION:
      return _("Punctuation, Open");

    /* Symbol */
    case G_UNICODE_CURRENCY_SYMBOL:
      return _("Symbol, Currency");
    case G_UNICODE_MODIFIER_SYMBOL:
      return _("Symbol, Modifier");
    case G_UNICODE_MATH_SYMBOL:
      return _("Symbol, Math");
    case G_UNICODE_OTHER_SYMBOL:
      return _("Symbol, Other");

    /* Separator */
    case G_UNICODE_LINE_SEPARATOR:
      return _("Separator, Line");
    case G_UNICODE_PARAGRAPH_SEPARATOR:
      return _("Separator, Paragraph");
    case G_UNICODE_SPACE_SEPARATOR:
      return _("Separator, Space");

    default:
      break;
    }

  return "";
}
154

Noah Levitt's avatar
Noah Levitt committed
155
/* does a binary search on unicode_names */
156
G_CONST_RETURN gchar *
157
gucharmap_get_unicode_data_name (gunichar uc)
158 159 160
{
  gint min = 0;
  gint mid;
161
  gint max = G_N_ELEMENTS(unicode_names) - 1;
162

Noah Levitt's avatar
Noah Levitt committed
163
  if (uc < unicode_names[0].index || uc > unicode_names[max].index)
164 165 166 167 168
    return "";

  while (max >= min) 
    {
      mid = (min + max) / 2;
Noah Levitt's avatar
Noah Levitt committed
169
      if (uc > unicode_names[mid].index)
170
        min = mid + 1;
Noah Levitt's avatar
Noah Levitt committed
171
      else if (uc < unicode_names[mid].index)
172 173
        max = mid - 1;
      else
174
        return unicode_name_get_name(&unicode_names[mid]);
175 176 177 178 179
    }

  return NULL;
}

Noah Levitt's avatar
Noah Levitt committed
180
gint
181
gucharmap_get_unicode_data_name_count (void)
Noah Levitt's avatar
Noah Levitt committed
182 183 184 185
{
  return G_N_ELEMENTS (unicode_names);
}

186 187 188 189 190 191 192 193 194
/**
 * gucharmap_get_unicode_version:
 * @uc: a unicode character
 *
 * Binary-searches the unicode_versions table, whose entries are
 * [start, end] ranges, for the range containing @uc.
 *
 * Return value: the version recorded for the range containing @uc, or
 * %GUCHARMAP_UNICODE_VERSION_UNASSIGNED when no range contains it
 **/
GucharmapUnicodeVersion
gucharmap_get_unicode_version (gunichar uc)
{
  gint lo = 0;
  gint hi = G_N_ELEMENTS (unicode_versions) - 1;

  if (uc < unicode_versions[0].start || uc > unicode_versions[hi].end)
    return GUCHARMAP_UNICODE_VERSION_UNASSIGNED;

  while (lo <= hi)
    {
      gint probe = (lo + hi) / 2;

      if (uc > unicode_versions[probe].end)
        lo = probe + 1;
      else if (uc < unicode_versions[probe].start)
        hi = probe - 1;
      else
        /* neither above the range nor below it, so inside it */
        return unicode_versions[probe].version;
    }

  return GUCHARMAP_UNICODE_VERSION_UNASSIGNED;
}

/**
 * gucharmap_unicode_version_to_string:
 * @version: a #GucharmapUnicodeVersion
 *
 * Looks up the string for @version in unicode_version_strings, using the
 * offset stored at position @version - 1 of
 * unicode_version_string_offsets.
 *
 * Return value: the string for @version, or %NULL when @version is
 * %GUCHARMAP_UNICODE_VERSION_UNASSIGNED or out of range.  Must not be
 * freed.
 **/
G_CONST_RETURN gchar *
gucharmap_unicode_version_to_string (GucharmapUnicodeVersion version)
{
  g_return_val_if_fail (version >= GUCHARMAP_UNICODE_VERSION_UNASSIGNED &&
                        version <= GUCHARMAP_UNICODE_VERSION_LATEST, NULL);

  if (G_LIKELY (version != GUCHARMAP_UNICODE_VERSION_UNASSIGNED))
    return &unicode_version_strings[unicode_version_string_offsets[version - 1]];

  return NULL;
}

Noah Levitt's avatar
Noah Levitt committed
224 225
#if ENABLE_UNIHAN

Noah Levitt's avatar
Noah Levitt committed
226
gint
227
gucharmap_get_unihan_count (void)
Noah Levitt's avatar
Noah Levitt committed
228 229 230 231
{
  return G_N_ELEMENTS (unihan);
}

232 233
/* does a binary search; also caches most recent, since it will often be
 * called in succession on the same character */
234
static G_CONST_RETURN Unihan *
235 236 237 238 239 240
_get_unihan (gunichar uc)
{
  static gunichar most_recent_searched;
  static const Unihan *most_recent_result;
  gint min = 0;
  gint mid;
241 242
  gint max = G_N_ELEMENTS(unihan) - 1;

243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271

  if (uc < unihan[0].index || uc > unihan[max].index)
    return NULL;

  if (uc == most_recent_searched)
    return most_recent_result;

  most_recent_searched = uc;

  while (max >= min) 
    {
      mid = (min + max) / 2;
      if (uc > unihan[mid].index)
        min = mid + 1;
      else if (uc < unihan[mid].index)
        max = mid - 1;
      else
        {
          most_recent_result = unihan + mid;
          return unihan + mid;
        }
    }

  most_recent_result = NULL;
  return NULL;
}

#else /* #if ENABLE_UNIHAN */

Noah Levitt's avatar
Noah Levitt committed
272 273 274 275 276 277
/**
 * gucharmap_get_unihan_count:
 *
 * Variant used when Unihan support is not compiled in.  Declared with an
 * explicit (void) parameter list so that it is a real prototype, matching
 * the variant built when ENABLE_UNIHAN is set.
 *
 * Return value: always 0
 **/
gint
gucharmap_get_unihan_count (void)
{
  return 0;
}

278
/* Variant used when Unihan support is not compiled in: @uc is ignored and
 * a fixed, untranslated notice is returned.  Must not be freed. */
G_CONST_RETURN gchar *
gucharmap_get_unicode_kDefinition (gunichar uc)
{
  const gchar *notice = "This feature was not compiled in.";

  return notice;
}

284
/* Variant used when Unihan support is not compiled in: @uc is ignored and
 * a fixed, untranslated notice is returned.  Must not be freed. */
G_CONST_RETURN gchar *
gucharmap_get_unicode_kCantonese (gunichar uc)
{
  const gchar *notice = "This feature was not compiled in.";

  return notice;
}

290
/* Variant used when Unihan support is not compiled in: @uc is ignored and
 * a fixed, untranslated notice is returned.  Must not be freed. */
G_CONST_RETURN gchar *
gucharmap_get_unicode_kMandarin (gunichar uc)
{
  const gchar *notice = "This feature was not compiled in.";

  return notice;
}

296
/* Variant used when Unihan support is not compiled in: @uc is ignored and
 * a fixed, untranslated notice is returned.  Must not be freed. */
G_CONST_RETURN gchar *
gucharmap_get_unicode_kTang (gunichar uc)
{
  const gchar *notice = "This feature was not compiled in.";

  return notice;
}

302
/* Variant used when Unihan support is not compiled in: @uc is ignored and
 * a fixed, untranslated notice is returned.  Must not be freed. */
G_CONST_RETURN gchar *
gucharmap_get_unicode_kKorean (gunichar uc)
{
  const gchar *notice = "This feature was not compiled in.";

  return notice;
}

308
/* Variant used when Unihan support is not compiled in: @uc is ignored and
 * a fixed, untranslated notice is returned.  Must not be freed. */
G_CONST_RETURN gchar *
gucharmap_get_unicode_kJapaneseKun (gunichar uc)
{
  const gchar *notice = "This feature was not compiled in.";

  return notice;
}

314
/* Variant used when Unihan support is not compiled in: @uc is ignored and
 * a fixed, untranslated notice is returned.  Must not be freed. */
G_CONST_RETURN gchar *
gucharmap_get_unicode_kJapaneseOn (gunichar uc)
{
  const gchar *notice = "This feature was not compiled in.";

  return notice;
}

#endif /* #else (#if ENABLE_UNIHAN) */

322 323
/* does a binary search; also caches most recent, since it will often be
 * called in succession on the same character */
324
static G_CONST_RETURN NamesList *
325 326 327 328 329 330
get_nameslist (gunichar uc)
{
  static gunichar most_recent_searched;
  static const NamesList *most_recent_result;
  gint min = 0;
  gint mid;
331
  gint max = G_N_ELEMENTS (names_list) - 1;
332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358

  if (uc < names_list[0].index || uc > names_list[max].index)
    return NULL;

  if (uc == most_recent_searched)
    return most_recent_result;

  most_recent_searched = uc;

  while (max >= min) 
    {
      mid = (min + max) / 2;
      if (uc > names_list[mid].index)
        min = mid + 1;
      else if (uc < names_list[mid].index)
        max = mid - 1;
      else
        {
          most_recent_result = names_list + mid;
          return names_list + mid;
        }
    }

  most_recent_result = NULL;
  return NULL;
}

359
G_GNUC_INTERNAL gboolean
Noah Levitt's avatar
Noah Levitt committed
360 361 362 363 364
_gucharmap_unicode_has_nameslist_entry (gunichar uc)
{
  return get_nameslist (uc) != NULL;
}

365 366
/**
 * gucharmap_get_nameslist_exes:
 * @uc: a unicode character
 *
 * Collects the values stored for @uc in the names_list_exes table.
 *
 * Return value: a newly allocated array of #gunichar terminated with
 * (gunichar)(-1), or %NULL when @uc has no such entries.  It is allocated
 * with g_malloc().
 **/
gunichar *
gucharmap_get_nameslist_exes (gunichar uc)
{
  const NamesList *entry = get_nameslist (uc);
  gunichar *result;
  gunichar n_exes, k;

  if (entry == NULL || entry->exes_index == -1)
    return NULL;

  /* the rows for @uc are consecutive, starting at exes_index */
  n_exes = 0;
  while (names_list_exes[entry->exes_index + n_exes].index == uc)
    n_exes++;

  result = g_malloc ((n_exes + 1) * sizeof *result);

  for (k = 0; k < n_exes; k++)
    result[k] = names_list_exes[entry->exes_index + k].value;
  result[n_exes] = (gunichar)(-1);

  return result;
}

/* returns newly allocated null-terminated array of gchar* */
/* the items are const, but the array should be freed by the caller */
392
G_CONST_RETURN gchar **
393
gucharmap_get_nameslist_equals (gunichar uc)
394 395 396
{
  const NamesList *nl;
  const gchar **equals;
397 398
  gunichar i, count;

399 400 401 402 403 404 405 406 407 408 409
  nl = get_nameslist (uc);

  if (nl == NULL || nl->equals_index == -1)
    return NULL;

  /* count the number of equals */
  for (i = 0;  names_list_equals[nl->equals_index + i].index == uc;  i++);
  count = i;

  equals = g_malloc ((count + 1) * sizeof (gchar *));
  for (i = 0;  i < count;  i++)
410
    equals[i] = names_list_equals_strings + names_list_equals[nl->equals_index + i].string_index;
411 412 413 414 415 416 417
  equals[count] = NULL;

  return equals;
}

/* returns newly allocated null-terminated array of gchar* */
/* the items are const, but the array should be freed by the caller */
418
G_CONST_RETURN gchar **
419
gucharmap_get_nameslist_stars (gunichar uc)
420 421 422
{
  const NamesList *nl;
  const gchar **stars;
423
  gunichar i, count;
424 425 426 427 428 429 430 431 432 433 434 435

  nl = get_nameslist (uc);

  if (nl == NULL || nl->stars_index == -1)
    return NULL;

  /* count the number of stars */
  for (i = 0;  names_list_stars[nl->stars_index + i].index == uc;  i++);
  count = i;

  stars = g_malloc ((count + 1) * sizeof (gchar *));
  for (i = 0;  i < count;  i++)
436
    stars[i] = names_list_stars_strings + names_list_stars[nl->stars_index + i].string_index;
437 438 439 440 441 442 443
  stars[count] = NULL;

  return stars;
}

/* returns newly allocated null-terminated array of gchar* */
/* the items are const, but the array should be freed by the caller */
444
G_CONST_RETURN gchar **
445
gucharmap_get_nameslist_pounds (gunichar uc)
446 447 448
{
  const NamesList *nl;
  const gchar **pounds;
449
  gunichar i, count;
450 451 452 453 454 455 456 457 458 459 460 461
  
  nl = get_nameslist (uc);

  if (nl == NULL || nl->pounds_index == -1)
    return NULL;

  /* count the number of pounds */
  for (i = 0;  names_list_pounds[nl->pounds_index + i].index == uc;  i++);
  count = i;

  pounds = g_malloc ((count + 1) * sizeof (gchar *));
  for (i = 0;  i < count;  i++)
462
    pounds[i] = names_list_pounds_strings + names_list_pounds[nl->pounds_index + i].string_index;
463 464 465 466 467
  pounds[count] = NULL;

  return pounds;
}

468 469
/* returns newly allocated null-terminated array of gchar* */
/* the items are const, but the array should be freed by the caller */
470
G_CONST_RETURN gchar **
471
gucharmap_get_nameslist_colons (gunichar uc)
472 473 474
{
  const NamesList *nl;
  const gchar **colons;
475 476
  gunichar i, count;

477 478 479 480 481 482 483 484 485 486 487
  nl = get_nameslist (uc);

  if (nl == NULL || nl->colons_index == -1)
    return NULL;

  /* count the number of colons */
  for (i = 0;  names_list_colons[nl->colons_index + i].index == uc;  i++);
  count = i;

  colons = g_malloc ((count + 1) * sizeof (gchar *));
  for (i = 0;  i < count;  i++)
488
    colons[i] = names_list_colons_strings + names_list_colons[nl->colons_index + i].string_index;
489 490 491 492 493
  colons[count] = NULL;

  return colons;
}

494
/* Wrapper, in case we want to support a newer unicode version than glib */
495
gboolean
496
gucharmap_unichar_validate (gunichar ch)
497
{
498
  return g_unichar_validate (ch);
499 500 501
}

/**
502
 * gucharmap_unichar_to_printable_utf8
503 504 505 506 507 508 509 510 511 512 513 514
 * @uc: a unicode character 
 * @outbuf: output buffer, must have at least 10 bytes of space.
 *          If %NULL, the length will be computed and returned
 *          and nothing will be written to @outbuf.
 *
 * Converts a single character to UTF-8 suitable for rendering. Check the
 * source to see what this means. ;-)
 * 
 *
 * Return value: number of bytes written
 **/
gint
515
gucharmap_unichar_to_printable_utf8 (gunichar uc, gchar *outbuf)
516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531
{
  /* Unicode Standard 3.2, section 2.6, "By convention, diacritical marks
   * used by the Unicode Standard may be exhibited in (apparent) isolation
   * by applying them to U+0020 SPACE or to U+00A0 NO BREAK SPACE." */

  /* 17:10 < owen> noah: I'm *not* claiming that what Pango does currently
   *               is right, but convention isn't a requirement. I think
   *               it's probably better to do the Uniscribe thing and put
   *               the lone combining mark on a dummy character and require
   *               ZWJ
   * 17:11 < noah> owen: do you mean that i should put a ZWJ in there, or
   *               that pango will do that?
   * 17:11 < owen> noah: I mean, you should (assuming some future more
   *               capable version of Pango) put it in there
   */

532 533
  if (! gucharmap_unichar_validate (uc) || (! gucharmap_unichar_isgraph (uc) 
      && gucharmap_unichar_type (uc) != G_UNICODE_PRIVATE_USE))
534
    return 0;
535 536 537
  else if (gucharmap_unichar_type (uc) == G_UNICODE_COMBINING_MARK
      || gucharmap_unichar_type (uc) == G_UNICODE_ENCLOSING_MARK
      || gucharmap_unichar_type (uc) == G_UNICODE_NON_SPACING_MARK)
538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554
    {
      gint x;

      outbuf[0] = ' ';
      outbuf[1] = '\xe2'; /* ZERO */ 
      outbuf[2] = '\x80'; /* WIDTH */
      outbuf[3] = '\x8d'; /* JOINER (0x200D) */

      x = g_unichar_to_utf8 (uc, outbuf + 4);

      return x + 4;
    }
  else
    return g_unichar_to_utf8 (uc, outbuf);
}

/**
555
 * gucharmap_unichar_type:
556 557 558 559 560 561 562
 * @c: a Unicode character
 * 
 * Classifies a Unicode character by type.
 * 
 * Return value: the type of the character.
 **/
GUnicodeType
563
gucharmap_unichar_type (gunichar uc)
564 565 566 567 568
{
  gint min = 0;
  gint mid;
  gint max = sizeof (unicode_categories) / sizeof (UnicodeCategory) - 1;

Noah Levitt's avatar
Noah Levitt committed
569
  if (uc < unicode_categories[0].start || uc > unicode_categories[max].end)
570 571 572 573 574
    return G_UNICODE_UNASSIGNED;

  while (max >= min) 
    {
      mid = (min + max) / 2;
Noah Levitt's avatar
Noah Levitt committed
575
      if (uc > unicode_categories[mid].end)
576
        min = mid + 1;
Noah Levitt's avatar
Noah Levitt committed
577
      else if (uc < unicode_categories[mid].start)
578 579 580 581 582 583 584 585 586
        max = mid - 1;
      else
        return unicode_categories[mid].category;
    }

  return G_UNICODE_UNASSIGNED;
}

/**
587
 * gucharmap_unichar_isdefined:
588 589 590 591 592 593 594 595
 * @uc: a Unicode character
 * 
 * Determines if a given character is assigned in the Unicode
 * standard.
 *
 * Return value: %TRUE if the character has an assigned value
 **/
gboolean
596
gucharmap_unichar_isdefined (gunichar uc)
597
{
598
  return gucharmap_unichar_type (uc) != G_UNICODE_UNASSIGNED;
599 600 601
}

/**
602
 * gucharmap_unichar_isgraph:
603 604 605 606 607 608 609 610 611 612 613
 * @uc: a Unicode character
 * 
 * Determines whether a character is printable and not a space
 * (returns %FALSE for control characters, format characters, and
 * spaces). g_unichar_isprint() is similar, but returns %TRUE for
 * spaces. Given some UTF-8 text, obtain a character value with
 * g_utf8_get_char().
 * 
 * Return value: %TRUE if @c is printable unless it's a space
 **/
gboolean
614
gucharmap_unichar_isgraph (gunichar uc)
615
{
616
  GUnicodeType t = gucharmap_unichar_type (uc);
617 618 619 620 621 622 623 624 625

  return (t != G_UNICODE_CONTROL
          && t != G_UNICODE_FORMAT
          && t != G_UNICODE_UNASSIGNED
          && t != G_UNICODE_PRIVATE_USE
          && t != G_UNICODE_SURROGATE
          && t != G_UNICODE_SPACE_SEPARATOR);
}

626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657
/* Walks the UTF-8 string @str and returns the first character for which
 * g_unichar_isalpha() is true.  Returns 0 when @str is NULL or contains
 * no such character. */
static gunichar
get_first_non_underscore_char (const char *str)
{
  const char *cursor = str;

  while (cursor != NULL && *cursor != '\0')
    {
      gunichar ch = g_utf8_get_char (cursor);

      if (g_unichar_isalpha (ch))
        return ch;

      cursor = g_utf8_find_next_char (cursor, NULL);
    }

  return 0;
}

/**
 * gucharmap_unicode_get_locale_character:
 *
 * Determines a character that's commonly used in the current
 * locale's script.
 * 
 * Returns: a unicode character
 */
gunichar
gucharmap_unicode_get_locale_character (void)
{
  GtkStockItem item;
658
  if (!gtk_stock_lookup (GTK_STOCK_FIND, &item))
659 660 661 662
    return 0;

  return get_first_non_underscore_char (item.label);
}