Commit 0cb8e255 authored by ChangZhuo Chen's avatar ChangZhuo Chen

New upstream version 1.11.0

parent 06bb4afd
version 1.11.0
* fixes keypad decimal
* fixes emoji candidates
* support configurable opencc config
version 1.10.92
* fixes Enter handling
version 1.10.91
* support ime.register_trigger in lua extension
* support predicted candidates
* support emoji input
version 1.10.0
* bug fixes
......
......@@ -21,7 +21,7 @@
# if not 1, append datestamp to the version number.
m4_define([ibus_released], [1])
m4_define([ibus_major_version], [1])
m4_define([ibus_minor_version], [10])
m4_define([ibus_minor_version], [11])
m4_define([ibus_micro_version], [0])
m4_define(ibus_maybe_datestamp,
m4_esyscmd([if test x]ibus_released[ != x1; then date +.%Y%m%d | tr -d '\n\r'; fi]))
......@@ -68,7 +68,7 @@ if test -z "$SQLITE3"; then
fi
PKG_CHECK_MODULES(LIBPINYIN, [
libpinyin >= 1.9.91
libpinyin >= 2.2.1
], [enable_libpinyin=yes])
LIBPINYIN_DATADIR=`$PKG_CONFIG --variable=pkgdatadir libpinyin`
......
......@@ -47,7 +47,7 @@
</key>
<key name="dictionaries" type="s">
<default>''</default>
<summary>enable install updates</summary>
<summary>enable dictionaries</summary>
</key>
<key name="double-pinyin" type="b">
<default>false</default>
......@@ -177,6 +177,18 @@
<default>''</default>
<summary>Clean User Data</summary>
</key>
<key name="lua-converter" type="s">
<default>''</default>
<summary>Use Lua Converter</summary>
</key>
<key name="show-suggestion" type="b">
<default>false</default>
<summary>Show Suggestions</summary>
</key>
<key name="opencc-config" type="s">
<default>'s2t.json'</default>
<summary>The config file which should be used by OpenCC for traditional input. Please consult the OpenCC documentation for more information</summary>
</key>
</schema>
<schema path="/com/github/libpinyin/ibus-libpinyin/libbopomofo/" id="com.github.libpinyin.ibus-libpinyin.libbopomofo">
<key name="auxiliary-select-key-f" type="i">
......@@ -193,7 +205,7 @@
</key>
<key name="dictionaries" type="s">
<default>''</default>
<summary>enable install updates</summary>
<summary>enable dictionaries</summary>
</key>
<key name="dynamic-adjust" type="b">
<default>true</default>
......@@ -307,5 +319,13 @@
<default>1</default>
<summary>Sort candidate option</summary>
</key>
<key name="show-suggestion" type="b">
<default>false</default>
<summary>Show Suggestions</summary>
</key>
<key name="opencc-config" type="s">
<default>'s2t.json'</default>
<summary>The config file which should be used by OpenCC for traditional input. Please consult the OpenCC documentation for more information</summary>
</key>
</schema>
</schemalist>
......@@ -235,11 +235,20 @@ function query_zodiac(birthday)
error("Should never reach here")
end
-- Zero-argument wrapper around get_time, used as a trigger callback.
-- It forwards an empty argument string and returns whatever get_time returns.
-- NOTE(review): presumably an empty argument makes get_time use the current
-- time -- confirm against get_time.
function get_current_time()
return get_time("")
end
-- Zero-argument wrapper around get_date, used as a trigger callback.
-- It forwards an empty argument string and returns whatever get_date returns.
-- NOTE(review): presumably an empty argument makes get_date use today's
-- date -- confirm against get_date.
function get_today()
return get_date("")
end
------------
ime.register_command("sj", "get_time", "输入时间", "alpha", "输入可选时间,例如12:34")
ime.register_command("rq", "get_date", "输入日期", "alpha", "输入可选日期,例如2013-01-01")
ime.register_command("js", "compute", "计算模式", "none", "输入表达式,例如log(2)")
ime.register_command("xz", "query_zodiac", "查询星座", "none", "输入您的生日,例如12-3")
ime.register_trigger("get_current_time", "显示时间", {}, {'时间'})
ime.register_trigger("get_today", "显示日期", {}, {'日期'})
print("lua script loaded.")
......@@ -36,7 +36,9 @@ void print_interactive_help(){
printf("i \t\t\t - lists all commands.\n");
printf("i [COMMAND] \t\t - evaluates command without argument. \n");
printf("i [COMMAND] [ARGUMENT] \t evaluates command with argument. \n");
/* printf("g [TRIGGER_STRING] \t\t - tests a trigger string, fire trigger if hit.\n"); */
printf("g [TRIGGER_STRING] \t\t - tests a trigger string, fire trigger if hit.\n");
printf("c \t\t\t - lists all converters.\n");
printf("c [FUNCTION] [STRING] \t tests a converter function. \n");
printf("quit \t\t\t - quit the shell.\n");
printf("help \t\t\t - show this message.\n");
}
......@@ -52,6 +54,16 @@ void list_all_commands(IBusEnginePlugin * plugin){
printf("\n");
}
/**
 * Print every converter registered with the plugin on a single line.
 * Each entry is written as "<lua function name> <description> >" followed
 * by a tab; one newline terminates the whole list.
 */
void list_all_converters(IBusEnginePlugin * plugin){
    const GArray * available = ibus_engine_plugin_get_available_converters(plugin);
    size_t idx = 0;

    while ( idx < available->len ){
        const lua_converter_t * entry = &g_array_index(available, lua_converter_t, idx);
        printf("%s %s >\t", entry->lua_function_name, entry->description);
        ++idx;
    }

    printf("\n");
}
int print_lua_call_result(IBusEnginePlugin * plugin, size_t num){
if ( 1 == num ) {
const lua_command_candidate_t * result = ibus_engine_plugin_get_retval(plugin);
......@@ -90,6 +102,13 @@ int do_lua_call(IBusEnginePlugin * plugin, const char * command_name, const char
return 0;
}
/**
 * Call a lua function with a single string argument and print the first
 * value it produced.
 *
 * @param plugin             the lua plugin to run the function in.
 * @param lua_function_name  name of the lua function to call.
 * @param string             argument handed to the lua function.
 * @return always 0.
 */
int do_simple_lua_call(IBusEnginePlugin * plugin, const char * lua_function_name, const char * string){
    gchar * first_result = NULL;

    /* Only the first value is displayed, so the result count is not kept. */
    ibus_engine_plugin_call(plugin, lua_function_name, string);

    /* The returned string is a copy owned by us; it must be released. */
    first_result = ibus_engine_plugin_get_first_result(plugin);

    /* Passing NULL to %s is undefined behaviour; print a placeholder instead. */
    printf("result: %s.\n", first_result ? first_result : "(null)");

    g_free(first_result);
    return 0;
}
int main(int argc, char * argv[]){
char * line = NULL;
size_t len = 0;
......@@ -126,16 +145,28 @@ int main(int argc, char * argv[]){
print_interactive_help();
if ( 0 == strcmp("i", strs[0]) )
list_all_commands(plugin);
if ( 0 == strcmp("c", strs[0]) )
list_all_converters(plugin);
break;
case 2:
if ( 0 == strcmp("i", strs[0]))
do_lua_call(plugin, strs[1], NULL);
if ( 0 == strcmp("g", strs[0]))
fprintf(stderr, "ime trigger unimplemented.");
if ( 0 == strcmp("g", strs[0])) {
const char * lua_function_name = NULL;
if (ibus_engine_plugin_match_input
(plugin, strs[1], &lua_function_name)) {
do_simple_lua_call(plugin, lua_function_name, strs[1]);
} else if (ibus_engine_plugin_match_candidate
(plugin, strs[1], &lua_function_name)) {
do_simple_lua_call(plugin, lua_function_name, strs[1]);
}
}
break;
case 3:
if ( 0 == strcmp("i", strs[0]))
do_lua_call(plugin, strs[1], strs[2]);
if ( 0 == strcmp("c", strs[0]))
do_simple_lua_call(plugin, strs[1], strs[2]);
break;
default:
fprintf(stderr, "wrong arguments.");
......
......@@ -241,44 +241,76 @@ static int ime_register_command(lua_State * L){
}
static int ime_register_trigger(lua_State * L){
const char * lua_function_name = luaL_checklstring(L, 1, NULL);
const char * description = luaL_checklstring(L, 2, NULL);
size_t num; size_t i;
fprintf(stderr, "TODO: ime_register_trigger unimplemented when called with %s (%s).\n", lua_function_name, description);
lua_trigger_t new_trigger;
luaL_checktype(L, 3, LUA_TTABLE);
memset(&new_trigger, 0, sizeof(new_trigger));
new_trigger.lua_function_name = luaL_checklstring(L, 1, NULL);
lua_getglobal(L, new_trigger.lua_function_name);
luaL_checktype(L, -1, LUA_TFUNCTION);
lua_pop(L, 1);
/* TODO: register_trigger with input_trigger_strings. */
new_trigger.description = luaL_checklstring(L, 2, NULL);
size_t num; gint i;
GPtrArray *array;
/* register_trigger with input_trigger_strings. */
array = g_ptr_array_new();
luaL_checktype(L, 3, LUA_TTABLE);
num = lua_objlen(L, 3);
for ( i = 0; i < num; ++i) {
lua_pushinteger(L, i + 1);
lua_gettable(L, 3);
fprintf(stderr, "%d:%s\t", (int)i + 1, lua_tostring(L, -1));
g_ptr_array_add(array, (gpointer)lua_tostring(L, -1));
lua_pop(L, 1);
}
fprintf(stderr, "\n");
g_ptr_array_add(array, NULL);
new_trigger.input_trigger_strings =
(gchar **)g_ptr_array_free(array, FALSE);
/* register_trigger with candidate_trigger_strings. */
array = g_ptr_array_new();
luaL_checktype(L, 4, LUA_TTABLE);
/* TODO: register_trigger with candidate_trigger_strings. */
num = lua_objlen(L, 4);
for ( i = 0; i < num; ++i) {
lua_pushinteger(L, i + 1);
lua_gettable(L, 4);
fprintf(stderr, "%d:%s\t", (int) i + 1, lua_tostring(L, -1));
g_ptr_array_add(array, (gpointer)lua_tostring(L, -1));
lua_pop(L, 1);
}
fprintf(stderr, "\n");
g_ptr_array_add(array, NULL);
new_trigger.candidate_trigger_strings =
(gchar **)g_ptr_array_free(array, FALSE);
gboolean result = ibus_engine_plugin_add_trigger
(lua_plugin_retrieve_plugin(L), &new_trigger);
g_free(new_trigger.input_trigger_strings);
g_free(new_trigger.candidate_trigger_strings);
if (!result)
return luaL_error(L, "register trigger with function %s failed.\n", new_trigger.lua_function_name);
return 0;
}
static int ime_register_converter(lua_State * L){
const char * lua_function_name = luaL_checklstring(L, 1, NULL);
const char * description = luaL_checklstring(L, 2, NULL);
lua_converter_t new_converter;
memset(&new_converter, 0, sizeof(new_converter));
new_converter.lua_function_name = luaL_checklstring(L, 1, NULL);
lua_getglobal(L, new_converter.lua_function_name);
luaL_checktype(L, -1, LUA_TFUNCTION);
lua_pop(L, 1);
fprintf(stderr, "TODO: ime_register_converter unimplemented when called with %s(%s).\n", lua_function_name, description);
new_converter.description = luaL_checklstring(L, 2, NULL);
return 0;
gboolean result = ibus_engine_plugin_add_converter
(lua_plugin_retrieve_plugin(L), &new_converter);
if (!result)
return luaL_error(L, "register converter with function %s failed.\n", new_converter.lua_function_name);
return 0;
}
static int ime_split_string(lua_State * L){
......@@ -413,9 +445,7 @@ static const luaL_Reg imelib[] = {
{"join_string", ime_join_string},
{"parse_mapping", ime_parse_mapping},
{"register_command", ime_register_command},
/* Note: the register_converter function is dropped for ibus-libpinyin. */
{"register_converter", ime_register_converter},
/* Note: the register_trigger function is dropped for ibus-libpinyin. */
{"register_trigger", ime_register_trigger},
{"split_string", ime_split_string},
{"trim_string_left", ime_trim_string_left},
......
......@@ -41,6 +41,9 @@
struct _IBusEnginePluginPrivate{
lua_State * L;
GArray * lua_commands; /* Array of lua_command_t. */
GArray * lua_triggers; /* Array of lua_trigger_t. */
GArray * lua_converters; /* Array of lua_converter_t. */
gchar * use_converter;
};
G_DEFINE_TYPE (IBusEnginePlugin, ibus_engine_plugin, G_TYPE_OBJECT);
......@@ -61,6 +64,30 @@ static void lua_command_reclaim(lua_command_t * command){
g_free((gpointer)command->help);
}
/**
 * Deep-copy the fields of a trigger into new_trigger.
 * The two names are duplicated with g_strdup and the two NULL-terminated
 * string vectors with g_strdupv; release the copy with lua_trigger_reclaim.
 */
static void lua_trigger_clone(lua_trigger_t * trigger, lua_trigger_t * new_trigger){
    gchar * name_copy = g_strdup(trigger->lua_function_name);
    gchar * description_copy = g_strdup(trigger->description);
    gchar ** input_copy = g_strdupv(trigger->input_trigger_strings);
    gchar ** candidate_copy = g_strdupv(trigger->candidate_trigger_strings);

    new_trigger->lua_function_name = name_copy;
    new_trigger->description = description_copy;
    new_trigger->input_trigger_strings = input_copy;
    new_trigger->candidate_trigger_strings = candidate_copy;
}
/**
 * Release the strings and string vectors owned by a trigger.
 * The lua_trigger_t structure itself is not freed.
 */
static void lua_trigger_reclaim(lua_trigger_t * trigger){
    gpointer owned_name = (gpointer)trigger->lua_function_name;
    gpointer owned_description = (gpointer)trigger->description;

    g_free(owned_name);
    g_free(owned_description);

    g_strfreev(trigger->input_trigger_strings);
    g_strfreev(trigger->candidate_trigger_strings);
}
/**
 * Copy a converter into new_converter, duplicating both strings with
 * g_strdup; release the copy with lua_converter_reclaim.
 */
static void lua_converter_clone(lua_converter_t * converter, lua_converter_t * new_converter){
    gchar * name_copy = g_strdup(converter->lua_function_name);
    gchar * description_copy = g_strdup(converter->description);

    new_converter->lua_function_name = name_copy;
    new_converter->description = description_copy;
}
/**
 * Release the two strings owned by a converter.
 * The lua_converter_t structure itself is not freed.
 */
static void lua_converter_reclaim(lua_converter_t * converter){
    gpointer owned_name = (gpointer)converter->lua_function_name;
    gpointer owned_description = (gpointer)converter->description;

    g_free(owned_name);
    g_free(owned_description);
}
static int
lua_plugin_init(IBusEnginePluginPrivate * plugin){
g_assert(NULL == plugin->L);
......@@ -72,6 +99,14 @@ lua_plugin_init(IBusEnginePluginPrivate * plugin){
g_assert ( NULL == plugin->lua_commands );
plugin->lua_commands = g_array_new(TRUE, TRUE, sizeof(lua_command_t));
g_assert ( NULL == plugin->lua_triggers );
plugin->lua_triggers = g_array_new(TRUE, TRUE, sizeof(lua_trigger_t));
g_assert ( NULL == plugin->lua_converters );
plugin->lua_converters = g_array_new(TRUE, TRUE, sizeof(lua_converter_t));
plugin->use_converter = NULL;
return 0;
}
......@@ -79,6 +114,8 @@ static int
lua_plugin_fini(IBusEnginePluginPrivate * plugin){
size_t i;
lua_command_t * command;
lua_trigger_t * trigger;
lua_converter_t * converter;
if ( plugin->lua_commands ){
for ( i = 0; i < plugin->lua_commands->len; ++i){
......@@ -89,8 +126,30 @@ lua_plugin_fini(IBusEnginePluginPrivate * plugin){
plugin->lua_commands = NULL;
}
if ( plugin->lua_triggers ){
for ( i = 0; i < plugin->lua_triggers->len; ++i){
trigger = &g_array_index(plugin->lua_triggers, lua_trigger_t, i);
lua_trigger_reclaim(trigger);
}
g_array_free(plugin->lua_triggers, TRUE);
plugin->lua_triggers = NULL;
}
if ( plugin->lua_converters ){
for ( i = 0; i < plugin->lua_converters->len; ++i){
converter = &g_array_index(plugin->lua_converters, lua_converter_t, i);
lua_converter_reclaim(converter);
}
g_array_free(plugin->lua_converters, TRUE);
plugin->lua_converters = NULL;
}
lua_close(plugin->L);
plugin->L = NULL;
g_free(plugin->use_converter);
plugin->use_converter = NULL;
return 0;
}
......@@ -199,6 +258,101 @@ const GArray * ibus_engine_plugin_get_available_commands(IBusEnginePlugin * plug
return priv->lua_commands;
}
/**
 * Append a deep copy of trigger to the plugin's trigger array.
 * The caller keeps ownership of the trigger it passed in.
 * @return always TRUE.
 */
gboolean ibus_engine_plugin_add_trigger(IBusEnginePlugin * plugin, lua_trigger_t * trigger){
    IBusEnginePluginPrivate * priv = IBUS_ENGINE_PLUGIN_GET_PRIVATE(plugin);
    lua_trigger_t copy;

    lua_trigger_clone(trigger, &copy);
    g_array_append_val(priv->lua_triggers, copy);

    return TRUE;
}
/**
 * Return the plugin's internal array of lua_trigger_t entries.
 * The array is not copied and stays owned by the plugin.
 */
const GArray * ibus_engine_plugin_get_available_triggers(IBusEnginePlugin * plugin){
    const IBusEnginePluginPrivate * state = IBUS_ENGINE_PLUGIN_GET_PRIVATE(plugin);
    const GArray * registered = state->lua_triggers;

    return registered;
}
/**
 * Find the first trigger whose input pattern matches the given input.
 *
 * Triggers are scanned in registration order and each pattern is tested
 * with g_pattern_match_simple (glob-style '*' and '?' wildcards).
 *
 * @param plugin             the lua plugin holding the triggers.
 * @param input              the text to test; NULL never matches.
 * @param lua_function_name  on a match, receives the name of the trigger's
 *                           lua function; the string stays owned by the
 *                           plugin. Left untouched when nothing matches.
 * @return TRUE when a pattern matched, FALSE otherwise.
 */
gboolean ibus_engine_plugin_match_input(IBusEnginePlugin * plugin, const char * input, const char ** lua_function_name){
    IBusEnginePluginPrivate * priv = IBUS_ENGINE_PLUGIN_GET_PRIVATE(plugin);
    GArray * lua_triggers = priv->lua_triggers;
    guint i;

    if (NULL == input || NULL == lua_triggers)
        return FALSE;

    for (i = 0; i < lua_triggers->len; ++i){
        lua_trigger_t * trigger = &g_array_index(lua_triggers, lua_trigger_t, i);
        gchar ** pattern = trigger->input_trigger_strings;

        /* g_strdupv(NULL) is NULL, so a trigger added without input
         * patterns carries no vector at all. */
        if (NULL == pattern)
            continue;

        for (; *pattern != NULL; ++pattern){
            if (g_pattern_match_simple(*pattern, input)){
                *lua_function_name = trigger->lua_function_name;
                return TRUE;
            }
        }
    }

    return FALSE;
}
/**
 * Find the first trigger whose candidate pattern matches the given candidate.
 *
 * Triggers are scanned in registration order and each pattern is tested
 * with g_pattern_match_simple (glob-style '*' and '?' wildcards).
 *
 * @param plugin             the lua plugin holding the triggers.
 * @param candidate          the candidate text to test; NULL never matches.
 * @param lua_function_name  on a match, receives the name of the trigger's
 *                           lua function; the string stays owned by the
 *                           plugin. Left untouched when nothing matches.
 * @return TRUE when a pattern matched, FALSE otherwise.
 */
gboolean ibus_engine_plugin_match_candidate(IBusEnginePlugin * plugin, const char * candidate, const char ** lua_function_name){
    IBusEnginePluginPrivate * priv = IBUS_ENGINE_PLUGIN_GET_PRIVATE(plugin);
    GArray * lua_triggers = priv->lua_triggers;
    guint i;

    if (NULL == candidate || NULL == lua_triggers)
        return FALSE;

    for (i = 0; i < lua_triggers->len; ++i){
        lua_trigger_t * trigger = &g_array_index(lua_triggers, lua_trigger_t, i);
        gchar ** pattern = trigger->candidate_trigger_strings;

        /* g_strdupv(NULL) is NULL, so a trigger added without candidate
         * patterns carries no vector at all. */
        if (NULL == pattern)
            continue;

        for (; *pattern != NULL; ++pattern){
            if (g_pattern_match_simple(*pattern, candidate)){
                *lua_function_name = trigger->lua_function_name;
                return TRUE;
            }
        }
    }

    return FALSE;
}
/**
 * Append a copy of converter to the plugin's converter array.
 * The caller keeps ownership of the converter it passed in.
 * @return always TRUE.
 */
gboolean ibus_engine_plugin_add_converter(IBusEnginePlugin * plugin, lua_converter_t * converter){
    IBusEnginePluginPrivate * priv = IBUS_ENGINE_PLUGIN_GET_PRIVATE(plugin);
    lua_converter_t copy;

    lua_converter_clone(converter, &copy);
    g_array_append_val(priv->lua_converters, copy);

    return TRUE;
}
/**
 * Return the plugin's internal array of lua_converter_t entries.
 * The array is not copied and stays owned by the plugin.
 */
const GArray * ibus_engine_plugin_get_available_converters(IBusEnginePlugin * plugin){
    const IBusEnginePluginPrivate * state = IBUS_ENGINE_PLUGIN_GET_PRIVATE(plugin);
    const GArray * registered = state->lua_converters;

    return registered;
}
/**
 * Select the converter to use by its lua function name.
 *
 * @param plugin             the lua plugin holding the converters.
 * @param lua_function_name  name to select; it must equal the function name
 *                           of a registered converter.
 * @return TRUE when a registered converter has that name and the selection
 *         was stored, FALSE otherwise (the previous selection is kept).
 */
gboolean ibus_engine_plugin_set_converter(IBusEnginePlugin * plugin, const char * lua_function_name){
    IBusEnginePluginPrivate * priv = IBUS_ENGINE_PLUGIN_GET_PRIVATE(plugin);
    GArray * lua_converters = priv->lua_converters;
    guint i;

    for (i = 0; i < lua_converters->len; ++i) {
        lua_converter_t * converter = &g_array_index
            (lua_converters, lua_converter_t, i);

        if (g_strcmp0 (converter->lua_function_name, lua_function_name) == 0) {
            /* Duplicate before freeing: the caller may pass the very pointer
             * returned by ibus_engine_plugin_get_converter, which aliases
             * priv->use_converter. */
            gchar * selected = g_strdup(lua_function_name);
            g_free(priv->use_converter);
            priv->use_converter = selected;
            return TRUE;
        }
    }

    return FALSE;
}
/**
 * Return the lua function name stored by ibus_engine_plugin_set_converter.
 * The string is not copied and stays owned by the plugin.
 */
const char * ibus_engine_plugin_get_converter(IBusEnginePlugin * plugin){
    const IBusEnginePluginPrivate * state = IBUS_ENGINE_PLUGIN_GET_PRIVATE(plugin);
    const char * selected = state->use_converter;

    return selected;
}
int ibus_engine_plugin_call(IBusEnginePlugin * plugin, const char * lua_function_name, const char * argument /*optional, maybe NULL.*/){
IBusEnginePluginPrivate * priv = IBUS_ENGINE_PLUGIN_GET_PRIVATE(plugin);
int type; int result;
......@@ -259,6 +413,30 @@ static const lua_command_candidate_t * ibus_engine_plugin_get_candidate(lua_Stat
return candidate;
}
/**
 * Retrieve the first string value from the top of the lua stack.
 *
 * If the top value is a number, boolean or string it is converted with
 * lua_tostring and popped. If it is a table, element [1] is fetched and
 * converted in the same way when it has one of those types, then both the
 * element and the table are popped.
 *
 * @return a newly allocated copy of the value, to be released with g_free,
 *         or NULL when no string could be obtained.
 *
 * NOTE(review): per the Lua reference manual lua_tostring only converts
 * strings and numbers, so a boolean appears to yield NULL here -- confirm
 * whether booleans are meant to be supported.
 * NOTE(review): a top value of any other type is left on the stack --
 * confirm this is what callers expect.
 */
gchar * ibus_engine_plugin_get_first_result(IBusEnginePlugin * plugin){
    IBusEnginePluginPrivate * priv = IBUS_ENGINE_PLUGIN_GET_PRIVATE(plugin);
    lua_State * L = priv->L;
    gchar * result = NULL;
    int type;
    int element_type;

    type = lua_type(L, -1);
    if ( LUA_TNUMBER == type || LUA_TBOOLEAN == type || LUA_TSTRING == type ) {
        result = g_strdup(lua_tostring(L, -1));
        lua_pop(L, 1);
    } else if ( LUA_TTABLE == type ) {
        /* Push table[1]; the table itself is now at index -2. */
        lua_pushinteger(L, 1);
        lua_gettable(L, -2);
        element_type = lua_type(L, -1);
        if ( LUA_TNUMBER == element_type || LUA_TBOOLEAN == element_type || LUA_TSTRING == element_type )
            result = g_strdup(lua_tostring(L, -1));
        /* Drop both the fetched element and the table. */
        lua_pop(L, 2);
    }

    return result;
}
/**
* retrieve the retval string value. (value has been copied.)
*/
......
......@@ -23,6 +23,10 @@
#ifndef LUA_PLUGIN_H
#define LUA_PLUGIN_H
#include <glib.h>
G_BEGIN_DECLS
#include <lua.h>
#include <lualib.h>
#include <lauxlib.h>
......@@ -52,13 +56,15 @@ typedef struct _lua_command_candidate_t{
typedef struct _lua_trigger_t{
const char * lua_function_name;
const char * description;
/*< private, skip it, and register it into Special Table directly with * wildcard. >*/
/*
* list of input_trigger_strings;
* list of candidate_trigger_strings;
*/
gchar **input_trigger_strings;
gchar **candidate_trigger_strings;
} lua_trigger_t;
typedef struct _lua_converter_t{
const char * lua_function_name;
const char * description;
} lua_converter_t;
/*
* Type macros.
*/
......@@ -109,10 +115,52 @@ gboolean ibus_engine_plugin_add_command(IBusEnginePlugin * plugin, lua_command_t
/**
* retrieve all available lua plugin commands.
* return array of command informations of type lua_command_t without copies.
* return array of command information of type lua_command_t without copies.
*/
const GArray * ibus_engine_plugin_get_available_commands(IBusEnginePlugin * plugin);
/**
* add a lua_trigger_t to plugin.
*/
gboolean ibus_engine_plugin_add_trigger(IBusEnginePlugin * plugin, lua_trigger_t * trigger);
/**
* retrieve all available lua plugin triggers.
* return array of trigger information of type lua_trigger_t without copies.
*/
const GArray * ibus_engine_plugin_get_available_triggers(IBusEnginePlugin * plugin);
/**
* retrieve the lua function name of the matched input for lua_trigger_t.
*/
gboolean ibus_engine_plugin_match_input(IBusEnginePlugin * plugin, const char * input, const char ** lua_function_name);
/**
* retrieve the lua function name of the matched candidate for lua_trigger_t.
*/
gboolean ibus_engine_plugin_match_candidate(IBusEnginePlugin * plugin, const char * candidate, const char ** lua_function_name);
/**
* add a lua_converter_t to plugin.
*/
gboolean ibus_engine_plugin_add_converter(IBusEnginePlugin * plugin, lua_converter_t * converter);
/**
* retrieve all available lua plugin converters.
* return array of converter information of type lua_converter_t without copies.
*/
const GArray * ibus_engine_plugin_get_available_converters(IBusEnginePlugin * plugin);
/**
* set the converter with the lua function name.
*/
gboolean ibus_engine_plugin_set_converter(IBusEnginePlugin * plugin, const char * lua_function_name);
/**
 * get the lua function name of the currently selected converter.
 * the returned string is owned by the plugin; NULL when none is selected.
 */
const char * ibus_engine_plugin_get_converter(IBusEnginePlugin * plugin);
/**
* Lookup a special command in ime lua extension.
* command must be an 2-char long string.
......@@ -127,6 +175,11 @@ const lua_command_t * ibus_engine_plugin_lookup_command(IBusEnginePlugin * plugi
*/
int ibus_engine_plugin_call(IBusEnginePlugin * plugin, const char * lua_function_name, const char * argument /*optional, maybe NULL.*/);
/**
 * retrieve the first string value. (value has been copied;
 * the caller must release it with g_free. may be NULL.)
 */
gchar * ibus_engine_plugin_get_first_result(IBusEnginePlugin * plugin);
/**
* retrieve the retval string value. (value has been copied.)
*/
......@@ -138,4 +191,7 @@ const lua_command_candidate_t * ibus_engine_plugin_get_retval(IBusEnginePlugin *
GArray * ibus_engine_plugin_get_retvals(IBusEnginePlugin * plugin);
void ibus_engine_plugin_free_candidate(lua_command_candidate_t * candidate);
G_END_DECLS
#endif
This diff is collapsed.
This diff is collapsed.
......@@ -510,7 +510,7 @@ msgstr "模糊音"
#: ../setup/ibus-libpinyin-preferences.ui.h:63
msgid "Tips: this changes may take effects after ime restarted."
msgstr "提示:此更改可能需要重智能拼音输入法后生效"
msgstr "提示:此更改可能需要重启智能拼音输入法后生效"
#: ../setup/ibus-libpinyin-preferences.ui.h:64
msgid "<b>Dictionary option</b>"
......
#ifndef __PY_LIB_PINYIN_EMOJI_TABLE_H
#define __PY_LIB_PINYIN_EMOJI_TABLE_H
/* Emoji lookup tables for the PY namespace.
 * This is a template: the placeholder line inside each table initializer
 * is not valid C++ on its own.
 * NOTE(review): presumably the placeholders are substituted with generated
 * entries at build time -- confirm against the generator script. */
namespace PY{
/* One table entry pairing two C strings.
 * NOTE(review): presumably m_emoji_match is the text to match and
 * m_emoji_string the emoji to offer -- confirm against the users of
 * these tables. */
typedef struct {
const char * m_emoji_match;
const char * m_emoji_string;
} EmojiItem;
/* Table of English entries; contents come from the placeholder below. */
const EmojiItem english_emoji_table[] = {
@ENGLISH_EMOJIS@
};
/* Table of Chinese entries; contents come from the placeholder below. */
const EmojiItem chinese_emoji_table[] = {
@CHINESE_EMOJIS@
};
};
#endif
#!/usr/bin/python3
import os
import operator
from argparse import ArgumentParser
import xml.etree.ElementTree as ET
header = '''/* This file is generated by python scripts. Don't edit this file directly.
*/
'''
alphabet = "abcdefghijklmnopqrstuvwxyz"
eng_emojis = []
chs_emojis = []
def load_emoji(filename):
    """Load an annotation XML file into a word -> emoji mapping.

    Every ``annotation`` element found anywhere in the document is read.
    Its text is split on ``|`` and each piece is stripped of surrounding
    whitespace; the piece becomes a key mapped to the element's ``cp``
    attribute. When the same word occurs more than once, only the first
    encountered emoji is kept.

    Elements without any text (for example ``<annotation cp="x"/>``) are
    skipped instead of raising ``AttributeError``.

    :param filename: path of the XML file to parse.
    :return: dict mapping each word to the ``cp`` value of its first
        occurrence, in document order.
    """
    root = ET.parse(filename).getroot()

    emojis = {}
    for annotation in root.findall('.//annotation'):
        text = annotation.text
        if text is None:
            # An empty element carries no words to register.
            continue
        codepoint = annotation.get('cp')
        for word in text.split('|'):
            # setdefault keeps the first encountered emoji for a word
            emojis.setdefault(word.strip(), codepoint)

    return emojis
# no space allowed for English emoji
def filter_English_emoji(emojis):
    """Return a new dict holding only the entries usable as English keys.

    A key is kept when it contains no space, is at most six characters
    long, and consists solely of characters found in ``alphabet``.
    The input mapping is not modified and entry order is preserved.
    """
    def acceptable(word):
        # no space, short enough, and made of alphabet characters only
        if ' ' in word or len(word) > 6:
            return False
        return all(ch in alphabet for ch in word)

    return {word: cp for word, cp in emojis.items() if acceptable(word)}
# less than four characters for Chinese emoji
def filter_Chinese_emoji(emojis):
emojis_copy = {}
for key, value in emojis.items():
if len(key) > 2:
continue
# just reject alphabet
isalnum = False
for c in key:
if c.isdigit() or c in alphabet or c in alphabet.upper():