Commit 7610d223 authored by Kartik Mistry 🇮🇳

Update upstream source from tag 'upstream/1.25.22'

Update to upstream version '1.25.22'
with Debian dir ae2096debc7adbbd6836ea228a2a6501a9f1bb7e
parents efadcbbb 898efe04
......@@ -758,5 +758,6 @@ dist_man5_MANS = doc/man/recoll.conf.5
dist-hook:
(cd $(top_srcdir); find . \
\( -name '*.pyc' -o -name '#*' -o -name '*~' \) -delete)
test -z "`git status -s | grep -v '??' | grep -v Makefile.am`"
git tag -a RECOLL-$(VERSION) -m "Release $(VERSION) tagged"
test -z "`git status -s | grep -v '??' | grep -v Makefile.am`"
vers=`echo $(VERSION) | sed -e 's/~/_/g'`;\
git tag -a RECOLL-$$vers -m "Release $$vers tagged"
# Makefile.in generated by automake 1.15 from Makefile.am.
# Makefile.in generated by automake 1.15.1 from Makefile.am.
# @configure_input@
# Copyright (C) 1994-2014 Free Software Foundation, Inc.
# Copyright (C) 1994-2017 Free Software Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
......@@ -2075,7 +2075,7 @@ distdir: $(DISTFILES)
! -type d ! -perm -444 -exec $(install_sh) -c -m a+r {} {} \; \
|| chmod -R a+r "$(distdir)"
dist-gzip: distdir
tardir=$(distdir) && $(am__tar) | GZIP=$(GZIP_ENV) gzip -c >$(distdir).tar.gz
tardir=$(distdir) && $(am__tar) | eval GZIP= gzip $(GZIP_ENV) -c >$(distdir).tar.gz
$(am__post_remove_distdir)
dist-bzip2: distdir
......@@ -2101,7 +2101,7 @@ dist-shar: distdir
@echo WARNING: "Support for shar distribution archives is" \
"deprecated." >&2
@echo WARNING: "It will be removed altogether in Automake 2.0" >&2
shar $(distdir) | GZIP=$(GZIP_ENV) gzip -c >$(distdir).shar.gz
shar $(distdir) | eval GZIP= gzip $(GZIP_ENV) -c >$(distdir).shar.gz
$(am__post_remove_distdir)
dist-zip: distdir
......@@ -2119,7 +2119,7 @@ dist dist-all:
distcheck: dist
case '$(DIST_ARCHIVES)' in \
*.tar.gz*) \
GZIP=$(GZIP_ENV) gzip -dc $(distdir).tar.gz | $(am__untar) ;;\
eval GZIP= gzip $(GZIP_ENV) -dc $(distdir).tar.gz | $(am__untar) ;;\
*.tar.bz2*) \
bzip2 -dc $(distdir).tar.bz2 | $(am__untar) ;;\
*.tar.lz*) \
......@@ -2129,7 +2129,7 @@ distcheck: dist
*.tar.Z*) \
uncompress -c $(distdir).tar.Z | $(am__untar) ;;\
*.shar.gz*) \
GZIP=$(GZIP_ENV) gzip -dc $(distdir).shar.gz | unshar ;;\
eval GZIP= gzip $(GZIP_ENV) -dc $(distdir).shar.gz | unshar ;;\
*.zip*) \
unzip $(distdir).zip ;;\
esac
......@@ -2466,8 +2466,9 @@ install-data-hook:
dist-hook:
(cd $(top_srcdir); find . \
\( -name '*.pyc' -o -name '#*' -o -name '*~' \) -delete)
test -z "`git status -s | grep -v '??' | grep -v Makefile.am`"
git tag -a RECOLL-$(VERSION) -m "Release $(VERSION) tagged"
test -z "`git status -s | grep -v '??' | grep -v Makefile.am`"
vers=`echo $(VERSION) | sed -e 's/~/_/g'`;\
git tag -a RECOLL-$$vers -m "Release $$vers tagged"
# Tell versions [3.59,3.63) of GNU make to not export all variables.
# Otherwise a system limit (for SysV at least) may be exceeded.
......
# generated automatically by aclocal 1.15 -*- Autoconf -*-
# generated automatically by aclocal 1.15.1 -*- Autoconf -*-
# Copyright (C) 1996-2014 Free Software Foundation, Inc.
# Copyright (C) 1996-2017 Free Software Foundation, Inc.
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
......@@ -1143,7 +1143,7 @@ sixtyfour bits
test -n "$acl_libdirstem2" || acl_libdirstem2="$acl_libdirstem"
])
# Copyright (C) 2002-2014 Free Software Foundation, Inc.
# Copyright (C) 2002-2017 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
......@@ -1158,7 +1158,7 @@ AC_DEFUN([AM_AUTOMAKE_VERSION],
[am__api_version='1.15'
dnl Some users find AM_AUTOMAKE_VERSION and mistake it for a way to
dnl require some minimum version. Point them to the right macro.
m4_if([$1], [1.15], [],
m4_if([$1], [1.15.1], [],
[AC_FATAL([Do not call $0, use AM_INIT_AUTOMAKE([$1]).])])dnl
])
......@@ -1174,14 +1174,14 @@ m4_define([_AM_AUTOCONF_VERSION], [])
# Call AM_AUTOMAKE_VERSION and _AM_AUTOCONF_VERSION so they can be traced.
# This function is AC_REQUIREd by AM_INIT_AUTOMAKE.
AC_DEFUN([AM_SET_CURRENT_AUTOMAKE_VERSION],
[AM_AUTOMAKE_VERSION([1.15])dnl
[AM_AUTOMAKE_VERSION([1.15.1])dnl
m4_ifndef([AC_AUTOCONF_VERSION],
[m4_copy([m4_PACKAGE_VERSION], [AC_AUTOCONF_VERSION])])dnl
_AM_AUTOCONF_VERSION(m4_defn([AC_AUTOCONF_VERSION]))])
# AM_AUX_DIR_EXPAND -*- Autoconf -*-
# Copyright (C) 2001-2014 Free Software Foundation, Inc.
# Copyright (C) 2001-2017 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
......@@ -1233,7 +1233,7 @@ am_aux_dir=`cd "$ac_aux_dir" && pwd`
# AM_CONDITIONAL -*- Autoconf -*-
# Copyright (C) 1997-2014 Free Software Foundation, Inc.
# Copyright (C) 1997-2017 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
......@@ -1264,7 +1264,7 @@ AC_CONFIG_COMMANDS_PRE(
Usually this means the macro was only invoked conditionally.]])
fi])])
# Copyright (C) 1999-2014 Free Software Foundation, Inc.
# Copyright (C) 1999-2017 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
......@@ -1455,7 +1455,7 @@ _AM_SUBST_NOTMAKE([am__nodep])dnl
# Generate code to set up dependency tracking. -*- Autoconf -*-
# Copyright (C) 1999-2014 Free Software Foundation, Inc.
# Copyright (C) 1999-2017 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
......@@ -1531,7 +1531,7 @@ AC_DEFUN([AM_OUTPUT_DEPENDENCY_COMMANDS],
# Do all the work for Automake. -*- Autoconf -*-
# Copyright (C) 1996-2014 Free Software Foundation, Inc.
# Copyright (C) 1996-2017 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
......@@ -1728,7 +1728,7 @@ for _am_header in $config_headers :; do
done
echo "timestamp for $_am_arg" >`AS_DIRNAME(["$_am_arg"])`/stamp-h[]$_am_stamp_count])
# Copyright (C) 2001-2014 Free Software Foundation, Inc.
# Copyright (C) 2001-2017 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
......@@ -1749,7 +1749,7 @@ if test x"${install_sh+set}" != xset; then
fi
AC_SUBST([install_sh])])
# Copyright (C) 2003-2014 Free Software Foundation, Inc.
# Copyright (C) 2003-2017 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
......@@ -1770,7 +1770,7 @@ AC_SUBST([am__leading_dot])])
# Check to see how 'make' treats includes. -*- Autoconf -*-
# Copyright (C) 2001-2014 Free Software Foundation, Inc.
# Copyright (C) 2001-2017 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
......@@ -1820,7 +1820,7 @@ rm -f confinc confmf
# Fake the existence of programs that GNU maintainers use. -*- Autoconf -*-
# Copyright (C) 1997-2014 Free Software Foundation, Inc.
# Copyright (C) 1997-2017 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
......@@ -1859,7 +1859,7 @@ fi
# Helper functions for option handling. -*- Autoconf -*-
# Copyright (C) 2001-2014 Free Software Foundation, Inc.
# Copyright (C) 2001-2017 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
......@@ -1888,7 +1888,7 @@ AC_DEFUN([_AM_SET_OPTIONS],
AC_DEFUN([_AM_IF_OPTION],
[m4_ifset(_AM_MANGLE_OPTION([$1]), [$2], [$3])])
# Copyright (C) 1999-2014 Free Software Foundation, Inc.
# Copyright (C) 1999-2017 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
......@@ -1935,7 +1935,7 @@ AC_LANG_POP([C])])
# For backward compatibility.
AC_DEFUN_ONCE([AM_PROG_CC_C_O], [AC_REQUIRE([AC_PROG_CC])])
# Copyright (C) 2001-2014 Free Software Foundation, Inc.
# Copyright (C) 2001-2017 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
......@@ -1954,7 +1954,7 @@ AC_DEFUN([AM_RUN_LOG],
# Check to make sure that the build environment is sane. -*- Autoconf -*-
# Copyright (C) 1996-2014 Free Software Foundation, Inc.
# Copyright (C) 1996-2017 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
......@@ -2035,7 +2035,7 @@ AC_CONFIG_COMMANDS_PRE(
rm -f conftest.file
])
# Copyright (C) 2009-2014 Free Software Foundation, Inc.
# Copyright (C) 2009-2017 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
......@@ -2095,7 +2095,7 @@ AC_SUBST([AM_BACKSLASH])dnl
_AM_SUBST_NOTMAKE([AM_BACKSLASH])dnl
])
# Copyright (C) 2001-2014 Free Software Foundation, Inc.
# Copyright (C) 2001-2017 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
......@@ -2123,7 +2123,7 @@ fi
INSTALL_STRIP_PROGRAM="\$(install_sh) -c -s"
AC_SUBST([INSTALL_STRIP_PROGRAM])])
# Copyright (C) 2006-2014 Free Software Foundation, Inc.
# Copyright (C) 2006-2017 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
......@@ -2142,7 +2142,7 @@ AC_DEFUN([AM_SUBST_NOTMAKE], [_AM_SUBST_NOTMAKE($@)])
# Check how to create a tarball. -*- Autoconf -*-
# Copyright (C) 2004-2014 Free Software Foundation, Inc.
# Copyright (C) 2004-2017 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
......
......@@ -105,6 +105,9 @@
/* Define to 1 if you have the <unistd.h> header file. */
#undef HAVE_UNISTD_H
/* Define to 1 if you have the `vsnprintf' function. */
#undef HAVE_VSNPRINTF
/* Define as const if the declaration of iconv() needs const. */
#undef ICONV_CONST
......
......@@ -25,8 +25,9 @@ typedef int mode_t;
#endif // GMinw only
typedef int pid_t;
inline int readlink(const char *cp, void *buf, int cnt)
inline int readlink(const char *a, void *b, int c)
{
a = a; b = b; c = c;
return -1;
}
......
This diff is collapsed.
......@@ -214,6 +214,9 @@ class RclConfig {
/** Get list of skipped file names for current keydir */
vector<string>& getSkippedNames();
/** Get list of file name filters for current keydir (only those
names indexed) */
vector<string>& getOnlyNames();
/** Get list of skipped paths patterns. Doesn't depend on the keydir */
vector<string> getSkippedPaths() const;
......@@ -390,9 +393,14 @@ class RclConfig {
ParamStale m_stpsuffstate;
vector<string> m_stopsuffvec;
// skippedNames state
ParamStale m_skpnstate;
vector<string> m_skpnlist;
// onlyNames state
ParamStale m_onlnstate;
vector<string> m_onlnlist;
// Original current working directory. Set once at init before we do any
// chdir'ing and used for converting user args to absolute paths.
static string o_origcwd;
......
......@@ -38,6 +38,7 @@
#include "smallut.h"
#include "execmd.h"
#include "textsplit.h"
#include "rcldb.h"
std::thread::id mainthread_id;
......@@ -204,16 +205,16 @@ LRESULT CALLBACK MainWndProc(HWND hwnd , UINT msg , WPARAM wParam,
bool CreateInvisibleWindow()
{
HWND hwnd;
WNDCLASS wc = {0};
WNDCLASS wc = {0,0,0,0,0,0,0,0,0,0};
wc.lpfnWndProc = (WNDPROC)MainWndProc;
wc.hInstance = GetModuleHandle(NULL);
wc.hIcon = LoadIcon(GetModuleHandle(NULL), "TestWClass");
wc.lpszClassName = "TestWClass";
wc.hIcon = LoadIcon(GetModuleHandle(NULL), L"TestWClass");
wc.lpszClassName = L"TestWClass";
RegisterClass(&wc);
hwnd =
CreateWindowEx(0, "TestWClass", "TestWClass", WS_OVERLAPPEDWINDOW,
CreateWindowEx(0, L"TestWClass", L"TestWClass", WS_OVERLAPPEDWINDOW,
CW_USEDEFAULT, CW_USEDEFAULT, CW_USEDEFAULT,
CW_USEDEFAULT, (HWND) NULL, (HMENU) NULL,
GetModuleHandle(NULL), (LPVOID) NULL);
......@@ -247,8 +248,8 @@ void initAsyncSigs(void (*sigcleanup)(int))
}
}
}
HANDLE hInvisiblethread =
CreateThread(NULL, 0, RunInvisibleWindowThread, NULL, 0, &tid);
CreateThread(NULL, 0, RunInvisibleWindowThread, NULL, 0, &tid);
SetConsoleCtrlHandler((PHANDLER_ROUTINE)CtrlHandler, TRUE);
eWorkFinished = CreateEvent(NULL, TRUE, FALSE, NULL);
if (eWorkFinished == INVALID_HANDLE_VALUE) {
......@@ -337,7 +338,7 @@ RclConfig *recollinit(int flags,
int lev = atoi(loglevel.c_str());
Logger::getTheLog("")->setLogLevel(Logger::LogLevel(lev));
}
LOGINF("Configuration directory: " << config->getConfDir() << std::endl);
LOGINF(Rcl::version_string() << " [" << config->getConfDir() << "]\n");
// Make sure the locale charset is initialized (so that multiple
// threads don't try to do it at once).
......
/* Copyright (C) 2014 J.F.Dockes
/* Copyright (C) 2014-2019 J.F.Dockes
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
......@@ -14,19 +14,21 @@
* Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
#ifndef TEST_SYNGROUPS
#include "autoconfig.h"
#include "syngroups.h"
#include "log.h"
#include "smallut.h"
#include "pathut.h"
#include <errno.h>
#include <unordered_map>
#include <fstream>
#include <iostream>
#include <cstring>
#include "safesysstat.h"
using namespace std;
......@@ -44,11 +46,28 @@ class SynGroups::Internal {
public:
// Start out "not ok" and with a zeroed stat record, so that st never
// holds indeterminate bytes before setpath() has filled it in.
Internal() : ok(false), st() {
}
// Remember which file the current data was loaded from: its canonical
// path plus its stat record (samefile() compares st_mtime and st_size).
void setpath(const string& fn) {
    path = path_canon(fn);
    if (stat(path.c_str(), &st) != 0) {
        // stat() failed, so the contents of st are unspecified. Zero the
        // record and drop the path rather than let samefile() compare
        // against garbage. NOTE(review): this relies on path_canon()
        // (not visible here) never returning an empty string for a real
        // file name, so that an empty path cannot match -- confirm.
        path.clear();
        memset(&st, 0, sizeof(st));
    }
}
bool samefile(const string& fn) {
string p1 = path_canon(fn);
if (path != p1) {
return false;
}
struct stat st1;
if (stat(p1.c_str(), &st1) != 0) {
return false;
}
return st.st_mtime == st1.st_mtime && st.st_size == st1.st_size;
}
bool ok;
// Term to group num
std::unordered_map<string, unsigned int> terms;
// Group num to group
vector<vector<string> > groups;
std::string path;
struct stat st;
};
bool SynGroups::ok()
......@@ -83,6 +102,12 @@ bool SynGroups::setfile(const string& fn)
return true;
}
if (m->samefile(fn)) {
LOGDEB("SynGroups::setfile: unchanged: " << fn << endl);
return true;
}
LOGDEB("SynGroups::setfile: parsing file " << fn << endl);
ifstream input;
input.open(fn.c_str(), ios::in);
if (!input.is_open()) {
......@@ -162,7 +187,10 @@ bool SynGroups::setfile(const string& fn)
LOGDEB1("SynGroups::setfile: group: [" <<
stringsToString(m->groups.back()) << "]\n");
}
LOGDEB("SynGroups::setfile: got " << m->groups.size() <<
" distinct terms." << endl);
m->ok = true;
m->setpath(fn);
return true;
}
......@@ -174,7 +202,7 @@ vector<string> SynGroups::getgroup(const string& term)
const auto it1 = m->terms.find(term);
if (it1 == m->terms.end()) {
LOGDEB1("SynGroups::getgroup: [" << term<<"] not found in direct map\n");
LOGDEB0("SynGroups::getgroup: [" << term << "] not found in map\n");
return ret;
}
......@@ -183,69 +211,7 @@ vector<string> SynGroups::getgroup(const string& term)
LOGERR("SynGroups::getgroup: line index higher than line count !\n");
return ret;
}
LOGDEB0("SynGroups::getgroup: result: " << stringsToString(m->groups[idx])
<< endl);
return m->groups[idx];
}
#else
#include "syngroups.h"
#include "log.h"
#include <string>
#include <iostream>
#include <vector>
#include <cstdlib>
#include <cstdio>
using namespace std;
static char *thisprog;
static char usage [] =
"syngroups <synfilename> <word>\n"
" \n\n"
;
static void Usage(void)
{
fprintf(stderr, "%s: usage:\n%s", thisprog, usage);
exit(1);
}
static int op_flags;
#define OPT_MOINS 0x1
#define OPT_s 0x2
#define OPT_b 0x4
int main(int argc, char **argv)
{
thisprog = argv[0];
argc--; argv++;
if (argc != 2) {
Usage();
}
string fn = *argv++;argc--;
string word = *argv++;argc--;
DebugLog::getdbl()->setloglevel(DEBDEB1);
DebugLog::setfilename("stderr");
SynGroups syns;
syns.setfile(fn);
if (!syns.ok()) {
cerr << "Initialization failed\n";
return 1;
}
vector<string> group = syns.getgroup(word);
cout << group.size() << " terms in group\n";
for (vector<string>::const_iterator it = group.begin();
it != group.end(); it++) {
cout << "[" << *it << "] ";
}
cout << endl;
return 0;
}
#endif
......@@ -577,8 +577,9 @@ bool TextSplit::text_to_words(const string &in)
int nonalnumcnt = 0;
Utf8Iter it(in);
#ifdef KATAKANA_AS_WORDS
int prev_csc = -1;
#endif
for (; !it.eof(); it++) {
unsigned int c = *it;
nonalnumcnt++;
......@@ -627,9 +628,9 @@ bool TextSplit::text_to_words(const string &in)
return false;
}
}
prev_csc = csc;
#endif
prev_csc = csc;
char asciirep = 0;
int cc = whatcc(c, &asciirep);
......
/* Copyright (C) 2004 J.F.Dockes
/* Copyright (C) 2004-2019 J.F.Dockes
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
......@@ -15,7 +15,6 @@
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
#ifndef TEST_UNACPP
#include <stdio.h>
#include <cstdlib>
#include <errno.h>
......@@ -28,7 +27,7 @@
#include "utf8iter.h"
bool unacmaybefold(const string &in, string &out,
const char *encoding, UnacOp what)
const char *encoding, UnacOp what)
{
char *cout = 0;
size_t out_len;
......@@ -36,30 +35,30 @@ bool unacmaybefold(const string &in, string &out,
switch (what) {
case UNACOP_UNAC:
status = unac_string(encoding, in.c_str(), in.length(),
&cout, &out_len);
break;
status = unac_string(encoding, in.c_str(), in.length(),
&cout, &out_len);
break;
case UNACOP_UNACFOLD:
status = unacfold_string(encoding, in.c_str(), in.length(),
&cout, &out_len);
break;
status = unacfold_string(encoding, in.c_str(), in.length(),
&cout, &out_len);
break;
case UNACOP_FOLD:
status = fold_string(encoding, in.c_str(), in.length(),
&cout, &out_len);
break;
status = fold_string(encoding, in.c_str(), in.length(),
&cout, &out_len);
break;
}
if (status < 0) {
if (cout)
free(cout);
char cerrno[20];
sprintf(cerrno, "%d", errno);
out = string("unac_string failed, errno : ") + cerrno;
return false;
if (cout)
free(cout);
char cerrno[20];
sprintf(cerrno, "%d", errno);
out = string("unac_string failed, errno : ") + cerrno;
return false;
}
out.assign(cout, out_len);
if (cout)
free(cout);
free(cout);
return true;
}
......@@ -68,183 +67,83 @@ bool unacmaybefold(const string &in, string &out,
// testing user-entered terms, so we don't really care.
bool unaciscapital(const string& in)
{
LOGDEB2("unaciscapital: [" << (in) << "]\n" );
LOGDEB2("unaciscapital: [" << in << "]\n");
if (in.empty())
return false;
return false;
Utf8Iter it(in);
string shorter;
it.appendchartostring(shorter);
string lower;
if (!unacmaybefold(shorter, lower, "UTF-8", UNACOP_FOLD)) {
LOGINFO("unaciscapital: unac/fold failed for [" << (in) << "]\n" );
return false;
LOGINFO("unaciscapital: unac/fold failed for [" << in << "]\n");
return false;
}
Utf8Iter it1(lower);
if (*it != *it1)
return true;
return true;
else
return false;
return false;
}
bool unachasuppercase(const string& in)
{
LOGDEB2("unachasuppercase: [" << (in) << "]\n" );
if (in.empty())
return false;
// Check if input contains upper case characters. We used to case-fold
// the input and look for a difference, but lowercasing and
// casefolding are actually not exactly the same, for example German
// sharp s folds to ss but lowercases to itself, and Greek final sigma
// folds to sigma. So an input containing one of these characters
// would be wrongly detected as containing upper case. We now handle a
// few special cases explicitly, by folding them before performing
// the lowercasing. There are actually quite a few other cases of
// lowercase being transformed by casefolding, check Unicode
// CaseFolding.txt for occurrences of SMALL. One more step towards
// ditching everything and using icu...
bool unachasuppercase(const string& _in)
{
LOGDEB("unachasuppercase: in [" << _in << "]\n");
if (_in.empty())
return false;
string in;
Utf8Iter it(_in);
for (; !it.eof(); it++) {
if (*it == 0xdf) {
// s sharp -> ss
in += 's';
in += 's';
} else if (*it == 0x3c2) {
// final sigma -> sigma
in.append("\xcf\x83");
} else {
it.appendchartostring(in);
}
}
LOGDEB("unachasuppercase: folded: [" << in << "]\n");
string lower;
if (!unacmaybefold(in, lower, "UTF-8", UNACOP_FOLD)) {
LOGINFO("unachasuppercase: unac/fold failed for [" << (in) << "]\n" );
return false;
LOGINFO("unachasuppercase: unac/fold failed for [" << in << "]\n");
return false;
}
LOGDEB("unachasuppercase: lower [" << lower << "]\n");
if (lower != in)
return true;
return true;
else
return false;
return false;
}
bool unachasaccents(const string& in)
{
LOGDEB2("unachasaccents: [" << (in) << "]\n" );
LOGDEB("unachasaccents: in [" << in << "]\n");
if (in.empty())
return false;
return false;
string noac;
if (!unacmaybefold(in, noac, "UTF-8", UNACOP_UNAC)) {
LOGINFO("unachasaccents: unac/unac failed for [" << (in) << "]\n" );
return false;
LOGINFO("unachasaccents: unac/unac failed for [" << (in) << "]\n" );
return false;
}
LOGDEB("unachasaccents: noac [" << noac << "]\n");
if (noac != in)
return true;
return true;