Commit 112f7c3d authored by Peter Colberg's avatar Peter Colberg

Imported Upstream version 1.3.1

parents
# Archives and Windows binaries
*.tar.gz
*.exe
*.dll
*.do
# Object files, static/shared libraries and debug bundles
*.o
*.so*
*.a
*.dylib
*.dSYM
# Generated outputs (e.g. MANIFEST.new, bench.out)
*.out
*.new
# Files produced inside data/
data/*.txt
data/*.ttf
data/*.sfd
/docs/
# Benchmark executables built by bench/Makefile
bench/bench
bench/icu
bench/unistring
# Test executables built by the top-level Makefile
normtest
graphemetest
printproperty
charwidth
valid
iterate
case
# Scratch install tree used while generating MANIFEST.new
/tmp/
# Travis CI: build and test with both gcc and clang.
language: c
compiler:
  - gcc
  - clang
notifications:
  # must be nested under `notifications`, otherwise it is a stray top-level key
  email: false
before_install:
  - sudo add-apt-repository ppa:staticfloat/julia-deps -y
  - sudo add-apt-repository ppa:staticfloat/juliareleases -y
  - sudo apt-get update -qq -y
  - sudo apt-get install libpcre3-dev julia fontforge -y
script:
  # the generated manifest must match the committed one
  - make manifest && diff MANIFEST.new MANIFEST
  - make check
  # the regenerated data table must match the committed one
  - make data && diff data/utf8proc_data.c.new utf8proc_data.c
  # `make clean` must leave no ignored files behind
  - make clean && git status --ignored --porcelain && test -z "$(git status --ignored --porcelain)"
  - (mkdir build_static && cd build_static && cmake .. -DCMAKE_VERBOSE_MAKEFILE=ON && make)
  - (mkdir build_shared && cd build_shared && cmake .. -DCMAKE_VERBOSE_MAKEFILE=ON -DBUILD_SHARED_LIBS=ON && make)
env:
  # use JuliaLang caching (https://github.com/staticfloat/cache.julialang.org)
  # so that Travis builds do not depend on anyone's flaky servers but our own
  - URLCACHE=https://cache.e.ip.saba.us/
cmake_minimum_required(VERSION 2.8)

# disallow_intree_builds() is presumably defined in utils.cmake -- confirm.
include(utils.cmake)
disallow_intree_builds()

project(utf8proc C)

# Shared-library version components.
# Be sure to also update these in Makefile!
set(SO_MAJOR 1)
set(SO_MINOR 3)
set(SO_PATCH 1)

# Defined for every compilation unit of the library.
add_definitions(-DUTF8PROC_EXPORTS)

# The GCC-style flags below are skipped for MSVC.
if(NOT MSVC)
  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -O2 -std=c99 -pedantic -Wall")
endif()

# Static or shared is chosen by the caller through BUILD_SHARED_LIBS.
add_library(utf8proc utf8proc.c utf8proc.h)

set_target_properties(utf8proc PROPERTIES
  POSITION_INDEPENDENT_CODE ON
  VERSION "${SO_MAJOR}.${SO_MINOR}.${SO_PATCH}"
  SOVERSION ${SO_MAJOR}
)
This diff is collapsed.
## utf8proc license ##
**utf8proc** is a software package originally developed
by Jan Behrens and the rest of the Public Software Group, who
deserve nearly all of the credit for this library, that is now maintained by the Julia-language developers. Like the original utf8proc,
whose copyright and license statements are reproduced below, all new
work on the utf8proc library is licensed under the [MIT "expat"
license](http://opensource.org/licenses/MIT):
*Copyright © 2014-2015 by Steven G. Johnson, Jiahao Chen, Tony Kelman, Jonas Fonseca, and other contributors listed in the git history.*
Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the "Software"),
to deal in the Software without restriction, including without limitation
the rights to use, copy, modify, merge, publish, distribute, sublicense,
and/or sell copies of the Software, and to permit persons to whom the
Software is furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
## Original utf8proc license ##
*Copyright (c) 2009, 2013 Public Software Group e. V., Berlin, Germany*
Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the "Software"),
to deal in the Software without restriction, including without limitation
the rights to use, copy, modify, merge, publish, distribute, sublicense,
and/or sell copies of the Software, and to permit persons to whom the
Software is furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
## Unicode data license ##
This software distribution contains derived data from a modified version of
the Unicode data files. The following license applies to that data:
**COPYRIGHT AND PERMISSION NOTICE**
*Copyright (c) 1991-2007 Unicode, Inc. All rights reserved. Distributed
under the Terms of Use in http://www.unicode.org/copyright.html.*
Permission is hereby granted, free of charge, to any person obtaining a
copy of the Unicode data files and any associated documentation (the "Data
Files") or Unicode software and any associated documentation (the
"Software") to deal in the Data Files or Software without restriction,
including without limitation the rights to use, copy, modify, merge,
publish, distribute, and/or sell copies of the Data Files or Software, and
to permit persons to whom the Data Files or Software are furnished to do
so, provided that (a) the above copyright notice(s) and this permission
notice appear with all copies of the Data Files or Software, (b) both the
above copyright notice(s) and this permission notice appear in associated
documentation, and (c) there is clear notice in each modified Data File or
in the Software as well as in the documentation associated with the Data
File(s) or Software that the data or software has been modified.
THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF
THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS
INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR
CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF
USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THE DATA FILES OR SOFTWARE.
Except as contained in this notice, the name of a copyright holder shall
not be used in advertising or otherwise to promote the sale, use or other
dealings in these Data Files or Software without prior written
authorization of the copyright holder.
Unicode and the Unicode logo are trademarks of Unicode, Inc., and may be
registered in some jurisdictions. All other trademarks and registered
trademarks mentioned herein are the property of their respective owners.
include/
include/utf8proc.h
lib/
lib/libutf8proc.a
lib/libutf8proc.so -> libutf8proc.so.1.3.1
lib/libutf8proc.so.1 -> libutf8proc.so.1.3.1
lib/libutf8proc.so.1.3.1
# libutf8proc Makefile

# programs
# $(MAKE) is intentionally NOT assigned here: GNU make already sets it to the
# binary that is running this Makefile, so sub-makes use the same program
# (e.g. gmake) and inherit its flags.
AR?=ar
CC?=gcc
INSTALL=install
FIND=find

# compiler settings
# Only CFLAGS is meant to be overridden by users; the PIC, C99 and warning
# flags are kept in separate variables so that overriding CFLAGS does not
# drop them.
CFLAGS ?= -O2
PICFLAG = -fPIC
C99FLAG = -std=c99
WCFLAGS = -Wall -Wmissing-prototypes -pedantic
UCFLAGS = $(CFLAGS) $(PICFLAG) $(C99FLAG) $(WCFLAGS) -DUTF8PROC_EXPORTS

# shared-library version MAJOR.MINOR.PATCH ... this may be *different*
# from the utf8proc version number because it indicates ABI compatibility,
# not API compatibility: MAJOR should be incremented whenever *binary*
# compatibility is broken, even if the API is backward-compatible
# Be sure to also update these in MANIFEST and CMakeLists.txt!
MAJOR=1
MINOR=3
PATCH=1

# Pick the shared-library naming scheme from the host OS.
OS := $(shell uname)
ifeq ($(OS),Darwin) # MacOS X
SHLIB_EXT = dylib
SHLIB_VERS_EXT = $(MAJOR).dylib
else # GNU/Linux, at least (Windows should probably use cmake)
SHLIB_EXT = so
SHLIB_VERS_EXT = so.$(MAJOR).$(MINOR).$(PATCH)
endif

# installation directories (for 'make install')
prefix=/usr/local
libdir=$(prefix)/lib
includedir=$(prefix)/include
# meta targets

# None of these names is a file produced by the build.  `check` is included so
# that a stray file called "check" cannot make the test run look up to date,
# and `data` must be phony because a data/ directory exists.
.PHONY: all clean data update manifest install check

# Default goal: static library plus the unversioned shared-library name.
all: libutf8proc.a libutf8proc.$(SHLIB_EXT)

# Remove everything this Makefile builds, then clean the sub-directories.
clean:
	rm -f utf8proc.o libutf8proc.a libutf8proc.$(SHLIB_VERS_EXT) libutf8proc.$(SHLIB_EXT)
ifneq ($(OS),Darwin)
	rm -f libutf8proc.so.$(MAJOR)
endif
	rm -f test/tests.o test/normtest test/graphemetest test/printproperty test/charwidth test/valid test/iterate test/case
	rm -rf MANIFEST.new tmp
	$(MAKE) -C bench clean
	$(MAKE) -C data clean

# Regenerate the data table next to the committed one (as *.new).
data: data/utf8proc_data.c.new

# Regenerate the data table and copy it over the committed utf8proc_data.c.
update: data/utf8proc_data.c.new
	cp -f data/utf8proc_data.c.new utf8proc_data.c

manifest: MANIFEST.new
# real targets

# The data table is generated by data/Makefile.
data/utf8proc_data.c.new: libutf8proc.$(SHLIB_EXT) data/data_generator.rb data/charwidths.jl
	$(MAKE) -C data utf8proc_data.c.new

# Only utf8proc.c is compiled; utf8proc_data.c is a prerequisite so that a
# data update triggers a rebuild (presumably it is #included -- confirm).
utf8proc.o: utf8proc.h utf8proc.c utf8proc_data.c
	$(CC) $(UCFLAGS) -c -o utf8proc.o utf8proc.c

# Remove the old archive first so that `ar` always starts from scratch.
libutf8proc.a: utf8proc.o
	rm -f libutf8proc.a
	$(AR) rs libutf8proc.a utf8proc.o

# GNU/Linux: fully versioned shared object; its soname carries only MAJOR.
libutf8proc.so.$(MAJOR).$(MINOR).$(PATCH): utf8proc.o
	$(CC) $(LDFLAGS) -shared -o $@ -Wl,-soname -Wl,libutf8proc.so.$(MAJOR) utf8proc.o
	chmod a-x $@

# Creates both the unversioned link ($@) and the soname link ($@.$(MAJOR)).
libutf8proc.so: libutf8proc.so.$(MAJOR).$(MINOR).$(PATCH)
	ln -f -s libutf8proc.so.$(MAJOR).$(MINOR).$(PATCH) $@
	ln -f -s libutf8proc.so.$(MAJOR).$(MINOR).$(PATCH) $@.$(MAJOR)

# Mac OS X: $(LDFLAGS) is passed here as well, matching the .so link above,
# so user-supplied linker flags apply on both platforms.
libutf8proc.$(MAJOR).dylib: utf8proc.o
	$(CC) $(LDFLAGS) -dynamiclib -o $@ $^ -install_name $(libdir)/$@ -Wl,-compatibility_version -Wl,$(MAJOR) -Wl,-current_version -Wl,$(MAJOR).$(MINOR).$(PATCH)

libutf8proc.dylib: libutf8proc.$(MAJOR).dylib
	ln -f -s libutf8proc.$(MAJOR).dylib $@
# Install the header, the static library and the versioned shared library
# below $(DESTDIR)$(prefix), then create relative symlinks for the
# unversioned name (and, outside Darwin, for the soname).
install: libutf8proc.a libutf8proc.$(SHLIB_EXT) libutf8proc.$(SHLIB_VERS_EXT)
	mkdir -m 755 -p $(DESTDIR)$(includedir)
	$(INSTALL) -m 644 utf8proc.h $(DESTDIR)$(includedir)
	mkdir -m 755 -p $(DESTDIR)$(libdir)
	$(INSTALL) -m 644 libutf8proc.a $(DESTDIR)$(libdir)
	$(INSTALL) -m 755 libutf8proc.$(SHLIB_VERS_EXT) $(DESTDIR)$(libdir)
	ln -f -s libutf8proc.$(SHLIB_VERS_EXT) $(DESTDIR)$(libdir)/libutf8proc.$(SHLIB_EXT)
ifneq ($(OS),Darwin)
	ln -f -s libutf8proc.$(SHLIB_VERS_EXT) $(DESTDIR)$(libdir)/libutf8proc.so.$(MAJOR)
endif

# Stage an install into ./tmp and list what was installed (symlinks with
# their targets, files, directories), sorted in the C locale.
# $(CURDIR) is used rather than $(PWD): PWD is merely inherited from the
# environment and names the caller's directory under `make -C <dir>`, whereas
# CURDIR is always the directory make is actually working in.
MANIFEST.new:
	rm -rf tmp
	$(MAKE) install prefix=/usr DESTDIR=$(CURDIR)/tmp
	$(FIND) tmp/usr -mindepth 1 -type l -printf "%P -> %l\n" -or -type f -printf "%P\n" -or -type d -printf "%P/\n" | LC_ALL=C sort > $@
	rm -rf tmp
# Test programs

# The Unicode test-data files are produced by data/Makefile.
data/NormalizationTest.txt:
	$(MAKE) -C data NormalizationTest.txt

data/GraphemeBreakTest.txt:
	$(MAKE) -C data GraphemeBreakTest.txt

# Helper object linked into every test program.
test/tests.o: test/tests.c test/tests.h utf8proc.h
	$(CC) $(UCFLAGS) -c -o test/tests.o test/tests.c

# Every test program is built the same way: its own source file ($<) linked
# with the shared helper object and the library object.
TEST_PROGRAMS = test/normtest test/graphemetest test/printproperty \
                test/charwidth test/valid test/iterate test/case

$(TEST_PROGRAMS): test/%: test/%.c test/tests.o utf8proc.o utf8proc.h test/tests.h
	$(CC) $(UCFLAGS) $< test/tests.o utf8proc.o -o $@
# Build all test programs and the benchmark, then run the tests.  Each test
# is its own recipe line, so the first failing one stops `make check`.
# test/printproperty is built here but not executed.
check: test/normtest data/NormalizationTest.txt \
       test/graphemetest data/GraphemeBreakTest.txt \
       test/printproperty test/case test/charwidth test/valid test/iterate \
       bench/bench.c bench/util.c bench/util.h utf8proc.o
	$(MAKE) -C bench
	test/normtest data/NormalizationTest.txt
	test/graphemetest data/GraphemeBreakTest.txt
	test/charwidth
	test/valid
	test/iterate
	test/case
# utf8proc release history #
## Version 1.3.1 ##
2015-11-02:
- Do not export symbol for internal function `unsafe_encode_char()` ([#55]).
- Install relative symbolic links for shared libraries ([#58]).
- Enable and fix compiler warnings ([#55], [#58]).
- Add missing files to `make clean` ([#58]).
## Version 1.3 ##
2015-07-06:
- Updated for Unicode 8.0 ([#45]).
- New `utf8proc_tolower` and `utf8proc_toupper` functions, portable
replacements for `towlower` and `towupper` in the C library ([#40]).
- Don't treat Unicode "non-characters" as invalid, and improved
validity checking in general ([#35]).
- Prefix all typedefs with `utf8proc_`, e.g. `utf8proc_int32_t`,
to avoid collisions with other libraries ([#32]).
- Rename `DLLEXPORT` to `UTF8PROC_DLLEXPORT` to prevent collisions.
- Fix build breakage in the benchmark routines.
- More fine-grained Makefile variables (`PICFLAG` etcetera), so that
compilation flags can be selectively overridden, and in particular
so that `CFLAGS` can be changed without accidentally eliminating
necessary flags like `-fPIC` and `-std=c99` ([#43]).
- Updated character-width tables based on Unifont 8.0.01 ([#51]) and
the Unicode 8 character categories ([#47]).
## Version 1.2 ##
2015-03-28:
- Updated for Unicode 7.0 ([#6]).
- New function `utf8proc_grapheme_break(c1,c2)` that returns whether
there is a grapheme break between `c1` and `c2` ([#20]).
- New function `utf8proc_charwidth(c)` that returns the number of
column-positions that should be required for `c`; essentially a
portable replacement for `wcwidth(c)` ([#27]).
- New function `utf8proc_category(c)` that returns the Unicode
category of `c` (as one of the constants `UTF8PROC_CATEGORY_xx`).
Also, a function `utf8proc_category_string(c)` that returns the Unicode
category of `c` as a two-character string.
- `cmake` script `CMakeLists.txt`, in addition to `Makefile`, for
easier compilation on Windows ([#28]).
- Various `Makefile` improvements: a `make check` target to perform
tests ([#13]), `make install`, a rule to automate updating the Unicode
tables, etcetera.
- The shared library is now versioned (e.g. has a soname on GNU/Linux) ([#24]).
- C++/MSVC compatibility ([#17]).
- Most `#defined` constants are now `enums` ([#29]).
- New preprocessor constants `UTF8PROC_VERSION_MAJOR`,
`UTF8PROC_VERSION_MINOR`, and `UTF8PROC_VERSION_PATCH` for compile-time
detection of the API version.
- Doxygen-formatted documentation ([#29]).
- The Ruby and PostgreSQL plugins have been removed due to lack of testing ([#22]).
## Version 1.1.6 ##
2013-11-27:
- PostgreSQL 9.2 and 9.3 compatibility (lowercase `c` language name)
## Version 1.1.5 ##
2009-08-20:
- Use `RSTRING_PTR()` and `RSTRING_LEN()` instead of `RSTRING()->ptr` and
`RSTRING()->len` for ruby1.9 compatibility (and `#define` them, if not
existent)
2009-10-02:
- Patches for compatibility with Microsoft Visual Studio
2009-10-08:
- Fixes to make utf8proc usable in C++ programs
2009-10-16:
## Version 1.1.4 ##
2009-06-14:
- replaced C++ style comments for compatibility reasons
- added typecasts to suppress compiler warnings
- removed redundant source files for ruby-gemfile generation
2009-08-19:
- Changed copyright notice for Public Software Group e. V.
- Minor changes in the `README` file
## Version 1.1.3 ##
2008-10-04:
- Added a function `utf8proc_version` returning a string containing the version
number of the library.
- Included a target `libutf8proc.dylib` for MacOSX.
2009-05-01:
- PostgreSQL 8.3 compatibility (use of `SET_VARSIZE` macro)
## Version 1.1.2 ##
2007-07-25:
- Fixed a serious bug in the data file generator, which caused characters
being treated incorrectly, when stripping default ignorable characters or
calculating grapheme cluster boundaries.
## Version 1.1.1 ##
2007-06-25:
- Added a new PostgreSQL function `unistrip`, which behaves like `unifold`,
but also removes all character marks (e.g. accents).
2007-07-22:
- Changed license from BSD to MIT style.
- Added a new function `utf8proc_codepoint_valid` to the C library.
- Changed compiler flags in `Makefile` from `-g -O0` to `-O2`
- The ruby script, which was used to build the `utf8proc_data.c` file, is now
included in the distribution.
## Version 1.0.3 ##
2007-03-16:
- Fixed a bug in the ruby library, which caused an error, when splitting an
empty string at grapheme cluster boundaries (method `String#utf8chars`).
## Version 1.0.2 ##
2006-09-21:
- included a check in `Integer#utf8`, which raises an exception, if the given
code-point is invalid because of being too high (this was missing yet)
2006-12-26:
- added support for PostgreSQL version 8.2
## Version 1.0.1 ##
2006-09-20:
- included a gem file for the ruby version of the library
Release of version 1.0.1
## Version 1.0 ##
2006-09-17:
- added the `LUMP` option, which lumps certain characters together (see `lump.md`) (also used for the PostgreSQL `unifold` function)
- added the `STRIPMARK` option, which strips marking characters (or marks of composed characters)
- deprecated ruby method `String#char_ary` in favour of `String#utf8chars`
## Version 0.3 ##
2006-07-18:
- changed normalization from NFC to NFKC for postgresql unifold function
2006-08-04:
- added support to mark the beginning of a grapheme cluster with 0xFF (option: `CHARBOUND`)
- added the ruby method `String#chars`, which is returning an array of UTF-8 encoded grapheme clusters
- added `NLF2LF` transformation in postgresql `unifold` function
- added the `DECOMPOSE` option; if you use neither `COMPOSE` nor `DECOMPOSE`, no normalization will be performed (different from previous versions)
- using integer constants rather than C-strings for character properties
- fixed (hopefully) a problem with the ruby library on Mac OS X, which occurred when compiler optimization was switched on
## Version 0.2 ##
2006-06-05:
- changed behaviour of PostgreSQL function to return NULL in case of invalid input, rather than raising an exceptional condition
- improved efficiency of PostgreSQL function (no transformation to C string is done)
2006-06-20:
- added -fpic compiler flag in Makefile
- fixed bug in the C code for the ruby library (usage of non-existent function)
## Version 0.1 ##
2006-06-02: initial release of version 0.1
[#6]: https://github.com/JuliaLang/utf8proc/issues/6
[#13]: https://github.com/JuliaLang/utf8proc/issues/13
[#17]: https://github.com/JuliaLang/utf8proc/issues/17
[#20]: https://github.com/JuliaLang/utf8proc/issues/20
[#22]: https://github.com/JuliaLang/utf8proc/issues/22
[#24]: https://github.com/JuliaLang/utf8proc/issues/24
[#27]: https://github.com/JuliaLang/utf8proc/issues/27
[#28]: https://github.com/JuliaLang/utf8proc/issues/28
[#29]: https://github.com/JuliaLang/utf8proc/issues/29
[#32]: https://github.com/JuliaLang/utf8proc/issues/32
[#35]: https://github.com/JuliaLang/utf8proc/issues/35
[#40]: https://github.com/JuliaLang/utf8proc/issues/40
[#43]: https://github.com/JuliaLang/utf8proc/issues/43
[#45]: https://github.com/JuliaLang/utf8proc/issues/45
[#47]: https://github.com/JuliaLang/utf8proc/issues/47
[#51]: https://github.com/JuliaLang/utf8proc/issues/51
[#55]: https://github.com/JuliaLang/utf8proc/issues/55
[#58]: https://github.com/JuliaLang/utf8proc/issues/58
# utf8proc
[![Build Status](https://travis-ci.org/JuliaLang/utf8proc.png)](https://travis-ci.org/JuliaLang/utf8proc)
[utf8proc](http://julialang.org/utf8proc/) is a small, clean C
library that provides Unicode normalization, case-folding, and other
operations for data in the [UTF-8
encoding](http://en.wikipedia.org/wiki/UTF-8). It was [initially
developed](http://www.public-software-group.org/utf8proc) by Jan
Behrens and the rest of the [Public Software
Group](http://www.public-software-group.org/), who deserve *nearly all
of the credit* for this package. With the blessing of the Public
Software Group, the [Julia developers](http://julialang.org/) have
taken over development of utf8proc, since the original developers have
moved to other projects.
(utf8proc is used for basic Unicode
support in the [Julia language](http://julialang.org/), and the Julia
developers became involved because they wanted to add Unicode 7 support and other features.)
(The original utf8proc package also includes Ruby and PostgreSQL plug-ins.
We removed those from utf8proc in order to focus exclusively on the C
library for the time being, but plan to add them back in or release them as separate packages.)
The utf8proc package is licensed under the
free/open-source [MIT "expat"
license](http://opensource.org/licenses/MIT) (plus certain Unicode
data governed by the similarly permissive [Unicode data
license](http://www.unicode.org/copyright.html#Exhibit1)); please see
the included `LICENSE.md` file for more detailed information.
## Quick Start
For compilation of the C library run `make`.
## General Information
The C library is found in this directory after successful compilation
and is named `libutf8proc.a` (for the static library) and
`libutf8proc.so` (for the dynamic library).
The Unicode version supported is 8.0.0.
For Unicode normalizations, the following options are used:
* Normalization Form C: `STABLE`, `COMPOSE`
* Normalization Form D: `STABLE`, `DECOMPOSE`
* Normalization Form KC: `STABLE`, `COMPOSE`, `COMPAT`
* Normalization Form KD: `STABLE`, `DECOMPOSE`, `COMPAT`
## C Library
The documentation for the C library is found in the `utf8proc.h` header file.
`utf8proc_map` is the function you will most likely be using for mapping UTF-8
strings, unless you want to allocate memory yourself.
## To Do
See the Github [issues list](https://github.com/JuliaLang/utf8proc/issues).
## Contact
Bug reports, feature requests, and other queries can be filed at
the [utf8proc issues page on Github](https://github.com/JuliaLang/utf8proc/issues).
## See also
An independent Lua translation of this library, [lua-mojibake](https://github.com/differentprogramming/lua-mojibake), is also available.
# AppVeyor CI: build static and shared variants with MSVC and with MinGW/MSYS.
branches:
  only:
    - master
    - /release-.*/

notifications:
  - provider: Email
    on_build_success: false
    on_build_failure: false
    on_build_status_changed: false

build_script:
  # Fail early when a newer build is already queued for the same pull request.
  - ps: if ($env:APPVEYOR_PULL_REQUEST_NUMBER -and $env:APPVEYOR_BUILD_NUMBER -ne ((Invoke-RestMethod `
        https://ci.appveyor.com/api/projects/$env:APPVEYOR_ACCOUNT_NAME/$env:APPVEYOR_PROJECT_SLUG/history?recordsNumber=50).builds | `
        Where-Object pullRequestId -eq $env:APPVEYOR_PULL_REQUEST_NUMBER)[0].buildNumber) { `
        throw "There are newer queued builds for this pull request, failing early." }
  # MSVC, static then shared
  - mkdir msvc_static
  - cd msvc_static
  - cmake ..
  - cmake --build .
  - mkdir ..\msvc_shared
  - cd ..\msvc_shared
  - cmake .. -DBUILD_SHARED_LIBS=ON
  - cmake --build .
  # MinGW via the MSYS shell, static then shared
  - set PATH=C:\MinGW\bin;%PATH%
  - C:\MinGW\msys\1.0\bin\sh --login -c "
      cd /c/projects/utf8proc &&
      mkdir mingw_static &&
      cd mingw_static &&
      cmake .. -DCMAKE_VERBOSE_MAKEFILE=ON -G'MSYS Makefiles' &&
      make &&
      mkdir ../mingw_shared &&
      cd ../mingw_shared &&
      cmake .. -DCMAKE_VERBOSE_MAKEFILE=ON -DBUILD_SHARED_LIBS=ON -G'MSYS Makefiles' &&
      make
      "

on_finish:
  # Uncomment the following line for interactive debugging, which
  # will print login data for a temporary remote session after the
  # build. This requires an RDP version 6 client, e.g., FreeRDP.
  #- ps: $blockRdp = $true; iex ((new-object net.webclient).DownloadString('https://raw.githubusercontent.com/appveyor/ci/master/scripts/enable-rdp.ps1'))
# Benchmark Makefile: builds `bench` (utf8proc) and, on request, the `icu`
# and `unistring` comparison programs, and runs them over the sample texts.

CURL=curl

CC = cc
CFLAGS = -O2 -std=c99 -pedantic -Wall

# `all` and `clean` are commands, not files; declaring them phony keeps a
# stray file of the same name from disabling them.
.PHONY: all clean

all: bench

# The benchmark links directly against the library object built one level up.
LIBUTF8PROC = ../utf8proc.o

bench: bench.o util.o $(LIBUTF8PROC)
	$(CC) $(CFLAGS) $(LDFLAGS) -o $@ bench.o util.o $(LIBUTF8PROC)

# Sample texts, downloaded on demand (each file is its own target).
DATAURL = https://raw.githubusercontent.com/duerst/eprun/master/benchmark
DATAFILES = Deutsch_.txt Japanese_.txt Korean_.txt Vietnamese_.txt

$(DATAFILES):
	$(CURL) -O $(DATAURL)/$@

bench.out: $(DATAFILES) bench
	./bench -nfkc $(DATAFILES) > $@

# you may need make CPPFLAGS=... LDFLAGS=... to help it find ICU
icu: icu.o util.o
	$(CC) $(CFLAGS) $(LDFLAGS) -o $@ icu.o util.o -licuuc

icu.out: $(DATAFILES) icu
	./icu $(DATAFILES) > $@

unistring: unistring.o util.o
	$(CC) $(CFLAGS) $(LDFLAGS) -o $@ unistring.o util.o -lunistring

unistring.out: $(DATAFILES) unistring
	./unistring $(DATAFILES) > $@

# -I.. lets the sources find headers in the parent directory.
.c.o:
	$(CC) $(CPPFLAGS) -I.. $(CFLAGS) -c -o $@ $<

clean:
	rm -rf *.o *.txt bench *.out icu unistring
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "utf8proc.h"
#include "util.h"
int main(int argc, char **argv)
{
int i;
int options = 0;
for (i = 1; i < argc; ++i) {
if (!strcmp(argv[i], "-nfkc")) {
options |= UTF8PROC_STABLE|UTF8PROC_COMPOSE|UTF8PROC_COMPAT;
continue;
}
if (!strcmp(argv[i], "-nfkd")) {
options |= UTF8PROC_STABLE|UTF8PROC_DECOMPOSE|UTF8PROC_COMPAT;
continue;
}
if (!strcmp(argv[i], "-nfc")) {
options |= UTF8PROC_STABLE|UTF8PROC_COMPOSE;
continue;
}
if (!strcmp(argv[i], "-nfd")) {
options |= UTF8PROC_STABLE|UTF8PROC_DECOMPOSE;
continue;
}
if (!strcmp(argv[i], "-casefold")) {
options |= UTF8PROC_CASEFOLD;
continue;
}
if (argv[i][0] == '-') {
fprintf(stderr, "unrecognized option: %s\n", argv[i]);
return EXIT_FAILURE;
}
size_t len;
uint8_t *src = readfile(argv[i], &len);
if (!src) {
fprintf(stderr, "error reading %s\n", argv[i]);
return EXIT_FAILURE;
}
uint8_t *dest;
mytime start = gettime();
for (int i = 0; i < 100; ++i) {
utf8proc_map(src, len, &dest, options);
free(dest);
}
printf("%s: %g\n", argv[i], elapsed(gettime(), start) / 100);
free(src);
}
return EXIT_SUCCESS;
}
#include <stdio.h>
#include <stdlib.h>
/* ICU4C */
#include <unicode/utypes.h>