Commit a9b741fb authored by Tobias Quathamer's avatar Tobias Quathamer

Add ISO 639-5. Thanks to Pander <pander@opentaal.org>

parent c2dbf465
......@@ -4,6 +4,7 @@ config.log
config.status
iso-codes.pc
iso_639/iso_639.tab
iso_639_5/rdf
# Ignore binary translation files and backup copies
*.mo
*~
......
SUBDIRS = iso_15924 iso_3166 iso_3166_2 iso_4217 iso_639 iso_639_3
SUBDIRS = iso_15924 iso_3166 iso_3166_2 iso_4217 iso_639 iso_639_3 iso_639_5
pkgconfigdir = $(datadir)/pkgconfig
pkgconfig_DATA = iso-codes.pc
......
......@@ -298,7 +298,7 @@ target_alias = @target_alias@
top_build_prefix = @top_build_prefix@
top_builddir = @top_builddir@
top_srcdir = @top_srcdir@
SUBDIRS = iso_15924 iso_3166 iso_3166_2 iso_4217 iso_639 iso_639_3
SUBDIRS = iso_15924 iso_3166 iso_3166_2 iso_4217 iso_639 iso_639_3 iso_639_5
pkgconfigdir = $(datadir)/pkgconfig
pkgconfig_DATA = iso-codes.pc
EXTRA_DIST = \
......
TODO for iso-codes
==================
- Maybe add ISO 639-5
- Maybe switch to version 3 of LGPL
- Add man pages documenting ISO tables.
......@@ -2514,10 +2514,10 @@ fi
$as_echo "$gnu_msgfmt" >&6; }
DOMAINS="iso_15924 iso_3166 iso_4217 iso_639 iso_3166_2 iso_639_3"
DOMAINS="iso_15924 iso_3166 iso_4217 iso_639 iso_3166_2 iso_639_3 iso_639_5"
ac_config_files="$ac_config_files Makefile iso-codes.pc iso_15924/Makefile iso_3166/Makefile iso_3166_2/Makefile iso_4217/Makefile iso_639/Makefile iso_639_3/Makefile"
ac_config_files="$ac_config_files Makefile iso-codes.pc iso_15924/Makefile iso_3166/Makefile iso_3166_2/Makefile iso_4217/Makefile iso_639/Makefile iso_639_3/Makefile iso_639_5/Makefile"
cat >confcache <<\_ACEOF
......@@ -3249,6 +3249,7 @@ do
"iso_4217/Makefile") CONFIG_FILES="$CONFIG_FILES iso_4217/Makefile" ;;
"iso_639/Makefile") CONFIG_FILES="$CONFIG_FILES iso_639/Makefile" ;;
"iso_639_3/Makefile") CONFIG_FILES="$CONFIG_FILES iso_639_3/Makefile" ;;
"iso_639_5/Makefile") CONFIG_FILES="$CONFIG_FILES iso_639_5/Makefile" ;;
*) as_fn_error $? "invalid argument: \`$ac_config_target'" "$LINENO" 5;;
esac
......@@ -3709,4 +3710,3 @@ if test -n "$ac_unrecognized_opts" && test "$enable_option_checking" != no; then
$as_echo "$as_me: WARNING: unrecognized options: $ac_unrecognized_opts" >&2;}
fi
......@@ -38,7 +38,7 @@ fi
AC_MSG_RESULT($gnu_msgfmt)
AC_SUBST(MSGFMT_FLAGS)
DOMAINS="iso_15924 iso_3166 iso_4217 iso_639 iso_3166_2 iso_639_3"
DOMAINS="iso_15924 iso_3166 iso_4217 iso_639 iso_3166_2 iso_639_3 iso_639_5"
AC_SUBST([DOMAINS])
AC_CONFIG_FILES([
......@@ -50,7 +50,7 @@ iso_3166_2/Makefile
iso_4217/Makefile
iso_639/Makefile
iso_639_3/Makefile
iso_639_5/Makefile
])
AC_OUTPUT
# Automake input for the ISO 639-5 domain: ships the XML table and its
# gettext template, and builds (without installing) one .mo per .po file.

DOMAIN = iso_639_5
VERSION = @PACKAGE_VERSION@

# The XML table is installed below $(datadir)/xml/iso-codes.
xmldir = $(datadir)/xml/iso-codes
xml_DATA = $(DOMAIN).xml

# Every translation found in the source directory, and the compiled
# catalog name derived from it (foo.po -> foo.mo in the build directory).
pofiles = $(wildcard $(srcdir)/*.po)
mofiles = $(patsubst $(srcdir)/%.po,%.mo, $(pofiles))

# Built by "make all" but never installed from this directory.
noinst_DATA = $(mofiles) $(xml_DATA:.xml=.pot)

# Regenerate the translation template whenever the XML table changes.
# Recipe lines must begin with a hard TAB.
iso_639_5.pot: iso_639_5.xml
	../iso2pot.py --is-version ${VERSION} --comment id \
		--fields name --outfile $@ $<

EXTRA_DIST = \
	$(pofiles) \
	$(xml_DATA) \
	iso_639_5.pot

MOSTLYCLEANFILES = \
	$(mofiles)

# check-content is not defined in this file; presumably it comes from
# rules.make, included below -- confirm there.
check-local: check-content

include $(top_srcdir)/rules.make
This diff is collapsed.
#!/bin/sh
# Name: iso-dis-639_5-download.sh
# Date: 2014-03-22
# Version: 0.3
# Author: Pander <pander@opentaal.org>
#
# Main website for ISO 639-5:
# http://www.loc.gov/standards/iso639-5/
#
# Wikipedia entry for ISO 639-5:
# https://en.wikipedia.org/wiki/ISO_639-5
#
# This script will download almost 2000 files using almost 40 MB.
# For a clean download remove all .rdf files manually and rerun this script.
#
# All files are stored in ./rdf (created if missing), named after the last
# path component of their URL.  Files that already exist are not fetched
# again, but they are still scanned for narrower entries.
#
# Environment:
#   MAX_DEPTH  how many levels of <skos:narrower> links to follow below each
#              top-level entry (default: 6).  Increase it if the XML
#              generator reports missing files.

MAX_DEPTH=${MAX_DEPTH:-6}
case $MAX_DEPTH in
	''|*[!0-9]*)
		echo "MAX_DEPTH must be a non-negative integer, got '$MAX_DEPTH'" >&2
		exit 2
		;;
esac

# Print the rdf:about URL of every <rdf:Description> line read from stdin.
about_urls() {
	grep '<rdf:Description rdf:about' |
		awk -F 'about="' '{print $2}' |
		awk -F '">' '{print $1}'
}

# crawl URL DEPTH
# Fetch URL.rdf unless it is already present, then follow its narrower
# entries for at most DEPTH further levels.
crawl() {
	# crawl_file is a global and is overwritten by the recursive calls below;
	# that is safe because it is only read before the loop list is expanded.
	crawl_file=$(basename "$1").rdf
	if [ ! -e "$crawl_file" ]
	then
		wget "$1.rdf"
	fi
	# Stop at the depth limit, or when the download failed and there is
	# nothing to scan.
	if [ "$2" -le 0 ] || [ ! -e "$crawl_file" ]
	then
		return 0
	fi
	# The positional parameters are per-invocation, so $2 still holds this
	# level's depth after each recursive call returns.
	for crawl_child in $(grep -A1 '<skos:narrower ' "$crawl_file" | about_urls)
	do
		crawl "$crawl_child" $(($2 - 1))
	done
}

mkdir -p rdf || exit 1
cd rdf || exit 1

if [ ! -e iso639-5.rdf ]
then
	# Without the index there is nothing to traverse, so fail loudly.
	wget http://id.loc.gov/vocabulary/iso639-5.rdf || {
		echo "could not download iso639-5.rdf" >&2
		exit 1
	}
fi

for top in $(about_urls < iso639-5.rdf)
do
	crawl "$top" "$MAX_DEPTH"
done
cd ..
#!/usr/bin/env python
# -*- coding: utf-8 -*-
u"""A script to generate iso_639_5.xml database for iso-codes.
@see http://www.loc.gov/standards/iso639-5/
@attention: File iso_639_5.xml will be written to the current directory.
@author: Pander
@license: LGPL
@contact: pander@opentaal.org
"""
__author__ = 'Pander <pander@opentaal.org>'
__version__ = '0.3'
import libxml2
import os.path
def _children(node):
    u"""Yield the direct children of a libxml2 node, in document order."""
    child = node.children
    while child is not None:
        yield child
        child = child.next


def _parse_narrower(codes, code, narrower):
    u"""Recurse into every Description listed below one narrower element.

    @param codes: definitions for codes, updated in place.
    @type codes: dict
    @param code: code of the entry owning the narrower element; it becomes
                 the parent of the entries found.
    @type code: str
    @param narrower: the narrower element to scan.
    """
    for target in _children(narrower):
        if target.name == 'Description':
            fname = target.prop('about')
            # Keep whatever follows '/authorities/subjects/' (22 characters).
            fname = fname[fname.find('/authorities/subjects/')+22:] + '.rdf'
            parseChild(codes, code, fname)


def parseChild(codes, parent, filename):
    u"""Recursively parse one RDF file and the files it lists as narrower.

    Language elements are stored as C{codes[code] = {'name': ...}}; Topic
    elements are stored with an additional comma separated 'parents' string.
    Only English (lang == 'en') authoritative labels are used as names.

    @param codes: definitions for codes, updated in place.
    @type codes: dict
    @param parent: code of the entry that referenced this file, or None for
                   a top-level file.
    @type parent: str
    @param filename: name of the RDF file inside the rdf directory.
    @type filename: str
    @return: None; a missing file only prints a warning.
    """
    import sys

    code = None
    name = None
    if not os.path.isfile('rdf/' + filename):
        print('WARNING, missing %s rerun download script, perhaps with increased recursion' % filename)
        return
    doc = libxml2.parseFile('rdf/' + filename)
    try:
        for root in doc.xpathEval('/*'):
            for entry in _children(root):
                if entry.name == 'Language':  # ???.rdf top-level children
                    code = entry.prop('about')[-3:]
                    for field in _children(entry):
                        if field.name == 'authoritativeLabel' and field.prop('lang') == 'en':
                            name = field.content
                            if name and code:
                                if parent:
                                    # A Language entry is only expected at
                                    # the top level.
                                    print('ERROR, unexpected parent %s' % code)
                                    sys.exit(1)
                                codes[code] = {'name': name, }
                        elif field.name == 'narrower':
                            _parse_narrower(codes, code, field)
                elif entry.name == 'Topic':  # sh*.rdf file
                    code = entry.prop('about')
                    code = code[code.find('/authorities/subjects/')+22:]
                    for field in _children(entry):
                        if field.name == 'authoritativeLabel' and field.prop('lang') == 'en':
                            name = field.content
                            if code and name:
                                if code in codes:
                                    definition = codes[code]
                                    # Compare against whole entries of the
                                    # list, not substrings of the joined
                                    # string.
                                    known = definition['parents'].split(',')
                                    if definition['name'] == name and parent not in known:
                                        definition['parents'] = definition['parents'] + ',' + parent
                                else:
                                    codes[code] = {'name': name, 'parents': parent}
                        elif field.name == 'narrower':
                            _parse_narrower(codes, code, field)
    finally:
        # Documents from the libxml2 bindings are not garbage collected.
        doc.freeDoc()
if __name__ == '__main__':
    # Entry point: read the top-level index rdf/iso639-5.rdf, parse every
    # language it lists (and, through parseChild, everything narrower), then
    # write the collected entries to iso_639_5.xml in the current directory.
    import sys
    from xml.sax.saxutils import escape

    def siblings(node):
        u"""Yield node and every sibling that follows it."""
        while node is not None:
            yield node
            node = node.next

    def attribute(value):
        u"""Return value escaped for use inside a double-quoted attribute."""
        # '%s' % value keeps the original formatting of non-string values.
        return escape('%s' % value, {'"': '&quot;'})

    # parse ISO 639-5 languages
    codes = {}
    doc = libxml2.parseFile('rdf/iso639-5.rdf')  # top-level file
    try:
        for root in doc.xpathEval('/*'):
            for scheme in siblings(root.children):
                if scheme.name != 'MADSScheme':
                    continue
                for member in siblings(scheme.children):
                    if member.name != 'hasTopMemberOfMADSScheme':
                        continue
                    for language in siblings(member.children):
                        if language.name != 'Language':
                            continue
                        # If several code elements exist, the last one wins.
                        filename = None
                        for field in siblings(language.children):
                            if field.name == 'code':
                                filename = '%s.rdf' % field.content
                        if filename:
                            parseChild(codes, None, filename)
                        else:
                            print('ERROR, no filename found')
                            sys.exit(1)
    finally:
        # Documents from the libxml2 bindings are not garbage collected.
        doc.freeDoc()
    print('Found %d languages' % len(codes))

    # write XML file; the with block closes it even if writing fails
    with open('iso_639_5.xml', 'w') as outfile:
        outfile.write("""<?xml version="1.0" encoding="UTF-8" ?>
<!--
This file gives a list of all languages in the ISO 639-5
standard, and is used to provide translations via gettext
Copyright © 2014 Pander <pander@opentaal.org>
This file is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This file is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this file; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Source: <http://www.loc.gov/standards/iso639-5/>
-->
<!DOCTYPE iso_639_5_entries [
<!ELEMENT iso_639_5_entries (iso_639_5_entry+)>
<!ELEMENT iso_639_5_entry EMPTY>
<!ATTLIST iso_639_5_entry
id CDATA #REQUIRED
parents CDATA #IMPLIED
name CDATA #REQUIRED
>
]>
<iso_639_5_entries>
""")
        for code in sorted(codes.keys()):
            definition = codes[code]
            outfile.write('\t<iso_639_5_entry\n')
            outfile.write('\t\tid="%s"\n' % attribute(code))
            if 'parents' in definition:
                outfile.write('\t\tparents="%s"\n' % attribute(definition['parents']))
            # Names come from external RDF data, so &, < and " must be
            # escaped to keep the generated file well-formed.
            outfile.write('\t\tname="%s" />\n' % attribute(definition['name']))
        outfile.write('</iso_639_5_entries>\n')
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment