Commit afb2975f authored by Tino Didriksen's avatar Tino Didriksen

Imported Upstream version 0.2.0~r62623

parents
INSTALL
Makefile
Makefile.in
aclocal.m4
autom4te.cache/
config.log
config.status
configure
install-sh
missing
modes/
mt-ar.mode
*.bin
*.pyc
.deps
Many thanks to:
- Prof. Janusz Danecki, for advice and providing a Maltese
dictionary and a grammar book,
- Georges Kass, PhD, for postediting the evaluation texts
(to be found in the dev/story/ subdirectory),
- John J. Camillieri, for translating the first evaluation
text (dev/story/story.mt.txt) to Maltese.
The Maltese analyser (apertium-mt-ar.mt.dix) is an extended
version of the Maltese analyser from apertium-mt-he.
2012-2014, Kevin Brubeck Unhammer <unhammer@fsfe.org>
2012, Francis M. Tyers <ftyers@prompsit.com>
2012, Maria Fronczak <mariafronczak@users.sourceforge.net>
This diff is collapsed.
# Package version.
VERSION=0.2.0
# Language codes. The two direction prefixes and the package base name are
# derived from them, so every file name below follows from these two values.
LANG1=mlt
LANG2=ara
PREFIX1=$(LANG1)-$(LANG2)
PREFIX2=$(LANG2)-$(LANG1)
BASENAME=apertium-$(PREFIX1)
# Compiled binaries for both translation directions. The same list is reused
# by apertium_mlt_ar_DATA (installation) and CLEANFILES (cleanup).
TARGETS_COMMON = \
$(PREFIX1).automorf.bin \
$(PREFIX1).autobil.bin $(PREFIX1).t1x.bin \
$(PREFIX1).t2x.bin $(PREFIX1).t3x.bin \
$(PREFIX1).autogen.bin $(PREFIX1).autopgen.bin \
$(PREFIX2).autotra.bin \
$(PREFIX1).rlx.bin \
$(PREFIX2).automorf.bin \
$(PREFIX2).autobil.bin $(PREFIX2).t1x.bin \
$(PREFIX2).autogen.bin \
$(PREFIX2).autopgen.bin
# This include defines goals for install-modes, .deps/.d and .mode files:
@ap_include@
# mlt-ara
#
# Every recipe line below starts with a hard TAB, as make requires.
# $< is the first prerequisite of the rule and $@ is its target.

# Analyser for $(LANG1): validate the monolingual dictionary, then compile it
# in the lr direction together with the analysis-character equivalences (.acx).
$(PREFIX1).automorf.bin: $(BASENAME).$(LANG1).dix $(BASENAME).$(LANG1).acx
	apertium-validate-dictionary $<
	lt-comp lr $< $@ $(BASENAME).$(LANG1).acx

# used for both directions:
# Bilingual dictionary after being run through lexchoicebil.xsl.
# `mkdir -p` succeeds when .deps already exists, so there is no window
# between an existence test and the mkdir in which a parallel job could
# create the directory and make the recipe fail.
.deps/$(BASENAME).$(PREFIX1).dix: $(BASENAME).$(PREFIX1).dix lexchoicebil.xsl
	mkdir -p .deps
	xsltproc lexchoicebil.xsl $< > $@

# Bilingual transducer for this direction, compiled lr from the preprocessed bidix.
$(PREFIX1).autobil.bin: .deps/$(BASENAME).$(PREFIX1).dix
	apertium-validate-dictionary $<
	lt-comp lr $< $@

# Three transfer stages; each rule file is validated with its own validator
# and then preprocessed into its binary form.
$(PREFIX1).t1x.bin: $(BASENAME).$(PREFIX1).t1x
	apertium-validate-transfer $<
	apertium-preprocess-transfer $< $@

$(PREFIX1).t2x.bin: $(BASENAME).$(PREFIX1).t2x
	apertium-validate-interchunk $<
	apertium-preprocess-transfer $< $@

$(PREFIX1).t3x.bin: $(BASENAME).$(PREFIX1).t3x
	apertium-validate-postchunk $<
	apertium-preprocess-transfer $< $@

# Generator for $(LANG2): the monolingual dictionary compiled in the rl direction.
$(PREFIX1).autogen.bin: $(BASENAME).$(LANG2).dix
	apertium-validate-dictionary $<
	lt-comp rl $< $@

# Post-generator for $(LANG2).
$(PREFIX1).autopgen.bin: $(BASENAME).post-$(LANG2).dix
	apertium-validate-dictionary $<
	lt-comp lr $< $@

# Constraint Grammar for this direction, compiled with cg-comp.
$(PREFIX1).rlx.bin: $(BASENAME).$(PREFIX1).rlx
	cg-comp $< $@
# ara-mlt
#
# Every recipe line below starts with a hard TAB, as make requires.
# $< is the first prerequisite of the rule and $@ is its target.

# Analyser for $(LANG2): validated, then compiled lr with its .acx file.
$(PREFIX2).automorf.bin: $(BASENAME).$(LANG2).dix $(BASENAME).$(LANG2).acx
	apertium-validate-dictionary $<
	lt-comp lr $< $@ $(BASENAME).$(LANG2).acx

# Bilingual transducer for the reverse direction: the same preprocessed bidix
# as the mlt-ara direction, compiled rl instead of lr.
$(PREFIX2).autobil.bin: .deps/$(BASENAME).$(PREFIX1).dix
	apertium-validate-dictionary $<
	lt-comp rl $< $@

# Single transfer stage for this direction.
$(PREFIX2).t1x.bin: $(BASENAME).$(PREFIX2).t1x
	apertium-validate-transfer $<
	apertium-preprocess-transfer $< $@

# Generator for $(LANG1). The recipe passes the .acx file to lt-comp, so the
# file is listed as a prerequisite: editing it must trigger a rebuild.
# NOTE(review): the $(PREFIX1).autogen.bin recipe passes no .acx file to
# lt-comp - confirm that using one here is intended.
$(PREFIX2).autogen.bin: $(BASENAME).$(LANG1).dix $(BASENAME).$(LANG1).acx
	apertium-validate-dictionary $<
	lt-comp rl $< $@ $(BASENAME).$(LANG1).acx

# Post-generator for $(LANG1).
$(PREFIX2).autopgen.bin: $(BASENAME).post-$(LANG1).dix
	apertium-validate-dictionary $<
	lt-comp lr $< $@

# Transducer compiled from the translit dictionary.
$(PREFIX2).autotra.bin: $(BASENAME).translit.dix
	apertium-validate-dictionary $<
	lt-comp lr $< $@
###############################################################################
## Distribution
###############################################################################
# Source files shipped in the distribution tarball: the dictionaries, the
# tagger model, the rule files, the .acx files, the XSL stylesheet used to
# preprocess the bilingual dictionary, and modes.xml.
EXTRA_DIST=$(BASENAME).$(LANG1).dix $(BASENAME).$(PREFIX1).dix \
$(BASENAME).$(LANG2).dix $(BASENAME).post-$(LANG2).dix \
$(BASENAME).post-$(LANG1).dix \
$(BASENAME).translit.dix \
$(PREFIX1).prob \
$(BASENAME).$(PREFIX1).rlx \
$(BASENAME).$(PREFIX1).t1x \
$(BASENAME).$(PREFIX1).t2x \
$(BASENAME).$(PREFIX1).t3x \
$(BASENAME).$(PREFIX2).t1x \
$(BASENAME).$(LANG1).acx \
$(BASENAME).$(LANG2).acx \
lexchoicebil.xsl \
modes.xml
###############################################################################
## Installation
###############################################################################
# Data directory of the pair, and the files placed there: all compiled
# binaries plus the tagger model and the transfer rule sources.
apertium_mlt_ardir=$(prefix)/share/apertium/$(BASENAME)/
apertium_mlt_ar_DATA=$(TARGETS_COMMON) \
$(PREFIX1).prob \
$(BASENAME).$(PREFIX1).t1x \
$(BASENAME).$(PREFIX1).t2x \
$(BASENAME).$(PREFIX1).t3x \
$(BASENAME).$(PREFIX2).t1x
# All modes are created by the same goal, listing several will just give problems with make -j2
noinst_DATA=modes/$(PREFIX1).mode
# Hook the install-modes goal (supplied through @ap_include@) into installation.
install-data-local: install-modes
###############################################################################
## Cleanup
###############################################################################
# Compiled binaries are removed through CLEANFILES.
CLEANFILES = $(TARGETS_COMMON)

# Also remove the generated .deps and modes directories. The recipe line
# starts with a hard TAB; the leading '-' tells make to ignore a failure of rm.
clean-local:
	-rm -rf .deps modes
Maltese and Arabic
apertium-mt-ar
===============================================================================
This is an Apertium language pair for translating from Maltese to
Arabic. What you can use this language package for:
* Translating from Maltese to Arabic
* Morphological analysis of Maltese
* Part-of-speech tagging of Maltese
For information on the latter two points, see subheading "For more
information" below
Requirements
===============================================================================
You will need the following software installed:
* lttoolbox (>= 3.1.2)
* apertium (>= 3.1.1)
* vislcg3 (>= 0.9.7.8354)
If this does not make any sense, we recommend you look at: www.apertium.org
Compiling
===============================================================================
Once the requirements are installed, you should be able to just run:
$ ./configure
$ make
# make install
You can use ./autogen.sh instead of ./configure if you're compiling from
SVN. If you're using a --prefix to ./configure, make sure it's the
same one you used to install apertium itself.
Testing
===============================================================================
If you are in the source directory after running make, the following
commands should work:
$ echo "Ilbieraħ kien kiesaħ ħafna." | apertium -d . mt-ar
امس كان باردا جدا.
After installing somewhere in $PATH, you should be able to do, e.g.:
$ echo "Ilbieraħ kien kiesaħ ħafna." | apertium mt-ar
امس كان باردا جدا.
Files and data
===============================================================================
* apertium-mt-ar.mt.dix - Monolingual dictionary for Maltese
* apertium-mt-ar.ar.dix - Monolingual dictionary for Arabic
* apertium-mt-ar.mt-ar.dix - Bilingual dictionary
* apertium-mt-ar.mt-ar.rlx - Constraint Grammar for Maltese
* apertium-mt-ar.mt-ar.t1x - Chunker rules for translating into Arabic
* apertium-mt-ar.mt-ar.t2x - Interchunk rules for translating into Arabic
* apertium-mt-ar.mt-ar.t3x - Postchunk rules for translating into Arabic
* mt-ar.prob - Tagger model for Maltese
* modes.xml - Translation modes
For more information
===============================================================================
* http://wiki.apertium.org/wiki/Installation
* http://wiki.apertium.org/wiki/Using_an_lttoolbox_dictionary
* http://wiki.apertium.org/wiki/Constraint_Grammar
Help and support
===============================================================================
If you need help using this language pair or data, you can contact:
* Mailing list: apertium-stuff@lists.sourceforge.net
* IRC: #apertium on irc.freenode.net
See also the file AUTHORS included in this distribution.
This diff is collapsed.
<?xml version="1.0"?>
<analysis-chars>
</analysis-chars>
This diff is collapsed.
This diff is collapsed.
# Sentence delimiters: each punctuation wordform in its plain and its
# $-prefixed variant, plus the `sent` tag. The $-prefixed series previously
# repeated "<;>" where "<$;>" belongs.
DELIMITERS = "<.>" "<:>" "<;>" "<!>" "<?>" "<|>" "<$.>" "<$:>" "<$;>" "<$!>" "<$?>" "<$|>" sent;
# Tag lists used by the rules below. A bare tag matches any reading carrying
# that tag; a parenthesised group matches only readings carrying all of its tags.
LIST NOUN = n;
LIST VERB = vblex;
LIST DET = det;
LIST PRN = prn;
LIST PREP = pr;
LIST ADV = adv;
LIST ADJ = adj;
LIST DETDEF = (det def);
# Either of the two tags pprs / pp.
LIST VRBPART = (pprs) (pp);
LIST PAST = (past);
LIST PRES = (pres);
LIST IMP = (imp);
LIST CNJSUB = cnjsub;
LIST CNJADV = cnjadv;
LIST REL = rel;
LIST ITG = itg;
LIST SG = sg;
LIST PL = pl;
LIST NOUNSG = (n sg);
LIST NOUNPL = (n pl);
# Nouns carrying any of the px* tags.
LIST NOUNPX = (n px1sg) (n px2sg) (n px3sg_m) (n px3sg_f) (n px1pl) (n px2pl) (n px3pl);
# Number lists covering both vblex and vaux readings.
LIST VRBSG = (vblex sg) (vaux sg);
LIST VRBPL = (vblex pl) (vaux pl);
# CG-3 markers for the beginning and the end of the current window.
LIST BOS = (>>>) ;
LIST EOS = (<<<) ;
# Lexically specific lists: a base form combined with a tag.
LIST TAPREP = ("ta'" pr);
LIST POSTDET = ("ieħor" det);
# Used as the BARRIER in rule r13.
SET SENTBOUNDARY = CNJSUB | REL | ITG | EOS;
# Hey, that's crucial :)
# (rule r25 addresses a sub-reading with the 0/1 position)
SUBREADINGS = LTR ;
SECTION
# Silly rules, yes, but "I'll think about that tomorrow"
# Convention in the summaries below: "X/Y token" means the current token has
# both an X reading and a Y reading; the rule keeps the reading named after "->".
# r1: DET/PRN token whose next token has a NOUN reading -> DET.
SELECT:r1 DET IF
(0 DET)
(0 PRN)
(1 NOUN) ;
# r2: VRBPL/VRBSG token whose previous token has a VRBPL reading -> VRBPL.
SELECT:r2 VRBPL IF
(-1 VRBPL)
(0 VRBPL)
(0 VRBSG) ;
# li
# r3: CNJSUB/REL token after a VERB reading -> CNJSUB.
SELECT:r3 CNJSUB IF
(-1 VERB)
(0 CNJSUB)
(0 REL) ;
# r4: CNJSUB/REL token after a NOUN reading -> REL.
SELECT:r4 REL IF
(-1 NOUN)
(0 CNJSUB)
(0 REL) ;
# oops.. 'fis-sens _li_ l-Misilmin jaħsbu li hu miktub direttament minn Alla'
# so, should be somehow refined
# r5: CNJSUB/REL token after an ADV reading -> CNJSUB.
SELECT:r5 CNJSUB IF
(-1 ADV)
(0 CNJSUB)
(0 REL) ;
# r6: NOUN/ADJ token after a PREP reading -> NOUN.
SELECT:r6 NOUN IF
(-1 PREP) # -1 PREP, -1/1 DETDEF
(0 NOUN)
(0 ADJ) ;
# fil-Malti ; hm, apparently there are constructions like l-veru Koran :(
# r7: NOUN/ADJ token with CNJSUB two tokens back, DETDEF directly before and
# a VERB reading directly after -> NOUN.
SELECT:r7 NOUN IF
(-2 CNJSUB)
(-1 DETDEF)
(0 NOUN)
(0 ADJ)
(1 VERB) ;
# kienu jaħsbu li l-Malti ġej mill-Puniku
# r8: NOUN/VERB token after DETDEF -> NOUN.
SELECT:r8 NOUN IF
(-1 DETDEF)
(0 NOUN)
(0 VERB) ;
# l-influwenza tal-Għarbi
# r9: NOUN/ADJ token after DETDEF and before a PRN reading, provided no NOUN
# reading is found scanning leftwards from position -2 -> NOUN.
SELECT:r9 NOUN IF
(NOT *-2 NOUN)
(-1 DETDEF)
(0 NOUN)
(0 ADJ)
(1 PRN) ;
# u l-kollettiv huwa
# r10: VRBPL/VRBSG token after a NOUNPL reading -> VRBPL.
SELECT:r10 VRBPL IF
(-1 NOUNPL)
(0 VRBPL)
(0 VRBSG) ;
# ħafna
# r11: ADV/DET/PRN token after an ADJ reading -> ADV.
SELECT:r11 ADV IF
(-1 ADJ)
(0 ADV)
(0 DET)
(0 PRN) ;
# r12: ADV/DET/PRN token after a VERB reading -> ADV.
SELECT:r12 ADV IF
(-1 VERB)
(0 ADV)
(0 DET)
(0 PRN) ;
# r13: VRBPL/VRBSG token followed by DETDEF and then NOUNPL, provided no NOUN
# reading is found scanning leftwards up to a SENTBOUNDARY -> VRBPL.
SELECT:r13 VRBPL IF
(NOT *-1 NOUN BARRIER SENTBOUNDARY)
(0 VRBPL)
(0 VRBSG)
(1 DETDEF)
(2 NOUNPL);
# r14: ADJ/VRBPART token, no further context -> ADJ.
SELECT:r14 ADJ IF
(0 ADJ)
(0 VRBPART);
# r15: PAST/IMP token after a PRN reading -> PAST.
SELECT:r15 PAST IF
(-1 PRN)
(0 PAST)
(0 IMP) ;
# r16: PREP/NOUN token followed by DETDEF and then a NOUN reading -> PREP.
SELECT:r16 PREP IF
(0 PREP)
(0 NOUN)
(1 DETDEF)
(2 NOUN) ;
# iżda _skont_ l-Misilmin, il-ħabib tal-Profeta Abu Bakr, fost oħrajn, kien jikteb it-testi
# r17: token with both a NOUN and an ADJ reading, directly after DETDEF, with
# no NOUN reading two tokens back and none on the next token -> keep NOUN.
# The rule is closed with ';' so it does not run into the following rule.
SELECT:r17 NOUN IF
(NOT -2 NOUN)
(-1 DETDEF)
(0 NOUN)
(0 ADJ)
(NOT 1 NOUN) ;
# r18-r22 all handle a token with both a DET and a PRN reading and keep DET;
# they differ only in the right-hand context required.
# r18: next token DETDEF, then a NOUN reading.
SELECT:r18 DET IF
(0 DET)
(0 PRN)
(1 DETDEF)
(2 NOUN) ;
# r19: token with both an ADV and a PREP reading, followed by a PREP reading -> ADV.
SELECT:r19 ADV IF
(0 ADV)
(0 PREP)
(1 PREP);
# r20: next token DETDEF (a superset of the contexts accepted by r18).
SELECT:r20 DET IF
(0 DET)
(0 PRN)
(1 DETDEF);
# r21: next token has a NOUN reading (same tests as r1).
SELECT:r21 DET IF
(0 DET)
(0 PRN)
(1 NOUN);
# r22: next token has an ADJ reading.
SELECT:r22 DET IF
(0 DET)
(0 PRN)
(1 ADJ);
# r23: directly after DETDEF, discard NOUNPX readings (nouns carrying a px* tag).
# The rule is closed with ';' so it does not run into the following rule.
REMOVE:r23 NOUNPX IF
(-1 DETDEF) ;
# r24: token with both a VRBPL and a VRBSG reading, followed by a VRBPL
# reading -> keep VRBPL.
SELECT:r24 VRBPL IF
(0 VRBPL)
(0 VRBSG)
(1 VRBPL);
# r25: token with a VRBPL reading and a VRBSG reading whose sub-reading
# (position 0/1) matches PRN, followed by CNJSUB -> keep VRBPL.
# NOTE(review): which sub-reading 0/1 denotes depends on SUBREADINGS - confirm.
SELECT:r25 VRBPL IF
(0 VRBPL)
(0 VRBSG LINK 0/1 PRN)
(1 CNJSUB);
# typical sg+prn.3.m.sg vs pl problem
# this sucks, but what boundary should be used?
# - not (NOT *-1 NOUN BARRIER SENTBOUNDARY)
# r26: token with both a NOUN and an ADJ reading, followed by an ADJ reading
# and then TAPREP -> keep NOUN.
SELECT:r26 NOUN IF
(0 NOUN)
(0 ADJ)
(1 ADJ)
(2 TAPREP);
# The three rules below were all named r27; they are numbered r27, r28 and r29
# so that each rule can be told apart by name (e.g. in trace output).
# r27: token with both a PRN and a POSTDET reading, after a NOUN reading
# -> keep POSTDET.
SELECT:r27 POSTDET IF
(0 PRN)
(0 POSTDET)
(-1 NOUN);
# r28: token with both a NOUNPL and a NOUNPX reading -> keep NOUNPL.
SELECT:r28 NOUNPL IF
(0 NOUNPL)
(0 NOUNPX);
# OYKOYN! any better idea for "u b’hekk insibu l-ewwel għamliet ta’ flawti." ?
# r29: token with both an ADV and a CNJADV reading that is not directly
# preceded by the beginning-of-window marker -> keep ADV.
SELECT:r29 ADV IF
(0 ADV)
(0 CNJADV)
(NOT -1 BOS);
# "biss": "only" vs "however"
This diff is collapsed.
This diff is collapsed.
<?xml version="1.0" encoding="UTF-8"?>
<!-- -*- nxml -*- -->
<!-- Postchunk rules. The only rule in this file matches the "nom" category
     and outputs the item at position 1 in full. -->
<postchunk>
<section-def-cats>
<!-- Category referenced by the rule pattern below. -->
<def-cat n="nom">
<cat-item name="nom"/>
</def-cat>
</section-def-cats>
<section-def-attrs>
<!-- Tags n / np; not referenced by the rule in this file. -->
<def-attr n="a_nom">
<attr-item tags="n"/>
<attr-item tags="np"/>
</def-attr>
</section-def-attrs>
<section-def-vars>
<!-- Declared but not referenced by the rule in this file. -->
<def-var n="paraula"/>
</section-def-vars>
<section-rules>
<rule comment="CHUNK:">
<pattern>
<pattern-item n="nom"/>
</pattern>
<action>
<out>
<lu>
<!-- part="whole": emit the complete lexical unit at position 1. -->
<clip pos="1" part="whole"/>
</lu>
</out>
</action>
</rule>
</section-rules>
</postchunk>
<?xml version="1.0"?>
<analysis-chars>
<!-- Make apostrophe variants equal ' -->
<char value="'">
<equiv-char value="’"/>
<equiv-char value="ʼ"/>
<equiv-char value="‘"/>
</char>
<!-- Common orthographical errors -->
<char value="Ċ">
<equiv-char value="C"/>
</char>
<char value="Ż">
<equiv-char value="Z"/>
</char>
<char value="Ġ">
<equiv-char value="G"/>
</char>
<char value="Ħ">
<equiv-char value="H"/>
</char>
<char value="ù">
<equiv-char value="u"/>
</char>
<char value="ò">
<equiv-char value="o"/>
</char>
<char value="à">
<equiv-char value="a"/>
</char>
<char value="ċ">
<equiv-char value="c"/>
</char>
<char value="ż">
<equiv-char value="z"/>
</char>
<!-- NOTE(review): the second equiv-char below renders identically to the char
     value itself. Presumably one of the two is a different encoding of the
     same letter (e.g. g followed by a combining dot above); if the two are
     byte-identical the entry has no effect. Confirm against the original file. -->
<char value="ġ">
<equiv-char value="g"/>
<equiv-char value="ġ"/>
</char>
<char value="ħ">
<equiv-char value="h"/>
</char>
</analysis-chars>
This diff is collapsed.
<?xml version="1.0" encoding="utf-8"?>
<tagger name="maltese">
<tagset>
<def-label name="ABBR">
<tags-item tags="abbr"/>
</def-label>
<def-label name="NEG">
<tags-item tags="neg"/>
</def-label>
<def-label name="VAUX-PP">
<tags-item tags="vaux.*.pp"/>
<tags-item tags="vaux.*.pp.*"/>
</def-label>
<def-label name="ADJ-COMP">
<tags-item tags="adj.comp"/>
</def-label>
<def-label name="ADJ">
<tags-item tags="adj.*.*"/>
<tags-item tags="vblex.*.pp.*.*"/>
<tags-item tags="vblex.*.pprs.*.*"/>
</def-label>
<def-label name="ADV">
<tags-item tags="adv"/>
<tags-item tags="adv.*"/>
</def-label>
<def-label name="COMMA" closed="true">
<tags-item tags="cm"/>
</def-label>
<def-label name="CNJADV" closed="true">
<tags-item tags="cnjadv"/>
</def-label>
<def-label name="CNJCOO" closed="true">
<tags-item tags="cnjcoo"/>
</def-label>
<def-label name="CNJSUB" closed="true">
<tags-item tags="cnjsub"/>
</def-label>
<def-label name="DET" closed="true">
<tags-item tags="det.*"/>
</def-label>
<!--
<def-label name="DET-DEF" closed="true">
<tags-item tags="det.def.*"/>
</def-label>
<def-label name="DET-DEM" closed="true">
<tags-item tags="det.dem.*"/>
</def-label>
<def-label name="DET-IND" closed="true">
<tags-item tags="det.ind.*"/>
</def-label>
<def-label name="DET-QNT" closed="true">
<tags-item tags="det.qnt.*"/>
</def-label>
<def-label name="DET-ORD" closed="true">
<tags-item tags="det.ord.*"/>
</def-label>
-->
<def-label name="INTERJ" closed="true">
<tags-item tags="ij"/>
</def-label>
<def-label name="N">
<tags-item tags="n.*.*"/>
<tags-item tags="np.*"/>
</def-label>
<def-label name="N-POSS">
<tags-item tags="n.*.*.px1pl"/>
<tags-item tags="n.*.*.px1sg"/>
<tags-item tags="n.*.*.px2pl"/>
<tags-item tags="n.*.*.px2sg"/>
<tags-item tags="n.*.*.px3pl"/>
<tags-item tags="n.*.*.px3sg_f"/>
<tags-item tags="n.*.*.px3sg_m"/>
</def-label>
<!--
<def-label name="NP">
<tags-item tags="np.*"/>
</def-label>
-->
<def-label name="NUM" closed="true">
<tags-item tags="num"/>
<tags-item tags="num.*"/>
</def-label>
<def-label name="PREP" closed="true">
<tags-item tags="pr"/>
</def-label>
<!-- <def-label name="PRN" closed="true">
<tags-item tags="prn.*"/>
</def-label> -->
<def-label name="PRN-DEF" closed="true">
<tags-item tags="prn.def.*"/>
</def-label>
<def-label name="PRN-DEM" closed="true">
<tags-item tags="prn.dem.*"/>
</def-label>
<def-label name="PRN-IND" closed="true">
<tags-item tags="prn.ind.*"/>
</def-label>
<def-label name="PRN-ITG" closed="true">
<tags-item tags="prn.itg.*"/>
</def-label>
<def-label name="PRN-RECIP" closed="true">
<tags-item tags="prn.recip.*"/>
</def-label>
<def-label name="PRN-REF" closed="true">
<tags-item tags="prn.ref"/>
<tags-item tags="prn.ref.*"/>
</def-label>
<def-label name="PRNP" closed="true">
<tags-item tags="prn.p1.*"/>
<tags-item tags="prn.p2.*"/>
<tags-item tags="prn.p3.*"/>
</def-label>
<def-label name="REL" closed="true">
<tags-item tags="rel.an.mf.sp"/>
</def-label>
<def-label name="VAUX-PL" closed="true">
<tags-item tags="vaux.*.*.*.*.pl"/>
</def-label>
<def-label name="VAUX-SG" closed="true">
<tags-item tags="vaux.*.*.*.*.sg"/>
</def-label>
<def-label name="VBLEX-IMP-SG">
<tags-item tags="vblex.*.imp.*.*.sg"/>
</def-label>
<def-label name="VBLEX-IMP-PL">
<tags-item tags="vblex.*.imp.*.*.pl"/>
</def-label>
<def-label name="VBLEX-PL">
<tags-item tags="vblex.*.past.*.*.pl"/>
<tags-item tags="vblex.*.pres.*.*.pl"/>
<tags-item tags="vblex.*.*.p3.m.pl"/> <!-- TODO: remove from mt.dix -->
</def-label>
<def-label name="VBLEX-SG">
<tags-item tags="vblex.*.past.*.*.sg"/>
<tags-item tags="vblex.*.pres.*.*.sg"/>
<tags-item tags="vblex.*.*.p3.*.sg.px3sg_m"/> <!-- TODO: to be removed as well -->
</def-label>
<!-- ???