This is a bash script to create dictionaries between two foreign languages (FL), e.g. French-Spanish. FL1-FL2 bilingual dictionaries are extracted from en-FL1 and en-FL2 ding-formatted dictionaries.
Required programs: bash, sed, grep, sort and uniq.
#!/bin/sh
# script copied and modified/improved from the
# DingMee Translator: http://chrm.info/cms/dingmee-translator
# GNU General Public License either version 3 of the License, or
# (at your option) any later version.
# Join two English-keyed ding dictionaries into one FL1 :: FL2 dictionary.
#
# Usage: $0 <DICT1> <DICT2> <OUTPUT-FILE>
#   DICT1        en-FL1 dictionary; one "english :: translation" entry per line
#   DICT2        en-FL2 dictionary, searched for the same English term + gloss
#   OUTPUT-FILE  receives "FL1 :: FL2" lines; matches are appended to any
#                existing content, then the whole file is filtered, sorted
#                and de-duplicated
#
# Progress markers are written to stdout; usage and I/O errors go to stderr
# and terminate the script with status 1.

if [ "$#" -lt 3 ]; then
  echo "Usage: $0 <DICT1> <DICT2> <OUTPUT-FILE>" >&2
  exit 1
fi

DICT1=$1
DICT2=$2
OUTPUT=$3

for dict in "$DICT1" "$DICT2"; do
  if [ ! -r "$dict" ]; then
    echo "$0: cannot read dictionary: $dict" >&2
    exit 1
  fi
done

# Make sure the output file exists (without truncating it) so the cleanup
# stage below still works when no entry matches at all.
if ! printf '' >> "$OUTPUT"; then
  echo "$0: cannot write to output file: $OUTPUT" >&2
  exit 1
fi

a=0
# "|| [ -n ... ]" also processes a final line that lacks a trailing newline.
while read -r line || [ -n "$line" ]; do
  # English side of the entry: drop everything from " :: " on, drop a
  # "/pronunciation/ " segment, then backslash-escape the characters that are
  # special in a basic regular expression, because the text is used as a grep
  # pattern further down.
  lineEN=$(printf '%s\n' "$line" |
    sed -e 's/ :: .*$//' -e 's|/.*/ ||' -e 's/[[\\.*^$]/\\&/g')

  # Headword (plus anything else in front of the first " (").
  termEN=${lineEN%%" ("*}

  # Gloss: everything from the first "(" on; empty when there is no "(".
  case "$lineEN" in
    *"("*) glossEN="(${lineEN#*"("}" ;;
    *) glossEN= ;;
  esac

  # Translation side of the entry: the text after the last ":: ".
  lineSOURCE=${line##*:: }

  # Cross-reference entries carry no translation of their own.
  case "$lineEN" in
    *SEE*)
      echo "--------- SEE --------"
      echo ""
      echo ""
      continue
      ;;
  esac

  # gloss-less entries must be excluded
  # as they are poor quality
  # and will match any gloss-entry of the same POS
  if [ -z "$glossEN" ]; then
    echo "--------- NON-GlOSS ENTRY --------"
    echo ""
    continue
  fi

  # Look up the same term + gloss in the second dictionary and keep only the
  # translation part of every matching line. "-e" keeps headwords that start
  # with "-" (e.g. suffix entries) from being parsed as grep options.
  lineDEST=$(grep -e "${termEN}.*${glossEN}" -- "$DICT2" |
    sed 's/.*:: \(.*$\)/\1/')

  if [ -n "$lineDEST" ]; then
    a=$((a + 1))
    echo "$a"
    echo "-------- FOUND --------"
    # When several DICT2 lines matched, lineDEST spans several lines; only the
    # first one ends up on the "::" line and survives the cleanup below.
    printf '%s :: %s\n' "$lineSOURCE" "$lineDEST" >> "$OUTPUT"
  fi
done < "$DICT1"

# Cleanup: drop cross-references and lines without a "::" separator, then sort
# and de-duplicate. "sort | uniq" is used rather than "sort -u" so that only
# byte-identical lines are merged, whatever the locale's collation rules are.
grep -v "SEE:" -- "$OUTPUT" | grep "::" | sort | uniq > "${OUTPUT}.new"
mv -- "${OUTPUT}.new" "$OUTPUT"

echo "Ready. Generated file: $OUTPUT"