dc.contributor.author | Pirinen, Tommi A |
dc.contributor.author | Listenmaa, Inari |
dc.contributor.author | Johnson, Ryan |
dc.contributor.author | Tyers, Francis M. |
dc.contributor.author | Kuokkala, Juha |
dc.date.accessioned | 2017-03-30T15:15:15Z |
dc.date.available | 2017-03-30T15:15:15Z |
dc.date.issued | 2017-03-22 |
dc.identifier.uri | http://hdl.handle.net/11372/LRT-1992 |
dc.description | Omorfi is a free and open-source project containing various tools and data for handling Finnish texts in a linguistically motivated manner. The main components of this repository are: 1) a lexical database containing hundreds of thousands of words (cf. lexical statistics), 2) a collection of scripts to convert the lexical database into formats used by upstream NLP tools (cf. lexical processing), 3) an autotools setup to build and install (or package, or deploy) the scripts, the database, and simple APIs / convenience processing tools, and 4) a collection of relatively simple APIs for a selection of languages and scripts to apply the NLP tools and access the database. |
dc.language.iso | fin |
dc.publisher | University of Helsinki |
dc.rights | GNU General Public Licence, version 3 |
dc.rights.uri | http://opensource.org/licenses/GPL-3.0 |
dc.source.uri | https://github.com/flammie/omorfi |
dc.subject | morphological analysis |
dc.subject | morphological dictionary |
dc.title | Open morphology of Finnish |
dc.type | toolService |
metashare.ResourceInfo#ResourceComponentType#ToolServiceInfo.languageDependent | true |
metashare.ResourceInfo#ContentInfo.detailedType | tool |
dc.rights.label | PUB |
has.files | yes |
branding | LRT + Open Submissions |
contact.person | Tommi Pirinen tommi.antero.pirinen@uni-hamburg.de Universität Hamburg |
files.size | 7111437 |
files.count | 1 |
Soubory tohoto záznamu
- Název
- omorfi-20161115.tar.gz
- Velikost
- 6.78 MB
- Formát
- application/x-gzip
- Popis
- source code tree
- MD5
- 23de290f5c5a2c82874dfe302132d2b5
- omorfi-20161115
- README.md25 kB
- ChangeLog.old89 kB
- .gitignore1 kB
- THANKS714 B
- configure.ac15 kB
- src
- perl
- ktn2csv.pl8 kB
- examples
- paradigms.tsv158 kB
- stuff.tsv5 kB
- bash
- set-attribute.bash674 B
- omorfi-locate.sh1 kB
- omor2apertium.sed5 kB
- omorfi-disambiguate-text.sh1 kB
- omorfi-hyphenate.sh1 kB
- omorfi-segment.sh3 kB
- omor2apertium.sh1 kB
- remove-word.bash523 B
- omorfi-analyse-text.sh1 kB
- add-lex-data.bash2 kB
- generate-wordlist.sh755 B
- omorfi-analyse-tokenised.sh1 kB
- change-class.bash546 B
- add-word.bash881 B
- generate-all-wordforms.sh796 B
- omorfi-spell.sh1 kB
- omorfi-generate.sh1 kB
- omorfi.bash.in5 kB
- generate-analysedlist.bash667 B
- attributes
- prontype.tsv2 kB
- symbol-classes.tsv929 B
- plurale-tantum.tsv342 kB
- boundaries.tsv2 MB
- lexicalised-inflection.tsv68 kB
- clitics.tsv52 kB
- pronunciation.tsv10 kB
- proper-classes.tsv4 MB
- possessives.tsv5 kB
- adptype.tsv3 kB
- blacklisted.tsv120 kB
- style.tsv2 kB
- broken-paradigms.tsv11 kB
- abbr.tsv58 kB
- verb-arguments.tsv273 kB
- semantic.tsv139 kB
- numtype.tsv4 kB
- continuations
- stems.tsv2 MB
- inflections.tsv175 kB
- vislcg3
- omorfi.cg3114 kB
- docs
- lexemes.tsv2 kB
- paradigms.tsv66 kB
- stuff.tsv3 kB
- java
- externals
- fiwiktionary.bash544 B
- fiwikt2omorfi.bash4 kB
- joukahainen.xml6 MB
- kotus-sanalista_v1.xml3 MB
- phonology
- omorfi.twolc362 B
- Makefile.am363 B
- python
- generate-edit-distance.py3 kB
- tsvjoin.py7 kB
- omorfi-conllu.py14 kB
- generate-githubwiki.py7 kB
- guess-csv2tsv.py5 kB
- generate-yaml.py4 kB
- find-redundant-lexemes.py4 kB
- omorfi-factorise.py2 kB
- generate-monodix.py6 kB
- generate-kotus-sanalista.py4 kB
- omorfi-vislcg.py4 kB
- generate-lexcs.py14 kB
- tsv_expand.py7 kB
- generate-reweights.py3 kB
- omorfi-tokenise.py3 kB
- convert_tag_format.py12 kB
- omorfi-segment.py15 kB
- generate-twolcs.py3 kB
- omorfi
- gradation.py8 kB
- plurale_tantum.py10 kB
- parse_csv_data.py4 kB
- error_logging.py1 kB
- tdt_formatter.py13 kB
- test_lexc_multichars.py1 kB
- formatter.py3 kB
- monodix_formatter.py9 kB
- ftb3_formatter.py19 kB
- twolc_formatter.py6 kB
- apertium_formatter.py13 kB
- regex_formatter.py6 kB
- guess_feats.py6 kB
- stub.py27 kB
- kotus_sanalista_formatter.py1 kB
- string_manglers.py4 kB
- no_tags_formatter.py3 kB
- omor_strings_io.py0 B
- omorfi.py19 kB
- omor_formatter.py24 kB
- wordmap.py2 kB
- lexc_formatter.py3 kB
- experimental_xml_formatter.py3 kB
- __init__.py956 B
- labeled_segments_formatter.py11 kB
- settings.py5 kB
- guess_new_class.py243 kB
- giella_formatter.py16 kB
- generate-regexes.py2 kB
- lexemes.tsv13 MB
- tokeniser
- omorfi.tokenise.pmatch156 B
- Makefile.am50 kB
- paradigms
- stub-deletions.tsv53 kB
- suffix-regexes.tsv61 kB
- morphophonology.tsv184 kB
- test-scripts
- lemmas-match-regexes.py4 kB
- consistent-attribute-keys.bash1 kB
- lemmas-match-regexes.bash179 B
- valid-datatypes.bash427 B
- constraint-unique-keys.bash524 B
- voikko
- voikko-fi_FI.pro258 B
- index.xml.in3 kB
- stats
- word_id.blacklist3 MB
- spell-checking
- edit-distance-1.text123 kB
- edit-distance-0.text752 B
- Makefile.am1 kB
- edit-distance-infinity.text126 kB
- perl
- .pre-commit-config.yaml290 B
- autogen.sh44 kB
- setup.cfg72 B
- README.bindist1 kB
- .travis.yml318 B
- INSTALL9 kB
- NEWS6 kB
- TODO640 B
- Makefile.am945 B
- COPYING34 kB
- test
- coverage-tatoeba.bash652 B
- coverage-ftb1-2014.sh655 B
- count_tsv.awk897 B
- tokenisation-test-set.text429 B
- times2wiki.bash337 B
- coverages-all-slow.bash.in1 kB
- faithfulness.py10 kB
- newstest2015-fien-ref.en.text142 kB
- regress-coveragelogs.bash769 B
- prop-corpus-tests.py24 kB
- coverages-all-quick.sh.in560 B
- gtd-tests.tsv109 kB
- faithfulness2wiki.bash943 B
- Makefile.am608 B
- newstest2015-fien-src.fi.text149 kB
- wordforms.list73 kB
- faithfulness-ftb-3.1-quick.sh.in684 B
- find_errs.awk239 B
- clusterstuff.slurm.in324 B
- clusterstuff.pbs.in260 B
- speed-test.sh.in622 B
- sizes2wiki.bash651 B
- coverages2wiki.bash1 kB
- test-header.yaml.in209 B
- blacklist4 kB
- graylist5 kB
- ftc-test.py4 kB
- conllu-fi-ud-test.sh.in734 B
- faithfulness-ftb-3.1.sh.in675 B
- whitelist1 kB
- regress-coveragelogs.py4 kB
- scripts-runnable.sh282 B
- get-covered.bash7 kB
- coverage-fast-alls.freqs353 kB
- conllu-fi-ftb-ud-test.sh.in766 B
- corpus-tests.py17 kB
- rough-tests.sh638 B
- coverage.py3 kB
- ftc-test.sh324 B
- conllu-compare.py6 kB
- omorfi.pc.in203 B
- man
- omorfi-analyse-text.11 kB
- omorfi-analyse-tokenised.11 kB
- omorfi-generate.1906 B
- omorfi-spell.1827 B
- Makefile.am392 B
- omorfi-hyphenate.1931 B
- omorfi-analyse.11 kB
- omorfi-factorise.11 kB
- omorfi-disambiguate-text.11 kB
- omorfi-segment.11 kB
- doc
- omorfi-tagchart.svg74 kB
- omorfi-flowchart-2015.svg30 kB
- Makefile.am129 B
- omorfi-tagchart.dia2 kB
- omorfi-flowchart-2015.dia5 kB
- Doxyfile7 kB
- AUTHORS151 B
- pax_global_header52 B