dc.contributor.author | Straka, Milan |
dc.date.accessioned | 2022-07-26T11:56:56Z |
dc.date.available | 2022-07-26T11:56:56Z |
dc.date.issued | 2020-08-31 |
dc.identifier.uri | http://hdl.handle.net/11234/1-4798 |
dc.description | Tokenizer, POS Tagger, Lemmatizer and Parser models for 99 treebanks of 63 languages of Universal Dependencies 2.6 Treebanks, created solely using UD 2.6 data (https://hdl.handle.net/11234/1-3226). The model documentation, including performance, can be found at https://ufal.mff.cuni.cz/udpipe/2/models#universal_dependencies_26_models . To use these models, you need UDPipe version 2.0, which you can download from https://ufal.mff.cuni.cz/udpipe/2 . |
dc.language.iso | afr |
dc.language.iso | ara |
dc.language.iso | hye |
dc.language.iso | bel |
dc.language.iso | bul |
dc.language.iso | cat |
dc.language.iso | ces |
dc.language.iso | chu |
dc.language.iso | cop |
dc.language.iso | cym |
dc.language.iso | dan |
dc.language.iso | deu |
dc.language.iso | ell |
dc.language.iso | eng |
dc.language.iso | est |
dc.language.iso | eus |
dc.language.iso | fas |
dc.language.iso | fin |
dc.language.iso | fra |
dc.language.iso | fro |
dc.language.iso | gla |
dc.language.iso | gle |
dc.language.iso | glg |
dc.language.iso | got |
dc.language.iso | grc |
dc.language.iso | heb |
dc.language.iso | hin |
dc.language.iso | hrv |
dc.language.iso | hun |
dc.language.iso | ind |
dc.language.iso | ita |
dc.language.iso | jpn |
dc.language.iso | kor |
dc.language.iso | lat |
dc.language.iso | lav |
dc.language.iso | lit |
dc.language.iso | lzh |
dc.language.iso | mar |
dc.language.iso | mlt |
dc.language.iso | nld |
dc.language.iso | nno |
dc.language.iso | nob |
dc.language.iso | orv |
dc.language.iso | pcm |
dc.language.iso | pol |
dc.language.iso | por |
dc.language.iso | ron |
dc.language.iso | rus |
dc.language.iso | slk |
dc.language.iso | slv |
dc.language.iso | sme |
dc.language.iso | spa |
dc.language.iso | srp |
dc.language.iso | swe |
dc.language.iso | tam |
dc.language.iso | tel |
dc.language.iso | tur |
dc.language.iso | uig |
dc.language.iso | ukr |
dc.language.iso | urd |
dc.language.iso | vie |
dc.language.iso | wof |
dc.language.iso | wol |
dc.language.iso | zho |
dc.publisher | Charles University, Faculty of Mathematics and Physics, Institute of Formal and Applied Linguistics (UFAL) |
dc.relation.isreplacedby | http://hdl.handle.net/11234/1-4804 |
dc.rights | Creative Commons - Attribution-NonCommercial-ShareAlike 4.0 International (CC BY-NC-SA 4.0) |
dc.rights.uri | http://creativecommons.org/licenses/by-nc-sa/4.0/ |
dc.source.uri | https://ufal.mff.cuni.cz/udpipe/2 |
dc.subject | tokenizer |
dc.subject | POS tagger |
dc.subject | lemmatization |
dc.subject | tagger |
dc.subject | parser |
dc.subject | dependency parser |
dc.title | Universal Dependencies 2.6 models for UDPipe 2 (2020-08-31) |
dc.type | toolService |
metashare.ResourceInfo#ResourceComponentType#ToolServiceInfo.languageDependent | true |
metashare.ResourceInfo#ContentInfo.detailedType | tool |
dc.rights.label | PUB |
has.files | yes |
branding | LINDAT / CLARIAH-CZ |
demo.uri | https://lindat.mff.cuni.cz/services/udpipe/ |
contact.person | Milan Straka straka@ufal.mff.cuni.cz Charles University, Faculty of Mathematics and Physics, Institute of Formal and Applied Linguistics (UFAL) |
sponsor | Ministerstvo školství, mládeže a tělovýchovy České republiky LM2018101 LINDAT/CLARIAH-CZ: Digitální výzkumná infrastruktura pro jazykové technologie, umění a humanitní vědy nationalFunds |
files.size | 6210656126 |
files.count | 1 |
Files in this item
This item is
Creative Commons - Attribution-NonCommercial-ShareAlike 4.0 International (CC BY-NC-SA 4.0)
Publicly Available
and licensed under: Creative Commons - Attribution-NonCommercial-ShareAlike 4.0 International (CC BY-NC-SA 4.0)
- Name
- udpipe2-ud-2.6-200830.tar.gz
- Size
- 5.78 GB
- Format
- application/x-gzip
- Description
- Universal Dependencies 2.6 models for UDPipe 2 (2020-08-31)
- MD5
- c83a2fa05d5ad5271313b8ee6bf2b79c
- udpipe2-ud-2.6-200830
- LICENSE20 kB
- sv_all-ud-2.6-200830.model
- weights.data-00000-of-0000193 MB
- sv_talbanken.tokenizer200 kB
- weights.index2 kB
- checkpoint71 B
- options.json981 B
- sv_lines.tokenizer34 kB
- mappings.pickle416 kB
- et_all-ud-2.6-200830.model
- weights.data-00000-of-00001141 MB
- options.json933 B
- weights.index2 kB
- checkpoint71 B
- et_edt.tokenizer210 kB
- et_ewt.tokenizer207 kB
- mappings.pickle981 kB
- lt_all-ud-2.6-200830.model
- weights.data-00000-of-0000189 MB
- lt_alksnis.tokenizer36 kB
- checkpoint71 B
- weights.index2 kB
- options.json957 B
- lt_hse.tokenizer199 kB
- mappings.pickle486 kB
- la_all-ud-2.6-200830.model
- weights.data-00000-of-00001143 MB
- options.json1 kB
- checkpoint71 B
- weights.index2 kB
- la_llct.tokenizer31 kB
- la_perseus.tokenizer31 kB
- mappings.pickle1 MB
- fa_seraji-ud-2.6-200830.model
- weights.data-00000-of-0000189 MB
- options.json849 B
- checkpoint71 B
- weights.index2 kB
- fa_seraji.tokenizer46 kB
- mappings.pickle234 kB
- no_all-ud-2.6-200830.model
- weights.data-00000-of-00001122 MB
- no_nynorsk.tokenizer206 kB
- checkpoint71 B
- options.json1 kB
- weights.index2 kB
- no_nynorsklia.tokenizer197 kB
- no_bokmaal.tokenizer207 kB
- mappings.pickle612 kB
- pl_all-ud-2.6-200830.model
- pl_pdb.tokenizer233 kB
- weights.data-00000-of-00001142 MB
- checkpoint71 B
- options.json933 B
- weights.index2 kB
- pl_lfg.tokenizer35 kB
- mappings.pickle1 MB
- ar_padt-ud-2.6-200830.model
- weights.data-00000-of-00001109 MB
- ar_padt.tokenizer137 kB
- options.json835 B
- checkpoint71 B
- weights.index2 kB
- mappings.pickle4 MB
- cs_all-ud-2.6-200830.model
- weights.index2 kB
- options.json1 kB
- cs_fictree.tokenizer36 kB
- weights.data-00000-of-00001249 MB
- cs_pdt.tokenizer214 kB
- checkpoint71 B
- cs_cac.tokenizer34 kB
- mappings.pickle4 MB
- cs_cltt.tokenizer39 kB
- he_htb-ud-2.6-200830.model
- weights.data-00000-of-0000191 MB
- weights.index2 kB
- options.json828 B
- checkpoint71 B
- he_htb.tokenizer311 kB
- mappings.pickle269 kB
- vi_vtb-ud-2.6-200830.model
- weights.data-00000-of-0000150 MB
- weights.index2 kB
- options.json828 B
- checkpoint71 B
- vi_vtb.tokenizer215 kB
- mappings.pickle49 kB
- el_gdt-ud-2.6-200830.model
- weights.data-00000-of-0000158 MB
- weights.index2 kB
- checkpoint71 B
- options.json828 B
- el_gdt.tokenizer39 kB
- mappings.pickle187 kB
- en_all-ud-2.6-200830.model
- weights.index2 kB
- options.json1 kB
- en_lines.tokenizer33 kB
- weights.data-00000-of-00001109 MB
- checkpoint71 B
- en_partut.tokenizer35 kB
- mappings.pickle1 MB
- en_ewt.tokenizer206 kB
- en_gum.tokenizer41 kB
- sr_set-ud-2.6-200830.model
- weights.data-00000-of-0000189 MB
- weights.index2 kB
- sr_set.tokenizer34 kB
- checkpoint71 B
- options.json828 B
- mappings.pickle236 kB
- mr_ufal-ud-2.6-200830.model
- weights.data-00000-of-0000149 MB
- options.json835 B
- weights.index2 kB
- checkpoint71 B
- mr_ufal.tokenizer196 kB
- mappings.pickle31 kB
- de_all-ud-2.6-200830.model
- weights.data-00000-of-00001249 MB
- checkpoint71 B
- options.json933 B
- weights.index2 kB
- de_hdt.tokenizer208 kB
- de_gsd.tokenizer238 kB
- mappings.pickle2 MB
- da_ddt-ud-2.6-200830.model
- weights.data-00000-of-0000185 MB
- checkpoint71 B
- options.json828 B
- weights.index2 kB
- da_ddt.tokenizer35 kB
- mappings.pickle156 kB
- grc_all-ud-2.6-200830.model
- weights.data-00000-of-00001170 MB
- grc_proiel.tokenizer221 kB
- weights.index2 kB
- checkpoint71 B
- options.json990 B
- grc_perseus.tokenizer42 kB
- mappings.pickle1 MB
- sl_all-ud-2.6-200830.model
- weights.data-00000-of-00001101 MB
- checkpoint71 B
- options.json933 B
- weights.index2 kB
- sl_ssj.tokenizer35 kB
- sl_sst.tokenizer30 kB
- mappings.pickle596 kB
- hi_hdtb-ud-2.6-200830.model
- weights.data-00000-of-0000193 MB
- options.json835 B
- weights.index2 kB
- checkpoint71 B
- mappings.pickle6 MB
- hi_hdtb.tokenizer203 kB
- hr_set-ud-2.6-200830.model
- weights.data-00000-of-00001102 MB
- weights.index2 kB
- checkpoint71 B
- options.json828 B
- hr_set.tokenizer36 kB
- mappings.pickle404 kB
- tr_imst-ud-2.6-200830.model
- weights.data-00000-of-0000156 MB
- options.json835 B
- weights.index2 kB
- checkpoint71 B
- tr_imst.tokenizer51 kB
- mappings.pickle182 kB
- ug_udt-ud-2.6-200830.model
- weights.data-00000-of-0000152 MB
- weights.index2 kB
- options.json828 B
- checkpoint71 B
- ug_udt.tokenizer196 kB
- mappings.pickle406 kB
- eu_bdt-ud-2.6-200830.model
- weights.data-00000-of-0000190 MB
- weights.index2 kB
- checkpoint71 B
- options.json828 B
- eu_bdt.tokenizer34 kB
- mappings.pickle245 kB
- id_gsd-ud-2.6-200830.model
- weights.data-00000-of-0000187 MB
- id_gsd.tokenizer51 kB
- checkpoint71 B
- weights.index2 kB
- options.json828 B
- mappings.pickle1 MB
- be_hse-ud-2.6-200830.model
- weights.data-00000-of-0000150 MB
- checkpoint71 B
- weights.index2 kB
- options.json828 B
- mappings.pickle55 kB
- be_hse.tokenizer212 kB
- ga_idt-ud-2.6-200830.model
- weights.data-00000-of-0000157 MB
- weights.index2 kB
- options.json828 B
- checkpoint71 B
- mappings.pickle124 kB
- ga_idt.tokenizer34 kB
- mt_mudt-ud-2.6-200830.model
- weights.data-00000-of-0000152 MB
- weights.index2 kB
- checkpoint71 B
- options.json835 B
- mt_mudt.tokenizer35 kB
- mappings.pickle53 kB
- hu_szeged-ud-2.6-200830.model
- weights.data-00000-of-0000153 MB
- options.json849 B
- weights.index2 kB
- checkpoint71 B
- hu_szeged.tokenizer34 kB
- mappings.pickle102 kB
- sme_giella-ud-2.6-200830.model
- weights.data-00000-of-0000153 MB
- checkpoint71 B
- options.json856 B
- weights.index2 kB
- sme_giella.tokenizer33 kB
- mappings.pickle87 kB
- cop_scriptorium-ud-2.6-200830.model
- weights.data-00000-of-0000149 MB
- weights.index2 kB
- options.json891 B
- checkpoint71 B
- cop_scriptorium.tokenizer265 kB
- mappings.pickle121 kB
- zh_gsd-ud-2.6-200830.model
- weights.data-00000-of-0000191 MB
- checkpoint71 B
- weights.index2 kB
- options.json828 B
- zh_gsd.tokenizer1019 kB
- mappings.pickle239 kB
- fro_srcmf-ud-2.6-200830.model
- weights.data-00000-of-0000187 MB
- weights.index2 kB
- options.json849 B
- checkpoint71 B
- fro_srcmf.tokenizer33 kB
- mappings.pickle168 kB
- af_afribooms-ud-2.6-200830.model
- weights.data-00000-of-0000152 MB
- weights.index2 kB
- checkpoint71 B
- options.json870 B
- af_afribooms.tokenizer34 kB
- mappings.pickle71 kB
- te_mtg-ud-2.6-200830.model
- weights.data-00000-of-0000148 MB
- weights.index2 kB
- options.json828 B
- checkpoint71 B
- te_mtg.tokenizer33 kB
- mappings.pickle103 kB
- sk_snk-ud-2.6-200830.model
- weights.data-00000-of-0000193 MB
- sk_snk.tokenizer37 kB
- checkpoint71 B
- options.json828 B
- weights.index2 kB
- mappings.pickle423 kB
- lv_lvtb-ud-2.6-200830.model
- weights.data-00000-of-00001108 MB
- checkpoint71 B
- options.json835 B
- weights.index2 kB
- lv_lvtb.tokenizer37 kB
- mappings.pickle8 MB
- fi_all-ud-2.6-200830.model
- weights.data-00000-of-00001149 MB
- fi_ftb.tokenizer203 kB
- fi_tdt.tokenizer48 kB
- weights.index2 kB
- checkpoint71 B
- options.json933 B
- mappings.pickle1 MB
- hy_armtdp-ud-2.6-200830.model
- weights.data-00000-of-0000158 MB
- options.json849 B
- weights.index2 kB
- checkpoint71 B
- mappings.pickle991 kB
- hy_armtdp.tokenizer222 kB
- ur_udtb-ud-2.6-200830.model
- weights.data-00000-of-0000186 MB
- ur_udtb.tokenizer37 kB
- options.json835 B
- weights.index2 kB
- checkpoint71 B
- mappings.pickle287 kB
- es_all-ud-2.6-200830.model
- weights.data-00000-of-00001136 MB
- options.json951 B
- weights.index2 kB
- checkpoint71 B
- es_ancora.tokenizer225 kB
- mappings.pickle1 MB
- es_gsd.tokenizer70 kB
- sa_vedic-ud-2.6-200830.model
- weights.data-00000-of-0000152 MB
- checkpoint71 B
- weights.index2 kB
- options.json842 B
- sa_vedic.tokenizer189 kB
- mappings.pickle81 kB
- gl_all-ud-2.6-200830.model
- weights.data-00000-of-0000188 MB
- gl_treegal.tokenizer206 kB
- weights.index2 kB
- checkpoint71 B
- options.json957 B
- mappings.pickle175 kB
- gl_ctg.tokenizer226 kB
- cy_ccg-ud-2.6-200830.model
- weights.data-00000-of-0000151 MB
- options.json828 B
- checkpoint71 B
- weights.index2 kB
- cy_ccg.tokenizer33 kB
- mappings.pickle49 kB
- pcm_nsc-ud-2.6-200830.model
- weights.data-00000-of-0000180 MB
- checkpoint71 B
- options.json835 B
- weights.index2 kB
- pcm_nsc.tokenizer32 kB
- mappings.pickle5 MB
- nl_all-ud-2.6-200830.model
- weights.data-00000-of-00001105 MB
- options.json993 B
- checkpoint71 B
- weights.index2 kB
- nl_alpino.tokenizer35 kB
- mappings.pickle750 kB
- nl_lassysmall.tokenizer36 kB
- ja_gsd-ud-2.6-200830.model
- weights.data-00000-of-0000192 MB
- ja_gsd.tokenizer843 kB
- weights.index2 kB
- options.json828 B
- checkpoint71 B
- mappings.pickle3 MB
- got_proiel-ud-2.6-200830.model
- weights.data-00000-of-0000156 MB
- options.json856 B
- weights.index2 kB
- checkpoint71 B
- mappings.pickle251 kB
- got_proiel.tokenizer31 kB
- MODELS.txt19 kB
- bg_btb-ud-2.6-200830.model
- weights.data-00000-of-0000196 MB
- bg_btb.tokenizer212 kB
- options.json828 B
- checkpoint71 B
- weights.index2 kB
- mappings.pickle512 kB
- wo_wtb-ud-2.6-200830.model
- weights.data-00000-of-0000151 MB
- checkpoint71 B
- weights.index2 kB
- options.json828 B
- wo_wtb.tokenizer204 kB
- mappings.pickle94 kB
- la_proiel-ud-2.6-200830.model
- weights.data-00000-of-00001103 MB
- weights.index2 kB
- checkpoint71 B
- options.json849 B
- mappings.pickle665 kB
- la_proiel.tokenizer217 kB
- la_ittb-ud-2.6-200830.model
- weights.data-00000-of-0000196 MB
- la_ittb.tokenizer30 kB
- weights.index2 kB
- checkpoint71 B
- options.json835 B
- mappings.pickle363 kB
- fr_all-ud-2.6-200830.model
- fr_gsd.tokenizer252 kB
- options.json1 kB
- weights.index2 kB
- fr_partut.tokenizer34 kB
- weights.data-00000-of-00001130 MB
- checkpoint71 B
- mappings.pickle782 kB
- fr_sequoia.tokenizer36 kB
- fr_spoken.tokenizer32 kB
- uk_iu-ud-2.6-200830.model
- weights.data-00000-of-0000196 MB
- options.json821 B
- weights.index2 kB
- checkpoint71 B
- uk_iu.tokenizer225 kB
- mappings.pickle5 MB
- ru_all-ud-2.6-200830.model
- weights.data-00000-of-00001190 MB
- weights.index2 kB
- options.json1 kB
- checkpoint71 B
- ru_gsd.tokenizer225 kB
- ru_taiga.tokenizer43 kB
- mappings.pickle2 MB
- ru_syntagrus.tokenizer217 kB
- zh_gsdsimp-ud-2.6-200830.model
- zh_gsdsimp.tokenizer990 kB
- weights.data-00000-of-0000191 MB
- checkpoint71 B
- options.json856 B
- weights.index2 kB
- mappings.pickle237 kB
- orv_all-ud-2.6-200830.model
- weights.data-00000-of-00001123 MB
- orv_rnc.tokenizer204 kB
- options.json960 B
- checkpoint71 B
- weights.index2 kB
- orv_torot.tokenizer206 kB
- mappings.pickle1001 kB
- it_all-ud-2.6-200830.model
- options.json1 kB
- weights.index2 kB
- it_twittiro.tokenizer39 kB
- it_partut.tokenizer207 kB
- it_postwita.tokenizer51 kB
- weights.data-00000-of-00001131 MB
- it_vit.tokenizer212 kB
- checkpoint71 B
- mappings.pickle1 MB
- it_isdt.tokenizer226 kB
- pt_all-ud-2.6-200830.model
- weights.data-00000-of-00001116 MB
- pt_gsd.tokenizer224 kB
- options.json951 B
- checkpoint71 B
- weights.index2 kB
- pt_bosque.tokenizer224 kB
- mappings.pickle753 kB
- gd_arcosg-ud-2.6-200830.model
- weights.data-00000-of-0000152 MB
- gd_arcosg.tokenizer34 kB
- checkpoint71 B
- options.json849 B
- weights.index2 kB
- mappings.pickle60 kB
- ta_ttb-ud-2.6-200830.model
- weights.data-00000-of-0000149 MB
- weights.index2 kB
- options.json828 B
- checkpoint71 B
- ta_ttb.tokenizer215 kB
- mappings.pickle241 kB
- README.html111 kB
- ko_all-ud-2.6-200830.model
- weights.data-00000-of-00001158 MB
- ko_kaist.tokenizer232 kB
- weights.index2 kB
- options.json945 B
- checkpoint71 B
- ko_gsd.tokenizer586 kB
- mappings.pickle1 MB
- cu_proiel-ud-2.6-200830.model
- weights.data-00000-of-0000159 MB
- checkpoint71 B
- weights.index2 kB
- options.json849 B
- mappings.pickle299 kB
- cu_proiel.tokenizer202 kB
- ro_all-ud-2.6-200830.model
- weights.data-00000-of-00001144 MB
- ro_nonstandard.tokenizer35 kB
- checkpoint71 B
- weights.index2 kB
- options.json981 B
- ro_rrt.tokenizer39 kB
- mappings.pickle1 MB
- lzh_kyoto-ud-2.6-200830.model
- weights.data-00000-of-0000182 MB
- options.json849 B
- checkpoint71 B
- weights.index2 kB
- lzh_kyoto.tokenizer945 kB
- mappings.pickle245 kB
- ca_ancora-ud-2.6-200830.model
- weights.data-00000-of-00001106 MB
- weights.index2 kB
- options.json849 B
- checkpoint71 B
- mappings.pickle854 kB
- ca_ancora.tokenizer221 kB