Show simple item record

 
dc.contributor.author Nivre, Joakim
dc.contributor.author Agić, Željko
dc.contributor.author Ahrenberg, Lars
dc.contributor.author Aranzabe, Maria Jesus
dc.contributor.author Asahara, Masayuki
dc.contributor.author Atutxa, Aitziber
dc.contributor.author Ballesteros, Miguel
dc.contributor.author Bauer, John
dc.contributor.author Bengoetxea, Kepa
dc.contributor.author Bhat, Riyaz Ahmad
dc.contributor.author Bick, Eckhard
dc.contributor.author Bosco, Cristina
dc.contributor.author Bouma, Gosse
dc.contributor.author Bowman, Sam
dc.contributor.author Candito, Marie
dc.contributor.author Cebiroğlu Eryiğit, Gülşen
dc.contributor.author Celano, Giuseppe G. A.
dc.contributor.author Chalub, Fabricio
dc.contributor.author Choi, Jinho
dc.contributor.author Çöltekin, Çağrı
dc.contributor.author Connor, Miriam
dc.contributor.author Davidson, Elizabeth
dc.contributor.author de Marneffe, Marie-Catherine
dc.contributor.author de Paiva, Valeria
dc.contributor.author Diaz de Ilarraza, Arantza
dc.contributor.author Dobrovoljc, Kaja
dc.contributor.author Dozat, Timothy
dc.contributor.author Droganova, Kira
dc.contributor.author Dwivedi, Puneet
dc.contributor.author Eli, Marhaba
dc.contributor.author Erjavec, Tomaž
dc.contributor.author Farkas, Richárd
dc.contributor.author Foster, Jennifer
dc.contributor.author Freitas, Cláudia
dc.contributor.author Gajdošová, Katarína
dc.contributor.author Galbraith, Daniel
dc.contributor.author Garcia, Marcos
dc.contributor.author Ginter, Filip
dc.contributor.author Goenaga, Iakes
dc.contributor.author Gojenola, Koldo
dc.contributor.author Gökırmak, Memduh
dc.contributor.author Goldberg, Yoav
dc.contributor.author Gómez Guinovart, Xavier
dc.contributor.author Gonzáles Saavedra, Berta
dc.contributor.author Grioni, Matias
dc.contributor.author Grūzītis, Normunds
dc.contributor.author Guillaume, Bruno
dc.contributor.author Habash, Nizar
dc.contributor.author Hajič, Jan
dc.contributor.author Hà Mỹ, Linh
dc.contributor.author Haug, Dag
dc.contributor.author Hladká, Barbora
dc.contributor.author Hohle, Petter
dc.contributor.author Ion, Radu
dc.contributor.author Irimia, Elena
dc.contributor.author Johannsen, Anders
dc.contributor.author Jørgensen, Fredrik
dc.contributor.author Kaşıkara, Hüner
dc.contributor.author Kanayama, Hiroshi
dc.contributor.author Kanerva, Jenna
dc.contributor.author Kotsyba, Natalia
dc.contributor.author Krek, Simon
dc.contributor.author Laippala, Veronika
dc.contributor.author Lê Hồng, Phương
dc.contributor.author Lenci, Alessandro
dc.contributor.author Ljubešić, Nikola
dc.contributor.author Lyashevskaya, Olga
dc.contributor.author Lynn, Teresa
dc.contributor.author Makazhanov, Aibek
dc.contributor.author Manning, Christopher
dc.contributor.author Mărănduc, Cătălina
dc.contributor.author Mareček, David
dc.contributor.author Martínez Alonso, Héctor
dc.contributor.author Martins, André
dc.contributor.author Mašek, Jan
dc.contributor.author Matsumoto, Yuji
dc.contributor.author McDonald, Ryan
dc.contributor.author Missilä, Anna
dc.contributor.author Mititelu, Verginica
dc.contributor.author Miyao, Yusuke
dc.contributor.author Montemagni, Simonetta
dc.contributor.author More, Amir
dc.contributor.author Mori, Shunsuke
dc.contributor.author Moskalevskyi, Bohdan
dc.contributor.author Muischnek, Kadri
dc.contributor.author Mustafina, Nina
dc.contributor.author Müürisep, Kaili
dc.contributor.author Nguyễn Thị, Lương
dc.contributor.author Nguyễn Thị Minh, Huyền
dc.contributor.author Nikolaev, Vitaly
dc.contributor.author Nurmi, Hanna
dc.contributor.author Ojala, Stina
dc.contributor.author Osenova, Petya
dc.contributor.author Øvrelid, Lilja
dc.contributor.author Pascual, Elena
dc.contributor.author Passarotti, Marco
dc.contributor.author Perez, Cenel-Augusto
dc.contributor.author Perrier, Guy
dc.contributor.author Petrov, Slav
dc.contributor.author Piitulainen, Jussi
dc.contributor.author Plank, Barbara
dc.contributor.author Popel, Martin
dc.contributor.author Pretkalniņa, Lauma
dc.contributor.author Prokopidis, Prokopis
dc.contributor.author Puolakainen, Tiina
dc.contributor.author Pyysalo, Sampo
dc.contributor.author Rademaker, Alexandre
dc.contributor.author Ramasamy, Loganathan
dc.contributor.author Real, Livy
dc.contributor.author Rituma, Laura
dc.contributor.author Rosa, Rudolf
dc.contributor.author Saleh, Shadi
dc.contributor.author Sanguinetti, Manuela
dc.contributor.author Saulīte, Baiba
dc.contributor.author Schuster, Sebastian
dc.contributor.author Seddah, Djamé
dc.contributor.author Seeker, Wolfgang
dc.contributor.author Seraji, Mojgan
dc.contributor.author Shakurova, Lena
dc.contributor.author Shen, Mo
dc.contributor.author Sichinava, Dmitry
dc.contributor.author Silveira, Natalia
dc.contributor.author Simi, Maria
dc.contributor.author Simionescu, Radu
dc.contributor.author Simkó, Katalin
dc.contributor.author Šimková, Mária
dc.contributor.author Simov, Kiril
dc.contributor.author Smith, Aaron
dc.contributor.author Suhr, Alane
dc.contributor.author Sulubacak, Umut
dc.contributor.author Szántó, Zsolt
dc.contributor.author Taji, Dima
dc.contributor.author Tanaka, Takaaki
dc.contributor.author Tsarfaty, Reut
dc.contributor.author Tyers, Francis
dc.contributor.author Uematsu, Sumire
dc.contributor.author Uria, Larraitz
dc.contributor.author van Noord, Gertjan
dc.contributor.author Varga, Viktor
dc.contributor.author Vincze, Veronika
dc.contributor.author Washington, Jonathan North
dc.contributor.author Žabokrtský, Zdeněk
dc.contributor.author Zeldes, Amir
dc.contributor.author Zeman, Daniel
dc.contributor.author Zhu, Hanzhi
dc.date.accessioned 2017-03-14T13:16:19Z
dc.date.available 2017-03-14T13:16:19Z
dc.date.issued 2017-03-13
dc.identifier.uri http://hdl.handle.net/11234/1-1983
dc.description Universal Dependencies is a project that seeks to develop cross-linguistically consistent treebank annotation for many languages, with the goal of facilitating multilingual parser development, cross-lingual learning, and parsing research from a language typology perspective. The annotation scheme is based on (universal) Stanford dependencies (de Marneffe et al., 2006, 2008, 2014), Google universal part-of-speech tags (Petrov et al., 2012), and the Interset interlingua for morphosyntactic tagsets (Zeman, 2008). This release is special in that the treebanks will be used as training/development data in the CoNLL 2017 shared task (http://universaldependencies.org/conll17/). Test data are not released, except for the few treebanks that do not take part in the shared task. 64 treebanks will be in the shared task, and they correspond to the following 45 languages: Ancient Greek, Arabic, Basque, Bulgarian, Catalan, Chinese, Croatian, Czech, Danish, Dutch, English, Estonian, Finnish, French, Galician, German, Gothic, Greek, Hebrew, Hindi, Hungarian, Indonesian, Irish, Italian, Japanese, Kazakh, Korean, Latin, Latvian, Norwegian, Old Church Slavonic, Persian, Polish, Portuguese, Romanian, Russian, Slovak, Slovenian, Spanish, Swedish, Turkish, Ukrainian, Urdu, Uyghur and Vietnamese. This release fixes a bug in http://hdl.handle.net/11234/1-1976. Changed files: ud-tools-v2.0.tgz (conllu_to_text.pl, conllu_to_conllx.pl; added text_without_spaces.pl), ud-treebanks-conll2017.tgz (fi_ftb-ud-train.txt, he-ud-train.txt, it-ud-train.txt, pt_br-ud-train.txt, es-ud-train.txt) and ud-treebanks-v2.0.tgz (fi_ftb-ud-train.txt, he-ud-train.txt, it-ud-train.txt, pt_br-ud-train.txt, es-ud-train.txt, ar_nyuad-ud-dev.txt, ar_nyuad-ud-test.txt, ar_nyuad-ud-train.txt, cop-ud-dev.txt, cop-ud-test.txt, cop-ud-train.txt, sa-ud-dev.txt, sa-ud-test.txt, sa-ud-train.txt).
dc.language.iso grc
dc.language.iso ara
dc.language.iso eus
dc.language.iso bul
dc.language.iso hrv
dc.language.iso ces
dc.language.iso dan
dc.language.iso nld
dc.language.iso eng
dc.language.iso est
dc.language.iso fin
dc.language.iso fra
dc.language.iso deu
dc.language.iso got
dc.language.iso ell
dc.language.iso heb
dc.language.iso hin
dc.language.iso hun
dc.language.iso ind
dc.language.iso gle
dc.language.iso ita
dc.language.iso jpn
dc.language.iso lat
dc.language.iso nor
dc.language.iso chu
dc.language.iso fas
dc.language.iso pol
dc.language.iso por
dc.language.iso ron
dc.language.iso slv
dc.language.iso spa
dc.language.iso swe
dc.language.iso tam
dc.language.iso cat
dc.language.iso zho
dc.language.iso glg
dc.language.iso kaz
dc.language.iso lav
dc.language.iso rus
dc.language.iso tur
dc.language.iso cop
dc.language.iso san
dc.language.iso slk
dc.language.iso ukr
dc.language.iso uig
dc.language.iso vie
dc.language.iso bel
dc.language.iso kor
dc.language.iso lit
dc.language.iso urd
dc.publisher Universal Dependencies Consortium
dc.relation.replaces http://hdl.handle.net/11234/1-1827
dc.relation.isreplacedby http://hdl.handle.net/11234/1-2515
dc.rights Licence Universal Dependencies v2.0
dc.rights.uri https://lindat.mff.cuni.cz/repository/xmlui/page/licence-UD-2.0
dc.source.uri http://universaldependencies.org/
dc.subject treebank
dc.subject dependency
dc.subject syntax
dc.subject morphology
dc.subject harmonized annotation
dc.subject interset
dc.subject universal tagset
dc.subject stanford dependencies
dc.title Universal Dependencies 2.0
dc.type corpus
metashare.ResourceInfo#ContentInfo.mediaType text
dc.rights.label PUB
has.files yes
branding LINDAT / CLARIN
demo.uri http://bionlp-www.utu.fi/dep_search
demo.uri https://lindat.mff.cuni.cz/services/pmltq/#!/treebanks
contact.person Joakim Nivre joakim.nivre@lingfil.uu.se Uppsala University
contact.person Daniel Zeman zeman@ufal.mff.cuni.cz Charles University in Prague, UFAL
sponsor Grantová agentura České republiky 15-10472S Morphologically and Syntactically Annotated Corpora of Many Languages nationalFunds
size.info 11814230 tokens
size.info 12102983 words
size.info 630518 sentences
files.size 418607328
files.count 4
featuredService.pmltq Ancient Greek|http://lindat.mff.cuni.cz/services/pmltq/ud20_grc/
featuredService.pmltq Ancient Greek - PROIEL|http://lindat.mff.cuni.cz/services/pmltq/ud20_grc_proiel/
featuredService.pmltq Arabic|http://lindat.mff.cuni.cz/services/pmltq/ud20_ar/
featuredService.pmltq Basque|http://lindat.mff.cuni.cz/services/pmltq/ud20_eu/
featuredService.pmltq Belarusian|http://lindat.mff.cuni.cz/services/pmltq/ud20_be/
featuredService.pmltq Bulgarian|http://lindat.mff.cuni.cz/services/pmltq/ud20_bg/
featuredService.pmltq Catalan|http://lindat.mff.cuni.cz/services/pmltq/ud20_ca/
featuredService.pmltq Chinese|http://lindat.mff.cuni.cz/services/pmltq/ud20_zh/
featuredService.pmltq Coptic|http://lindat.mff.cuni.cz/services/pmltq/ud20_cop/
featuredService.pmltq Croatian|http://lindat.mff.cuni.cz/services/pmltq/ud20_hr/
featuredService.pmltq Czech|http://lindat.mff.cuni.cz/services/pmltq/ud20_cs/
featuredService.pmltq Czech - CAC|http://lindat.mff.cuni.cz/services/pmltq/ud20_cs_cac/
featuredService.pmltq Czech - CLTT (legal)|http://lindat.mff.cuni.cz/services/pmltq/ud20_cs_cltt/
featuredService.pmltq Danish|http://lindat.mff.cuni.cz/services/pmltq/ud20_da/
featuredService.pmltq Dutch|http://lindat.mff.cuni.cz/services/pmltq/ud20_nl/
featuredService.pmltq Dutch - LassySmall|http://lindat.mff.cuni.cz/services/pmltq/ud20_nl_lassy/
featuredService.pmltq English|http://lindat.mff.cuni.cz/services/pmltq/ud20_en/
featuredService.pmltq English - LinES|http://lindat.mff.cuni.cz/services/pmltq/ud20_en_lines/
featuredService.pmltq English - ParTUT|http://lindat.mff.cuni.cz/services/pmltq/ud20_en_partut/
featuredService.pmltq Estonian|http://lindat.mff.cuni.cz/services/pmltq/ud20_et/
featuredService.pmltq Finnish|http://lindat.mff.cuni.cz/services/pmltq/ud20_fi/
featuredService.pmltq Finnish - FTB|http://lindat.mff.cuni.cz/services/pmltq/ud20_fi_ftb/
featuredService.pmltq French|http://lindat.mff.cuni.cz/services/pmltq/ud20_fr/
featuredService.pmltq French - ParTUT|http://lindat.mff.cuni.cz/services/pmltq/ud20_fr_partut/
featuredService.pmltq French - Sequoia|http://lindat.mff.cuni.cz/services/pmltq/ud20_fr_sequoia/
featuredService.pmltq Galician|http://lindat.mff.cuni.cz/services/pmltq/ud20_gl/
featuredService.pmltq Galician - TreeGal|http://lindat.mff.cuni.cz/services/pmltq/ud20_gl_treegal/
featuredService.pmltq German|http://lindat.mff.cuni.cz/services/pmltq/ud20_de/
featuredService.pmltq Gothic|http://lindat.mff.cuni.cz/services/pmltq/ud20_got/
featuredService.pmltq Greek|http://lindat.mff.cuni.cz/services/pmltq/ud20_el/
featuredService.pmltq Hebrew|http://lindat.mff.cuni.cz/services/pmltq/ud20_he/
featuredService.pmltq Hindi|http://lindat.mff.cuni.cz/services/pmltq/ud20_hi/
featuredService.pmltq Hungarian|http://lindat.mff.cuni.cz/services/pmltq/ud20_hu/
featuredService.pmltq Indonesian|http://lindat.mff.cuni.cz/services/pmltq/ud20_id/
featuredService.pmltq Irish|http://lindat.mff.cuni.cz/services/pmltq/ud20_ga/
featuredService.pmltq Italian|http://lindat.mff.cuni.cz/services/pmltq/ud20_it/
featuredService.pmltq Italian - ParTUT|http://lindat.mff.cuni.cz/services/pmltq/ud20_it_partut/
featuredService.pmltq Japanese|http://lindat.mff.cuni.cz/services/pmltq/ud20_ja/
featuredService.pmltq Kazakh|http://lindat.mff.cuni.cz/services/pmltq/ud20_kk/
featuredService.pmltq Korean|http://lindat.mff.cuni.cz/services/pmltq/ud20_ko/
featuredService.pmltq Latin|http://lindat.mff.cuni.cz/services/pmltq/ud20_la/
featuredService.pmltq Latin - ITTB|http://lindat.mff.cuni.cz/services/pmltq/ud20_la_ittb/
featuredService.pmltq Latin - PROIEL|http://lindat.mff.cuni.cz/services/pmltq/ud20_la_proiel/
featuredService.pmltq Latvian|http://lindat.mff.cuni.cz/services/pmltq/ud20_lv/
featuredService.pmltq Lithuanian|http://lindat.mff.cuni.cz/services/pmltq/ud20_lt/
featuredService.pmltq Norwegian - Bokmaal|http://lindat.mff.cuni.cz/services/pmltq/ud20_no_bokmaal/
featuredService.pmltq Norwegian - Nynorsk|http://lindat.mff.cuni.cz/services/pmltq/ud20_no_nynorsk/
featuredService.pmltq Old Church Slavonic|http://lindat.mff.cuni.cz/services/pmltq/ud20_cu/
featuredService.pmltq Persian|http://lindat.mff.cuni.cz/services/pmltq/ud20_fa/
featuredService.pmltq Polish|http://lindat.mff.cuni.cz/services/pmltq/ud20_pl/
featuredService.pmltq Portuguese|http://lindat.mff.cuni.cz/services/pmltq/ud20_pt/
featuredService.pmltq Portuguese - BR|http://lindat.mff.cuni.cz/services/pmltq/ud20_pt_br/
featuredService.pmltq Romanian|http://lindat.mff.cuni.cz/services/pmltq/ud20_ro/
featuredService.pmltq Russian|http://lindat.mff.cuni.cz/services/pmltq/ud20_ru/
featuredService.pmltq Russian - SynTagRus|http://lindat.mff.cuni.cz/services/pmltq/ud20_ru_syntagrus/
featuredService.pmltq Sanskrit|http://lindat.mff.cuni.cz/services/pmltq/ud20_sa/
featuredService.pmltq Slovak|http://lindat.mff.cuni.cz/services/pmltq/ud20_sk/
featuredService.pmltq Slovenian|http://lindat.mff.cuni.cz/services/pmltq/ud20_sl/
featuredService.pmltq Slovenian - SST|http://lindat.mff.cuni.cz/services/pmltq/ud20_sl_sst/
featuredService.pmltq Spanish|http://lindat.mff.cuni.cz/services/pmltq/ud20_es/
featuredService.pmltq Spanish - AnCora|http://lindat.mff.cuni.cz/services/pmltq/ud20_es_ancora/
featuredService.pmltq Swedish|http://lindat.mff.cuni.cz/services/pmltq/ud20_sv/
featuredService.pmltq Swedish - LinES|http://lindat.mff.cuni.cz/services/pmltq/ud20_sv_lines/
featuredService.pmltq Tamil|http://lindat.mff.cuni.cz/services/pmltq/ud20_ta/
featuredService.pmltq Turkish|http://lindat.mff.cuni.cz/services/pmltq/ud20_tr/
featuredService.pmltq Ukrainian|http://lindat.mff.cuni.cz/services/pmltq/ud20_uk/
featuredService.pmltq Urdu|http://lindat.mff.cuni.cz/services/pmltq/ud20_ur/
featuredService.pmltq Uyghur|http://lindat.mff.cuni.cz/services/pmltq/ud20_ug/
featuredService.pmltq Vietnamese|http://lindat.mff.cuni.cz/services/pmltq/ud20_vi/


 Files in this item

 Download all files in item (399.22 MB)
This item is Publicly Available and licensed under:
Licence Universal Dependencies v2.0
GNU General Public License, version 3.0; Distributed under Creative Commons
Icon
Name
ud-treebanks-v2.0.tgz
Size
180.67 MB
Format
application/x-gzip
Description
Treebank data
MD5
0fc5576ebade87a0733cc323d529d784
 Download file  Preview
 File Preview  
  • ud-treebanks-v2.0
    • UD_French
      • stats.xml8 kB
      • fr-ud-dev.conllu2 MB
      • fr-ud-dev.txt184 kB
      • README.txt8 kB
      • LICENSE.txt17 kB
      • fr-ud-train.txt1 MB
      • fr-ud-train.conllu21 MB
    • UD_Portuguese-BR
      • pt_br-ud-dev.conllu1 MB
      • stats.xml3 kB
      • README.txt8 kB
      • pt_br-ud-train.conllu11 MB
      • LICENSE.txt17 kB
      • pt_br-ud-dev.txt156 kB
      • pt_br-ud-train.txt1 MB
    • UD_Latin
      • stats.xml7 kB
      • la-ud-train.conllu645 kB
      • README.txt2 kB
      • la-ud-train.txt47 kB
      • la-ud-dev.txt57 kB
      • LICENSE.txt279 B
      • la-ud-dev.conllu813 kB
    • UD_Latvian
      • stats.xml12 kB
      • README.txt2 kB
      • lv-ud-dev.txt60 kB
      • lv-ud-train.txt215 kB
      • LICENSE.txt20 kB
      • lv-ud-train.conllu2 MB
      • lv-ud-dev.conllu795 kB
    • UD_Chinese
      • stats.xml5 kB
      • README.txt784 B
      • zh-ud-train.conllu5 MB
      • zh-ud-train.txt411 kB
      • zh-ud-dev.conllu673 kB
      • LICENSE.txt282 B
      • zh-ud-dev.txt53 kB
    • UD_Catalan
      • stats.xml10 kB
      • ca-ud-dev.txt289 kB
      • ca-ud-dev.conllu3 MB
      • ca-ud-train.conllu26 MB
      • README.txt523 B
      • ca-ud-train.txt2 MB
      • LICENSE.txt68 B
    • UD_Estonian
      • stats.xml12 kB
      • et-ud-dev.txt69 kB
      • README.md2 kB
      • et-ud-train.conllu1 MB
      • LICENSE.txt279 B
      • et-ud-dev.conllu797 kB
      • et-ud-train.txt131 kB
    • UD_Hebrew
      • stats.xml12 kB
      • he-ud-train.conllu9 MB
      • README.txt3 kB
      • he-ud-train.txt833 kB
      • he-ud-dev.txt71 kB
      • LICENSE.txt249 B
      • he-ud-dev.conllu838 kB
    • UD_Finnish-FTB
      • fi_ftb-ud-train.conllu9 MB
      • stats.xml16 kB
      • LICENSE.txt1 kB
      • README.txt2 kB
      • fi_ftb-ud-dev.conllu1 MB
      • fi_ftb-ud-train.txt904 kB
      • fi_ftb-ud-dev.txt111 kB
      • COPYING34 kB
      • COPYING.LESSER7 kB
    • UD_Swedish-LinES
      • stats.xml3 kB
      • sv_lines-ud-dev.txt91 kB
      • sv_lines-ud-train.txt265 kB
      • README.txt4 kB
      • sv_lines-ud-train.conllu2 MB
      • sv_lines-ud-dev.conllu718 kB
      • LICENSE.txt18 kB
    • UD_German
      • stats.xml10 kB
      • de-ud-dev.txt71 kB
      • README.txt9 kB
      • de-ud-dev.conllu792 kB
      • LICENSE.txt17 kB
      • de-ud-train.conllu16 MB
      • de-ud-train.txt1 MB
    • UD_Urdu
      • stats.xml10 kB
      • ur-ud-dev.txt115 kB
      • README.txt2 kB
      • ur-ud-train.conllu11 MB
      • LICENSE.txt247 B
      • ur-ud-train.txt853 kB
      • ur-ud-dev.conllu1 MB
    • UD_Basque
      • stats.xml14 kB
      • eu-ud-dev.conllu1 MB
      • README.txt1 kB
      • eu-ud-train.conllu4 MB
      • eu-ud-dev.txt151 kB
      • LICENSE.txt171 B
      • eu-ud-train.txt459 kB
    • UD_Galician
      • stats.xml3 kB
      • README.txt1 kB
      • LICENSE.txt173 B
      • gl-ud-dev.txt159 kB
      • gl-ud-train.txt426 kB
      • gl-ud-train.conllu4 MB
      • gl-ud-dev.conllu1 MB
    • UD_Bulgarian
      • stats.xml12 kB
      • BTB-biblio.bib1 kB
      • bg-ud-dev.conllu1 MB
      • bg-ud-train.conllu10 MB
      • README.txt5 kB
      • bg-ud-train.txt1 MB
      • LICENSE.txt327 B
      • bg-ud-dev.txt155 kB
    • UD_Slovak
      • stats.xml12 kB
      • sk-ud-train.conllu6 MB
      • README.txt2 kB
      • sk-ud-dev.txt77 kB
      • sk-ud-dev.conllu1 MB
      • LICENSE.txt202 B
      • sk-ud-train.txt447 kB
    • UD_Vietnamese
      • stats.xml3 kB
      • README.txt598 B
      • vi-ud-dev.txt69 kB
      • LICENSE.txt19 kB
      • vi-ud-train.txt127 kB
      • vi-ud-dev.conllu522 kB
      • vi-ud-train.conllu939 kB
    • UD_Belarusian
      • be-ud-train.txt42 kB
      • be-ud-dev.conllu132 kB
      • be-ud-test.conllu124 kB
      • be-ud-train.conllu350 kB
      • be-ud-dev.txt15 kB
      • stats.xml11 kB
      • be-ud-test.txt15 kB
      • LICENSE.txt1021 B
      • README.txt4 kB
    • UD_Coptic
      • cop-ud-test.txt7 kB
      • cop-ud-train.txt51 kB
      • cop-ud-dev.txt6 kB
      • cop-ud-test.conllu61 kB
      • cop-ud-train.conllu442 kB
      • stats.xml4 kB
      • LICENSE.txt538 B
      • README.txt3 kB
      • cop-ud-dev.conllu53 kB
    • UD_Indonesian
      • stats.xml3 kB
      • id-ud-train.txt575 kB
      • README.txt8 kB
      • id-ud-dev.conllu511 kB
      • id-ud-train.conllu3 MB
      • LICENSE.txt17 kB
      • id-ud-dev.txt74 kB
    • UD_Russian-SynTagRus
      • stats.xml11 kB
      • ru_syntagrus-ud-dev.conllu10 MB
      • ru_syntagrus-ud-train.txt9 MB
      • README.txt3 kB
      • LICENSE.txt188 B
      • ru_syntagrus-ud-train.conllu78 MB
      • ru_syntagrus-ud-dev.txt1 MB
    • UD_Galician-TreeGal
      • stats.xml9 kB
      • README.md1 kB
      • gl_treegal-ud-dev.txt52 kB
      • gl_treegal-ud-train.conllu324 kB
      • LICENSE.txt14 kB
      • gl_treegal-ud-train.txt24 kB
      • gl_treegal-ud-dev.conllu705 kB
    • UD_Polish
      • stats.xml11 kB
      • README.txt2 kB
      • pl-ud-dev.conllu940 kB
      • LICENSE.txt68 B
      • pl-ud-train.txt383 kB
      • pl-ud-train.conllu5 MB
      • pl-ud-dev.txt62 kB
    • UD_Arabic
      • stats.xml10 kB
      • ar-ud-dev.txt241 kB
      • README.md3 kB
      • ar-ud-train.txt1 MB
      • ar-ud-train.conllu36 MB
      • LICENSE.txt19 kB
      • ar-ud-dev.conllu4 MB
    • UD_Ancient_Greek-PROIEL
      • stats.xml13 kB
      • grc_proiel-ud-train.conllu19 MB
      • grc_proiel-ud-dev.conllu1 MB
      • README.md1 kB
      • grc_proiel-ud-train.txt2 MB
      • LICENSE.txt279 B
      • grc_proiel-ud-dev.txt159 kB
    • UD_Gothic
      • stats.xml9 kB
      • got-ud-dev.txt62 kB
      • README.md1 kB
      • got-ud-train.conllu3 MB
      • LICENSE.txt279 B
      • got-ud-train.txt221 kB
      • got-ud-dev.conllu932 kB
    • UD_Romanian
      • stats.xml12 kB
      • README.txt3 kB
      • ro-ud-dev.txt98 kB
      • ro-ud-dev.conllu1 MB
      • ro-ud-train.conllu13 MB
      • LICENSE.txt66 B
      • ro-ud-train.txt1 MB
    • UD_Swedish
      • stats.xml9 kB
      • sv-ud-dev.conllu764 kB
      • sv-ud-train.txt402 kB
      • README.txt6 kB
      • sv-ud-train.conllu5 MB
      • LICENSE.txt20 kB
      • sv-ud-dev.txt58 kB
    • UD_Korean
      • ko-ud-dev.txt103 kB
      • stats.xml2 kB
      • ko-ud-train.txt485 kB
      • README.txt8 kB
      • ko-ud-dev.conllu561 kB
      • LICENSE.txt15 kB
      • ko-ud-train.conllu2 MB
    • UD_English
      • en-ud-dev.txt123 kB
      • stats.xml8 kB
      • en-ud-train.txt985 kB
      • README.txt5 kB
      • en-ud-dev.conllu1 MB
      • en-ud-train.conllu11 MB
      • LICENSE.txt19 kB
    • UD_Uyghur
      • stats.xml2 kB
      • ug-ud-dev.txt20 kB
      • README.txt651 B
      • LICENSE.txt202 B
      • ug-ud-dev.conllu110 kB
    • UD_Russian
      • stats.xml12 kB
      • README.txt578 B
      • ru-ud-dev.conllu1 MB
      • ru-ud-train.txt796 kB
      • ru-ud-train.conllu6 MB
      • LICENSE.txt282 B
      • ru-ud-dev.txt124 kB
    • UD_Persian
      • stats.xml9 kB
      • README.md4 kB
      • fa-ud-train.txt995 kB
      • fa-ud-train.conllu7 MB
      • fa-ud-dev.conllu989 kB
      • LICENSE.txt110 B
      • fa-ud-dev.txt133 kB
    • UD_Dutch
      • stats.xml11 kB
      • README.txt4 kB
      • nl-ud-train.txt1018 kB
      • nl-ud-train.conllu14 MB
      • LICENSE.txt19 kB
      • nl-ud-dev.txt62 kB
      • nl-ud-dev.conllu900 kB
    • UD_Greek
      • stats.xml12 kB
      • el-ud-train.txt440 kB
      • el-ud-dev.conllu916 kB
      • README.md2 kB
      • el-ud-dev.txt107 kB
      • LICENSE.txt398 B
      • el-ud-train.conllu3 MB
    • UD_Italian-ParTUT
      • stats.xml8 kB
      • it_partut-ud-dev.conllu873 kB
      • README.md3 kB
      • it_partut-ud-train.conllu1 MB
      • it_partut-ud-train.txt151 kB
      • LICENSE.txt18 kB
      • it_partut-ud-dev.txt73 kB
    • UD_Czech
      • cs-ud-dev.conllu16 MB
      • stats.xml19 kB
      • README.txt4 kB
      • cs-ud-train.txt7 MB
      • cs-ud-train.conllu118 MB
      • LICENSE.txt19 kB
      • cs-ud-dev.txt989 kB
    • UD_Czech-CLTT
      • stats.xml12 kB
      • cs_cltt-ud-dev.txt72 kB
      • README.txt1 kB
      • LICENSE.txt265 B
      • cs_cltt-ud-train.conllu1 MB
      • cs_cltt-ud-dev.conllu990 kB
      • cs_cltt-ud-train.txt113 kB
    • UD_Latin-PROIEL
      • stats.xml9 kB
      • README.md1 kB
      • la_proiel-ud-dev.conllu1 MB
      • LICENSE.txt279 B
      • la_proiel-ud-train.conllu13 MB
      • la_proiel-ud-dev.txt77 kB
      • la_proiel-ud-train.txt915 kB
    • UD_French-Sequoia
      • fr_sequoia-ud-train.txt267 kB
      • stats.xml8 kB
      • README.md1 kB
      • fr_sequoia-ud-train.conllu2 MB
      • LICENSE.txt4 kB
      • fr_sequoia-ud-dev.txt52 kB
      • fr_sequoia-ud-dev.conllu608 kB
    • UD_Dutch-LassySmall
      • nl_lassysmall-ud-dev.conllu566 kB
      • stats.xml7 kB
      • README.txt1 kB
      • LICENSE.txt392 B
      • nl_lassysmall-ud-train.conllu4 MB
      • nl_lassysmall-ud-train.txt453 kB
      • nl_lassysmall-ud-dev.txt58 kB
    • UD_Lithuanian
      • lt-ud-dev.conllu89 kB
      • lt-ud-dev.txt6 kB
      • lt-ud-test.conllu90 kB
      • lt-ud-test.txt6 kB
      • lt-ud-train.txt20 kB
      • stats.xml10 kB
      • LICENSE.txt202 B
      • README.txt1 kB
      • lt-ud-train.conllu259 kB
    • UD_Turkish
      • stats.xml13 kB
      • tr-ud-train.txt246 kB
      • tr-ud-train.conllu2 MB
      • tr-ud-dev.conllu787 kB
      • README.txt556 B
      • LICENSE.txt20 kB
      • tr-ud-dev.txt63 kB
    • UD_Ukrainian
      • stats.xml15 kB
      • uk-ud-train.txt4 kB
      • README.txt409 B
      • uk-ud-dev.conllu1 MB
      • uk-ud-train.conllu37 kB
      • LICENSE.txt172 B
      • uk-ud-dev.txt120 kB
    • UD_French-ParTUT
      • stats.xml8 kB
      • fr_partut-ud-train.txt33 kB
      • README.md3 kB
      • fr_partut-ud-dev.conllu713 kB
      • LICENSE.txt18 kB
      • fr_partut-ud-train.conllu398 kB
      • fr_partut-ud-dev.txt61 kB
    • UD_Croatian
      • stats.xml11 kB
      • hr-ud-train.conllu11 MB
      • README.md3 kB
      • hr-ud-dev.txt86 kB
      • LICENSE.txt233 B
      • hr-ud-dev.conllu1017 kB
      • hr-ud-train.txt995 kB
    • UD_Norwegian-Bokmaal
      • stats.xml9 kB
      • no_bokmaal-ud-train.conllu14 MB
      • no_bokmaal-ud-dev.txt195 kB
      • README.md6 kB
      • no_bokmaal-ud-dev.conllu2 MB
      • LICENSE.txt68 B
      • no_bokmaal-ud-train.txt1 MB
    • UD_Slovenian-SST
      • stats.xml11 kB
      • sl_sst-ud-dev.txt53 kB
      • sl_sst-ud-train.conllu828 kB
      • README.txt3 kB
      • LICENSE.txt441 B
      • sl_sst-ud-train.txt49 kB
      • sl_sst-ud-dev.conllu873 kB
    • UD_Tamil
      • ta-ud-test.conllu336 kB
      • ta-ud-dev.txt24 kB
      • ta-ud-train.conllu1 MB
      • ta-ud-train.txt123 kB
      • stats.xml16 kB
      • LICENSE.txt280 B
      • README.txt2 kB
      • ta-ud-test.txt37 kB
      • ta-ud-dev.conllu215 kB
    • UD_Danish
      • stats.xml9 kB
      • README.txt5 kB
      • da-ud-train.conllu4 MB
      • da-ud-dev.txt56 kB
      • LICENSE.txt19 kB
      • da-ud-train.txt434 kB
      • da-ud-dev.conllu629 kB
    • UD_Finnish
      • stats.xml18 kB
      • fi-ud-dev.conllu1 MB
      • fi-ud-train.txt1 MB
      • README.txt1 kB
      • fi-ud-train.conllu11 MB
      • LICENSE.txt24 kB
      • fi-ud-dev.txt136 kB
    • UD_Spanish-AnCora
      • es_ancora-ud-dev.conllu3 MB
      • stats.xml10 kB
      • es_ancora-ud-dev.txt275 kB
      • es_ancora-ud-train.txt2 MB
      • README.txt507 B
      • LICENSE.txt68 B
      • es_ancora-ud-train.conllu28 MB
    • UD_Latin-ITTB
      • stats.xml11 kB
      • la_ittb-ud-dev.conllu949 kB
      • README.txt2 kB
      • LICENSE.txt19 kB
      • la_ittb-ud-train.txt1 MB
      • la_ittb-ud-train.conllu23 MB
      • la_ittb-ud-dev.txt59 kB
    • UD_Slovenian
      • stats.xml11 kB
      • sl-ud-train.conllu9 MB
      • README.txt3 kB
      • sl-ud-dev.conllu1 MB
      • sl-ud-train.txt621 kB
      • LICENSE.txt543 B
      • sl-ud-dev.txt80 kB
    • UD_Kazakh
      • stats.xml10 kB
      • README.txt1 kB
      • kk-ud-dev.txt5 kB
      • LICENSE.txt206 B
      • kk-ud-dev.conllu39 kB
    • UD_Hungarian
      • stats.xml16 kB
      • README.txt3 kB
      • hu-ud-dev.txt83 kB
      • LICENSE.txt30 B
      • hu-ud-train.conllu1 MB
      • hu-ud-train.txt137 kB
      • hu-ud-dev.conllu982 kB
    • UD_Portuguese
      • pt-ud-dev.txt53 kB
      • pt-ud-dev.conllu875 kB
      • stats.xml10 kB
      • pt-ud-train.txt1023 kB
      • README.txt6 kB
      • pt-ud-train.conllu16 MB
      • LICENSE.txt269 B
    • UD_Hindi
      • stats.xml13 kB
      • README.txt2 kB
      • hi-ud-train.txt3 MB
      • hi-ud-train.conllu39 MB
      • LICENSE.txt249 B
      • hi-ud-dev.conllu4 MB
      • hi-ud-dev.txt427 kB
    • UD_English-ParTUT
      • en_partut-ud-train.conllu1 MB
      • stats.xml8 kB
      • README.md3 kB
      • en_partut-ud-train.txt135 kB
      • en_partut-ud-dev.txt65 kB
      • en_partut-ud-dev.conllu656 kB
      • LICENSE.txt18 kB
    • UD_Spanish
      • stats.xml9 kB
      • README.txt9 kB
      • LICENSE.txt17 kB
      • es-ud-train.conllu22 MB
      • es-ud-dev.txt191 kB
      • es-ud-dev.conllu2 MB
      • es-ud-train.txt1 MB
    • UD_Japanese
      • stats.xml3 kB
      • ja-ud-dev.conllu612 kB
      • ja-ud-train.txt802 kB
      • README.txt8 kB
      • ja-ud-dev.txt57 kB
      • LICENSE.txt17 kB
      • ja-ud-train.conllu8 MB
    • UD_Irish
      • stats.xml11 kB
      • README.txt3 kB
      • ga-ud-dev.txt55 kB
      • ga-ud-dev.conllu681 kB
      • LICENSE.txt13 B
      • ga-ud-train.conllu201 kB
      • ga-ud-train.txt16 kB
    • UD_Italian
      • it-ud-dev.conllu738 kB
      • stats.xml9 kB
      • README.md8 kB
      • it-ud-train.txt1 MB
      • LICENSE.txt22 kB
      • it-ud-dev.txt59 kB
      • it-ud-train.conllu16 MB
    • UD_Ancient_Greek
      • stats.xml9 kB
      • grc-ud-dev.conllu1 MB
      • README.txt2 kB
      • grc-ud-train.conllu14 MB
      • grc-ud-dev.txt250 kB
      • grc-ud-train.txt1 MB
      • LICENSE.txt279 B
    • UD_Arabic-NYUAD
      • ar_nyuad-ud-test.conllu5 MB
      • stats.xml6 kB
      • README.txt2 kB
      • LICENSE.txt202 B
      • ar_nyuad-ud-dev.txt123 kB
      • ar_nyuad-ud-train.txt982 kB
      • ar_nyuad-ud-train.conllu44 MB
      • ar_nyuad-ud-test.txt123 kB
      • ar_nyuad-ud-dev.conllu5 MB
      • merge.jar91 kB
    • UD_English-LinES
      • stats.xml2 kB
      • en_lines-ud-train.txt239 kB
      • en_lines-ud-dev.txt82 kB
      • README.txt4 kB
      • en_lines-ud-dev.conllu695 kB
      • en_lines-ud-train.conllu1 MB
      • LICENSE.txt18 kB
    • UD_Sanskrit
      • sa-ud-test.txt4 kB
      • sa-ud-train.txt11 kB
      • sa-ud-dev.txt4 kB
      • stats.xml13 kB
      • sa-ud-test.conllu33 kB
      • LICENSE.txt202 B
      • README.txt941 B
      • sa-ud-train.conllu100 kB
      • sa-ud-dev.conllu39 kB
    • UD_Czech-CAC
      • cs_cac-ud-dev.conllu1 MB
      • stats.xml18 kB
      • cs_cac-ud-train.conllu47 MB
      • cs_cac-ud-dev.txt72 kB
      • README.txt4 kB
      • LICENSE.txt265 B
      • cs_cac-ud-train.txt2 MB
    • UD_Norwegian-Nynorsk
      • stats.xml8 kB
      • no_nynorsk-ud-train.conllu14 MB
      • no_nynorsk-ud-train.txt1 MB
      • README.md3 kB
      • LICENSE.txt68 B
      • no_nynorsk-ud-dev.txt166 kB
      • no_nynorsk-ud-dev.conllu1 MB
    • UD_Old_Church_Slavonic
      • stats.xml12 kB
      • cu-ud-train.conllu3 MB
      • README.md1 kB
      • cu-ud-dev.conllu1 MB
      • cu-ud-dev.txt97 kB
      • cu-ud-train.txt369 kB
      • LICENSE.txt279 B
Icon
Name
ud-treebanks-conll2017.tgz
Size
174.86 MB
Format
application/x-gzip
Description
Training and development data for the CoNLL 2017 shared task
MD5
f4869f28c376c360c740ef8caafdfffd
 Download file  Preview
 File Preview  
  • ud-treebanks-conll2017
    • UD_French
      • fr-ud-dev.conllu2 MB
      • stats.xml8 kB
      • fr-ud-dev.txt184 kB
      • README.txt8 kB
      • LICENSE.txt17 kB
      • fr-ud-train.txt1 MB
      • fr-ud-train.conllu21 MB
    • UD_Portuguese-BR
      • pt_br-ud-dev.conllu1 MB
      • stats.xml3 kB
      • README.txt8 kB
      • pt_br-ud-train.conllu11 MB
      • LICENSE.txt17 kB
      • pt_br-ud-dev.txt156 kB
      • pt_br-ud-train.txt1 MB
    • UD_Latin
      • stats.xml7 kB
      • la-ud-train.conllu1 MB
      • README.txt2 kB
      • la-ud-train.txt104 kB
      • LICENSE.txt279 B
    • UD_Latvian
      • stats.xml12 kB
      • README.txt2 kB
      • lv-ud-dev.txt60 kB
      • lv-ud-train.txt215 kB
      • LICENSE.txt20 kB
      • lv-ud-train.conllu2 MB
      • lv-ud-dev.conllu795 kB
    • UD_Chinese
      • stats.xml5 kB
      • README.txt784 B
      • zh-ud-train.conllu5 MB
      • zh-ud-train.txt411 kB
      • zh-ud-dev.conllu673 kB
      • LICENSE.txt282 B
      • zh-ud-dev.txt53 kB
    • UD_Catalan
      • ca-ud-dev.txt289 kB
      • ca-ud-dev.conllu3 MB
      • stats.xml10 kB
      • ca-ud-train.conllu26 MB
      • README.txt523 B
      • ca-ud-train.txt2 MB
      • LICENSE.txt68 B
    • UD_Estonian
      • stats.xml12 kB
      • et-ud-dev.txt69 kB
      • README.md2 kB
      • et-ud-train.conllu1 MB
      • et-ud-dev.conllu797 kB
      • LICENSE.txt279 B
      • et-ud-train.txt131 kB
    • UD_Finnish-FTB
      • fi_ftb-ud-train.conllu9 MB
      • stats.xml16 kB
      • LICENSE.txt1 kB
      • README.txt2 kB
      • fi_ftb-ud-dev.conllu1 MB
      • fi_ftb-ud-train.txt904 kB
      • fi_ftb-ud-dev.txt111 kB
      • COPYING34 kB
      • COPYING.LESSER7 kB
    • UD_Hebrew
      • stats.xml12 kB
      • he-ud-train.conllu9 MB
      • README.txt3 kB
      • he-ud-dev.txt71 kB
      • he-ud-train.txt833 kB
      • LICENSE.txt249 B
      • he-ud-dev.conllu838 kB
    • UD_Swedish-LinES
      • sv_lines-ud-dev.txt91 kB
      • stats.xml3 kB
      • sv_lines-ud-train.txt265 kB
      • README.txt4 kB
      • sv_lines-ud-train.conllu2 MB
      • sv_lines-ud-dev.conllu718 kB
      • LICENSE.txt18 kB
    • UD_German
      • stats.xml10 kB
      • de-ud-dev.txt71 kB
      • README.txt9 kB
      • de-ud-dev.conllu792 kB
      • LICENSE.txt17 kB
      • de-ud-train.conllu16 MB
      • de-ud-train.txt1 MB
    • UD_Urdu
      • stats.xml10 kB
      • README.txt2 kB
      • ur-ud-dev.txt115 kB
      • ur-ud-train.conllu11 MB
      • LICENSE.txt247 B
      • ur-ud-train.txt853 kB
      • ur-ud-dev.conllu1 MB
    • UD_Basque
      • stats.xml14 kB
      • eu-ud-dev.conllu1 MB
      • README.txt1 kB
      • eu-ud-train.conllu4 MB
      • LICENSE.txt171 B
      • eu-ud-dev.txt151 kB
      • eu-ud-train.txt459 kB
    • UD_Galician
      • stats.xml3 kB
      • README.txt1 kB
      • gl-ud-dev.txt159 kB
      • LICENSE.txt173 B
      • gl-ud-train.txt426 kB
      • gl-ud-train.conllu4 MB
      • gl-ud-dev.conllu1 MB
    • UD_Bulgarian
      • stats.xml12 kB
      • bg-ud-dev.conllu1 MB
      • BTB-biblio.bib1 kB
      • bg-ud-train.conllu10 MB
      • README.txt5 kB
      • bg-ud-train.txt1 MB
      • LICENSE.txt327 B
      • bg-ud-dev.txt155 kB
    • UD_Slovak
      • stats.xml12 kB
      • sk-ud-train.conllu6 MB
      • README.txt2 kB
      • sk-ud-dev.txt77 kB
      • LICENSE.txt202 B
      • sk-ud-dev.conllu1 MB
      • sk-ud-train.txt447 kB
    • UD_Vietnamese
      • stats.xml3 kB
      • README.txt598 B
      • vi-ud-dev.txt69 kB
      • LICENSE.txt19 kB
      • vi-ud-train.txt127 kB
      • vi-ud-dev.conllu522 kB
      • vi-ud-train.conllu939 kB
    • UD_Indonesian
      • stats.xml3 kB
      • id-ud-train.txt575 kB
      • README.txt8 kB
      • id-ud-dev.conllu511 kB
      • id-ud-train.conllu3 MB
      • LICENSE.txt17 kB
      • id-ud-dev.txt74 kB
    • UD_Russian-SynTagRus
      • stats.xml11 kB
      • ru_syntagrus-ud-dev.conllu10 MB
      • README.txt3 kB
      • ru_syntagrus-ud-train.txt9 MB
      • LICENSE.txt188 B
      • ru_syntagrus-ud-train.conllu78 MB
      • ru_syntagrus-ud-dev.txt1 MB
    • UD_Galician-TreeGal
      • stats.xml9 kB
      • README.md1 kB
      • gl_treegal-ud-train.conllu1 MB
      • LICENSE.txt14 kB
      • gl_treegal-ud-train.txt76 kB
    • UD_Polish
      • stats.xml11 kB
      • README.txt2 kB
      • pl-ud-train.txt383 kB
      • pl-ud-dev.conllu940 kB
      • LICENSE.txt68 B
      • pl-ud-train.conllu5 MB
      • pl-ud-dev.txt62 kB
    • UD_Arabic
      • stats.xml10 kB
      • ar-ud-dev.txt241 kB
      • README.md3 kB
      • ar-ud-train.txt1 MB
      • ar-ud-train.conllu36 MB
      • LICENSE.txt19 kB
      • ar-ud-dev.conllu4 MB
    • UD_Gothic
      • stats.xml9 kB
      • got-ud-dev.txt62 kB
      • README.md1 kB
      • got-ud-train.conllu3 MB
      • LICENSE.txt279 B
      • got-ud-train.txt221 kB
      • got-ud-dev.conllu932 kB
    • UD_Ancient_Greek-PROIEL
      • grc_proiel-ud-train.conllu19 MB
      • grc_proiel-ud-dev.conllu1 MB
      • stats.xml13 kB
      • README.md1 kB
      • grc_proiel-ud-train.txt2 MB
      • LICENSE.txt279 B
      • grc_proiel-ud-dev.txt159 kB
    • UD_Romanian
      • stats.xml12 kB
      • ro-ud-dev.txt98 kB
      • README.txt3 kB
      • ro-ud-dev.conllu1 MB
      • ro-ud-train.conllu13 MB
      • LICENSE.txt66 B
      • ro-ud-train.txt1 MB
    • UD_Swedish
      • sv-ud-dev.conllu764 kB
      • stats.xml9 kB
      • sv-ud-train.txt402 kB
      • README.txt6 kB
      • sv-ud-train.conllu5 MB
      • LICENSE.txt20 kB
      • sv-ud-dev.txt58 kB
    • UD_English
      • en-ud-dev.txt123 kB
      • stats.xml8 kB
      • en-ud-train.txt985 kB
      • README.txt5 kB
      • en-ud-dev.conllu1 MB
      • en-ud-train.conllu11 MB
      • LICENSE.txt19 kB
    • UD_Korean
      • stats.xml2 kB
      • ko-ud-dev.txt103 kB
      • ko-ud-train.txt485 kB
      • README.txt8 kB
      • ko-ud-dev.conllu561 kB
      • LICENSE.txt15 kB
      • ko-ud-train.conllu2 MB
    • UD_Uyghur
      • stats.xml2 kB
      • ug-ud-train.conllu110 kB
      • ug-ud-train.txt20 kB
      • README.txt651 B
      • LICENSE.txt202 B
    • UD_Russian
      • stats.xml12 kB
      • README.txt578 B
      • ru-ud-dev.conllu1 MB
      • ru-ud-train.txt796 kB
      • ru-ud-train.conllu6 MB
      • LICENSE.txt282 B
      • ru-ud-dev.txt124 kB
    • UD_Persian
      • stats.xml9 kB
      • README.md4 kB
      • fa-ud-train.txt995 kB
      • fa-ud-dev.conllu989 kB
      • LICENSE.txt110 B
      • fa-ud-train.conllu7 MB
      • fa-ud-dev.txt133 kB
    • UD_Greek
      • stats.xml12 kB
      • el-ud-dev.conllu916 kB
      • el-ud-train.txt440 kB
      • README.md2 kB
      • el-ud-dev.txt107 kB
      • LICENSE.txt398 B
      • el-ud-train.conllu3 MB
    • UD_Dutch
      • stats.xml11 kB
      • README.txt4 kB
      • nl-ud-train.txt1018 kB
      • LICENSE.txt19 kB
      • nl-ud-train.conllu14 MB
      • nl-ud-dev.txt62 kB
      • nl-ud-dev.conllu900 kB
    • UD_Italian-ParTUT
      • stats.xml8 kB
      • README.md3 kB
      • it_partut-ud-dev.conllu873 kB
      • it_partut-ud-train.conllu1 MB
      • it_partut-ud-train.txt151 kB
      • LICENSE.txt18 kB
      • it_partut-ud-dev.txt73 kB
    • UD_Czech
      • cs-ud-dev.conllu16 MB
      • stats.xml19 kB
      • README.txt4 kB
      • cs-ud-train.conllu118 MB
      • cs-ud-train.txt7 MB
      • LICENSE.txt19 kB
      • cs-ud-dev.txt989 kB
    • UD_Czech-CLTT
      • stats.xml12 kB
      • cs_cltt-ud-dev.txt72 kB
      • README.txt1 kB
      • LICENSE.txt265 B
      • cs_cltt-ud-train.conllu1 MB
      • cs_cltt-ud-dev.conllu990 kB
      • cs_cltt-ud-train.txt113 kB
    • UD_Latin-PROIEL
      • stats.xml9 kB
      • README.md1 kB
      • la_proiel-ud-dev.conllu1 MB
      • la_proiel-ud-train.conllu13 MB
      • LICENSE.txt279 B
      • la_proiel-ud-dev.txt77 kB
      • la_proiel-ud-train.txt915 kB
    • UD_French-Sequoia
      • stats.xml8 kB
      • fr_sequoia-ud-train.txt267 kB
      • README.md1 kB
      • fr_sequoia-ud-train.conllu2 MB
      • LICENSE.txt4 kB
      • fr_sequoia-ud-dev.txt52 kB
      • fr_sequoia-ud-dev.conllu608 kB
    • UD_Dutch-LassySmall
      • stats.xml7 kB
      • nl_lassysmall-ud-dev.conllu566 kB
      • README.txt1 kB
      • LICENSE.txt392 B
      • nl_lassysmall-ud-train.conllu4 MB
      • nl_lassysmall-ud-train.txt453 kB
      • nl_lassysmall-ud-dev.txt58 kB
    • UD_Turkish
      • stats.xml13 kB
      • tr-ud-train.txt246 kB
      • tr-ud-dev.conllu787 kB
      • tr-ud-train.conllu2 MB
      • README.txt556 B
      • LICENSE.txt20 kB
      • tr-ud-dev.txt63 kB
    • UD_Ukrainian
      • stats.xml15 kB
      • uk-ud-train.txt125 kB
      • README.txt409 B
      • uk-ud-train.conllu1 MB
      • LICENSE.txt172 B
    • UD_French-ParTUT
      • stats.xml8 kB
      • fr_partut-ud-train.txt95 kB
      • README.md3 kB
      • LICENSE.txt18 kB
      • fr_partut-ud-train.conllu1 MB
    • UD_Croatian
      • stats.xml11 kB
      • hr-ud-train.conllu11 MB
      • README.md3 kB
      • hr-ud-dev.txt86 kB
      • LICENSE.txt233 B
      • hr-ud-dev.conllu1017 kB
      • hr-ud-train.txt995 kB
    • UD_Norwegian-Bokmaal
      • stats.xml9 kB
      • no_bokmaal-ud-train.conllu14 MB
      • README.md6 kB
      • no_bokmaal-ud-dev.txt195 kB
      • no_bokmaal-ud-dev.conllu2 MB
      • LICENSE.txt68 B
      • no_bokmaal-ud-train.txt1 MB
    • UD_Slovenian-SST
      • stats.xml11 kB
      • sl_sst-ud-train.conllu1 MB
      • README.txt3 kB
      • LICENSE.txt441 B
      • sl_sst-ud-train.txt103 kB
    • UD_Danish
      • stats.xml9 kB
      • README.txt5 kB
      • da-ud-train.conllu4 MB
      • da-ud-dev.txt56 kB
      • LICENSE.txt19 kB
      • da-ud-train.txt434 kB
      • da-ud-dev.conllu629 kB
    • UD_Finnish
      • stats.xml18 kB
      • fi-ud-dev.conllu1 MB
      • fi-ud-train.txt1 MB
      • README.txt1 kB
      • fi-ud-train.conllu11 MB
      • LICENSE.txt24 kB
      • fi-ud-dev.txt136 kB
    • UD_Spanish-AnCora
      • es_ancora-ud-dev.conllu3 MB
      • stats.xml10 kB
      • es_ancora-ud-dev.txt275 kB
      • README.txt507 B
      • es_ancora-ud-train.txt2 MB
      • LICENSE.txt68 B
      • es_ancora-ud-train.conllu28 MB
    • UD_Latin-ITTB
      • stats.xml11 kB
      • la_ittb-ud-dev.conllu949 kB
      • README.txt2 kB
      • LICENSE.txt19 kB
      • la_ittb-ud-train.txt1 MB
      • la_ittb-ud-train.conllu23 MB
      • la_ittb-ud-dev.txt59 kB
    • UD_Slovenian
      • stats.xml11 kB
      • sl-ud-train.conllu9 MB
      • README.txt3 kB
      • sl-ud-dev.conllu1 MB
      • sl-ud-train.txt621 kB
      • LICENSE.txt543 B
      • sl-ud-dev.txt80 kB
    • UD_Hungarian
      • stats.xml16 kB
      • README.txt3 kB
      • hu-ud-dev.txt83 kB
      • LICENSE.txt30 B
      • hu-ud-train.conllu1 MB
      • hu-ud-train.txt137 kB
      • hu-ud-dev.conllu982 kB
    • UD_Kazakh
      • stats.xml10 kB
      • kk-ud-train.txt5 kB
      • README.txt1 kB
      • kk-ud-train.conllu39 kB
      • LICENSE.txt206 B
    • UD_Portuguese
      • pt-ud-dev.conllu875 kB
      • stats.xml10 kB
      • pt-ud-dev.txt53 kB
      • pt-ud-train.txt1023 kB
      • README.txt6 kB
      • pt-ud-train.conllu16 MB
      • LICENSE.txt269 B
    • UD_Hindi
      • stats.xml13 kB
      • README.txt2 kB
      • hi-ud-train.txt3 MB
      • hi-ud-train.conllu39 MB
      • LICENSE.txt249 B
      • hi-ud-dev.conllu4 MB
      • hi-ud-dev.txt427 kB
    • UD_English-ParTUT
      • stats.xml8 kB
      • en_partut-ud-train.conllu1 MB
      • README.md3 kB
      • en_partut-ud-train.txt135 kB
      • en_partut-ud-dev.txt65 kB
      • en_partut-ud-dev.conllu656 kB
      • LICENSE.txt18 kB
    • UD_Spanish
      • stats.xml9 kB
      • README.txt9 kB
      • LICENSE.txt17 kB
      • es-ud-train.conllu22 MB
      • es-ud-dev.txt191 kB
      • es-ud-dev.conllu2 MB
      • es-ud-train.txt1 MB
    • UD_Japanese
      • stats.xml3 kB
      • ja-ud-dev.conllu612 kB
      • ja-ud-train.txt802 kB
      • README.txt8 kB
      • ja-ud-dev.txt57 kB
      • LICENSE.txt17 kB
      • ja-ud-train.conllu8 MB
    • UD_Irish
      • stats.xml11 kB
      • README.txt3 kB
      • LICENSE.txt13 B
      • ga-ud-train.conllu882 kB
      • ga-ud-train.txt72 kB
    • UD_Italian
      • it-ud-dev.conllu738 kB
      • stats.xml9 kB
      • README.md8 kB
      • it-ud-train.txt1 MB
      • LICENSE.txt22 kB
      • it-ud-dev.txt59 kB
      • it-ud-train.conllu16 MB
    • UD_Ancient_Greek
      • stats.xml9 kB
      • grc-ud-dev.conllu1 MB
      • README.txt2 kB
      • grc-ud-dev.txt250 kB
      • grc-ud-train.conllu14 MB
      • grc-ud-train.txt1 MB
      • LICENSE.txt279 B
    • UD_English-LinES
      • stats.xml2 kB
      • en_lines-ud-dev.txt82 kB
      • en_lines-ud-train.txt239 kB
      • README.txt4 kB
      • en_lines-ud-dev.conllu695 kB
      • en_lines-ud-train.conllu1 MB
      • LICENSE.txt18 kB
    • UD_Czech-CAC
      • cs_cac-ud-dev.conllu1 MB
      • stats.xml18 kB
      • cs_cac-ud-dev.txt72 kB
      • cs_cac-ud-train.conllu47 MB
      • README.txt4 kB
      • LICENSE.txt265 B
      • cs_cac-ud-train.txt2 MB
    • UD_Norwegian-Nynorsk
      • stats.xml8 kB
      • no_nynorsk-ud-train.conllu14 MB
      • README.md3 kB
      • no_nynorsk-ud-train.txt1 MB
      • LICENSE.txt68 B
      • no_nynorsk-ud-dev.txt166 kB
      • no_nynorsk-ud-dev.conllu1 MB
    • UD_Old_Church_Slavonic
      • stats.xml12 kB
      • cu-ud-train.conllu3 MB
      • README.md1 kB
      • cu-ud-dev.txt97 kB
      • cu-ud-train.txt369 kB
      • cu-ud-dev.conllu1 MB
      • LICENSE.txt279 B
Icon
Name
ud-documentation-v2.0.tgz
Size
43.5 MB
Format
application/x-gzip
Description
Documentation
MD5
fbe08dd83675da3ac1e54a5ee67d1a69
 Download file  Preview
 File Preview  
    • ...0 B
  • ud-documentation-v2.0
    • markdown-source
      • _so-dep
        • name.md140 B
        • nsubj.md153 B
        • advcl.md163 B
        • vocative.md152 B
        • csubjpass.md169 B
        • mark.md142 B
        • xcomp.md161 B
        • remnant.md161 B
        • compound.md152 B
        • csubj.md153 B
        • reparandum.md169 B
        • mwe.md155 B
        • amod.md155 B
        • nmod.md152 B
        • nummod.md156 B
        • dobj.md149 B
        • iobj.md151 B
        • acl.md158 B
        • case.md148 B
        • punct.md149 B
        • advmod.md158 B
        • neg.md151 B
        • ccomp.md156 B
        • parataxis.md155 B
        • auxpass.md159 B
        • goeswith.md153 B
        • appos.md159 B
        • cc.md156 B
        • det.md144 B
        • discourse.md163 B
        • list.md140 B
        • cop.md140 B
        • aux_.md311 B
        • foreign.md155 B
        • root.md140 B
        • expl.md145 B
        • dislocated.md167 B
        • nsubjpass.md169 B
        • conj.md144 B
        • dep.md156 B
      • _fo-overview
        • feat-all.md667 B
        • syntax.md190 B
        • feat-index.md291 B
        • specific-syntax.md217 B
        • tokenization.md200 B
        • morphology.md214 B
        • introduction.md173 B
        • pos-all.md663 B
        • dep-index.md296 B
        • dep-all.md671 B
        • pos-index.md288 B
      • _mr-pos
        • CONJ.md158 B
        • PRON.md141 B
        • VERB.md138 B
        • ADV.md138 B
        • NUM.md139 B
        • ADJ.md141 B
        • SCONJ.md161 B
        • SYM.md138 B
        • DET.md142 B
        • NOUN.md138 B
        • PROPN.md147 B
        • INTJ.md146 B
        • AUX_.md314 B
        • PUNCT.md147 B
        • ADP.md142 B
        • X.md133 B
        • PART.md142 B
      • _ja-overview
        • feat-all.md667 B
        • syntax.md8 kB
        • feat-index.md291 B
        • specific-syntax.md6 kB
        • tokenization.md1 kB
        • morphology.md2 kB
        • introduction.md28 kB
        • pos-all.md663 B
        • dep-index.md296 B
        • pos-index.md288 B
        • dep-all.md671 B
      • index.md3 kB
      • new_language.md3 kB
      • _lt-pos
        • PRON.md141 B
        • CONJ.md158 B
        • ADV.md138 B
        • VERB.md138 B
        • NUM.md139 B
        • ADJ.md141 B
        • SCONJ.md161 B
        • SYM.md138 B
        • DET.md142 B
        • NOUN.md138 B
        • PROPN.md147 B
        • INTJ.md146 B
        • PUNCT.md147 B
        • ADP.md142 B
        • AUX_.md314 B
        • X.md133 B
        • PART.md142 B
      • _ja-dep
        • name.md147 B
        • advcl.md542 B
        • nsubj.md292 B
        • vocative.md238 B
        • csubjpass.md386 B
        • mark.md345 B
        • xcomp.md99 B
        • remnant.md192 B
        • compound.md303 B
        • csubj.md577 B
        • reparandum.md95 B
        • mwe.md553 B
        • amod.md407 B
        • nmod.md398 B
        • nummod.md197 B
        • dobj.md268 B
        • iobj.md275 B
        • acl.md817 B
        • case.md615 B
        • punct.md285 B
        • advmod.md299 B
        • neg.md331 B
        • ccomp.md457 B
        • parataxis.md791 B
        • auxpass.md462 B
        • goeswith.md113 B
        • appos.md241 B
        • cc.md241 B
        • det.md488 B
        • discourse.md285 B
        • list.md194 B
        • cop.md968 B
        • aux_.md599 B
        • foreign.md256 B
        • root.md77 B
        • expl.md84 B
        • dislocated.md813 B
        • nsubjpass.md285 B
        • conj.md284 B
        • dep.md85 B
      • _bxr-feat
        • VerbForm.md170 B
        • Person.md145 B
        • Number.md145 B
        • Aspect.md145 B
        • Poss.md145 B
        • Gender.md145 B
        • Definite.md164 B
        • Negative.md180 B
        • PronType.md158 B
        • Reflex.md148 B
        • NumType.md153 B
        • Case.md139 B
        • Degree.md159 B
        • Tense.md142 B
        • Voice.md142 B
        • Mood.md139 B
        • Animacy.md148 B
      • flags
        • svg
          • BI.svg960 B
          • HM.svg1 kB
          • IQ-KRD.svg672 B
          • US-TX.svg717 B
          • KZ.svg10 kB
          • FI.svg275 B
          • US-OR.svg126 kB
          • US-RI.svg22 kB
          • MK.svg386 B
          • SA-AL.svg32 kB
          • SR.svg394 B
          • PE.svg373 B
          • GF.svg903 B
          • SN.svg728 B
          • PL.svg243 B
          • TW.svg611 B
          • VA.svg163 kB
          • BR.svg4 kB
          • BG.svg288 B
          • RS.svg264 kB
          • AE.svg321 B
          • FR.svg249 B
          • ZA.svg627 B
          • BN.svg23 kB
          • LV.svg198 B
          • EE.svg298 B
          • RO.svg259 B
          • AW.svg495 B
          • AL.svg6 kB
          • CA.svg1 kB
          • PK.svg707 B
          • UM.svg875 B
          • NR.svg714 B
          • GA.svg256 B
          • ME.svg86 kB
          • IL.svg1 kB
          • LC.svg322 B
          • XK.svg19 kB
          • GS.svg114 kB
          • GH.svg338 B
          • CZ.svg271 B
          • US-WA.svg251 kB
          • US-MD.svg1 kB
          • ML.svg288 B
          • NU.svg1 kB
          • KH.svg12 kB
        • png
          • FO.png1 kB
          • KI.png16 kB
          • SG.png38 kB
          • CU.png8 kB
          • IM.png15 kB
          • BW.png994 B
          • PM.png172 kB
          • XK.png26 kB
          • MS.png31 kB
          • LU.png559 B
          • KW.png2 kB
          • RW.png20 kB
          • CF.png1 kB
          • BH.png4 kB
          • GL.png5 kB
          • MD.png73 kB
          • TD.png651 B
          • KH.png17 kB
          • IL.png7 kB
          • BV.png1 kB
          • RU-BU.png5 kB
          • AX.png1 kB
          • PL.png427 B
          • NP.png24 kB
          • VN.png7 kB
          • MR.png11 kB
          • LT.png562 B
          • TR.png16 kB
          • US-ID.png78 kB
          • ST.png23 kB
          • EA.png30 kB
          • BG.png559 B
          • US-NH.png78 kB
          • AI.png21 kB
          • NA.png13 kB
          • MC.png243 B
          • UA.png704 B
          • FM.png3 kB
          • TC.png29 kB
          • KG.png28 kB
          • SE.png1 kB
          • US-NV.png31 kB
          • US-VT.png60 kB
          • PK.png12 kB
          • AW.png7 kB
          • OM.png8 kB
          • US-TX.png8 kB
          • NO.png1 kB
          • GY.png2 kB
          • US-CA.png57 kB
          • MQ.png11 kB
          • LS.png11 kB
          • SS.png10 kB
          • RU.png517 B
          • CD.png4 kB
          • BF.png5 kB
          • PY.png10 kB
          • US-MI.png70 kB
          • US-CO.png29 kB
          • SD.png1 kB
          • CR.png565 B
          • BT.png84 kB
          • IQ-KRD.png52 kB
          • MP.png95 kB
          • LR.png4 kB
          • SR.png3 kB
          • CC.png11 kB
          • YT.png64 kB
          • BE.png279 B
          • AG.png10 kB
          • GI.png13 kB
          • MA.png4 kB
          • FK.png80 kB
          • US-AR.png14 kB
          • LC.png17 kB
          • TA.png116 kB
          • KE.png19 kB
          • SC.png3 kB
          • DO.png24 kB
          • US-OR.png52 kB
          • RE.png648 B
          • BS.png2 kB
          • YE.png518 B
          • AU.png20 kB
          • US-UT.png367 kB
          • HU.png677 B
          • GW.png4 kB
          • MO.png13 kB
          • UM.png12 kB
          • TO.png281 B
          • US-IA.png42 kB
          • US-PA.png104 kB
          • RS.png134 kB
          • US-WA.png89 kB
          • PW.png3 kB
          • BD.png2 kB
          • US-NE.png87 kB
          • AF.png37 kB
          • GH.png2 kB
          • FJ.png46 kB
          • LB.png12 kB
          • SB.png9 kB
          • CP.png648 B
          • BR.png23 kB
          • AT.png240 B
          • PH.png18 kB
          • HT.png11 kB
          • GB-ENG.png270 B
          • US-KY.png66 kB
          • NL.png518 B
          • MN.png42 kB
          • TN.png19 kB
          • KR.png17 kB
          • CA.png7 kB
          • US-ND.png86 kB
          • AE.png850 B
          • NZ.png16 kB
          • UZ.png1 kB
          • GG.png1 kB
          • FI.png930 B
          • US-IN.png13 kB
          • LA.png2 kB
          • SA.png14 kB
          • DM.png31 kB
          • JE.png25 kB
          • CO.png551 B
          • ZA.png3 kB
          • BQ.png518 B
          • US-MT.png43 kB
          • PG.png14 kB
          • AS.png45 kB
          • MORAVA.png95 kB
          • VI.png70 kB
          • GU.png17 kB
          • MM.png24 kB
          • TM.png71 kB
          • SO.png4 kB
          • US-NC.png23 kB
          • BB.png19 kB
          • US-VA.png223 kB
          • AD.png57 kB
          • US-ME.png62 kB
          • UY.png28 kB
          • GF.png7 kB
          • US-RI.png22 kB
          • CN.png4 kB
          • US-MS.png14 kB
          • PF.png17 kB
          • AR.png25 kB
          • WF.png1 kB
          • HR.png26 kB
          • GT.png43 kB
          • ML.png246 B
          • TL.png4 kB
          • KP.png17 kB
          • DZ.png12 kB
          • SN.png6 kB
          • IT.png1 kB
          • BA.png6 kB
          • PT.png28 kB
          • AC.png102 kB
          • US-MD.png7 kB
          • MZ.png22 kB
          • GE.png6 kB
          • TZ.png3 kB
          • US-IL.png95 kB
          • DK.png169 B
          • CM.png2 kB
          • IE.png838 B
          • BO.png768 B
          • AQ.png5 kB
          • PE.png352 B
          • GB-NIR.png15 kB
          • NI.png28 kB
          • VG.png65 kB
          • GS.png49 kB
          • MK.png14 kB
          • TK.png27 kB
          • SM.png89 kB
          • RO.png359 B
          • ZM.png58 kB
          • US-GA.png6 kB
          • IS.png3 kB
          • PS.png2 kB
          • WS.png30 kB
          • VU.png12 kB
          • MY.png50 kB
          • GD.png11 kB
          • EH.png3 kB
          • DJ.png6 kB
          • CL.png11 kB
          • ID.png408 B
          • BN.png50 kB
          • GB-WLS.png118 kB
          • GR.png867 B
          • TJ.png12 kB
          • KN.png5 kB
          • SL.png216 B
          • JP.png5 kB
          • CZ.png2 kB
          • US-WY.png51 kB
          • IR.png6 kB
          • PR.png12 kB
          • MX.png50 kB
          • US-SD.png66 kB
          • US-AL.png2 kB
          • SZ.png15 kB
          • EG.png26 kB
          • CK.png13 kB
          • IC.png25 kB
          • QA.png3 kB
          • BM.png62 kB
          • AO.png6 kB
          • NG.png1 kB
          • VE.png9 kB
          • GQ.png28 kB
          • UG.png15 kB
          • US-AZ.png24 kB
          • LK.png59 kB
          • EU.png11 kB
          • KM.png3 kB
          • SK.png15 kB
          • JO.png2 kB
          • CY.png6 kB
          • IQ.png6 kB
          • US-MA.png75 kB
          • US-DE.png51 kB
          • NU.png5 kB
          • MW.png11 kB
          • US-SC.png15 kB
          • GB.png4 kB
          • LY.png3 kB
          • TW.png6 kB
          • US-AK.png6 kB
          • SY.png3 kB
          • US-OK.png47 kB
          • US-WI.png59 kB
          • US-NM.png5 kB
          • BL.png648 B
          • US-MO.png212 kB
          • HN.png7 kB
          • US-KS.png626 kB
          • NF.png12 kB
          • GP.png30 kB
          • MH.png14 kB
          • FR.png648 B
          • ET.png12 kB
          • TH.png520 B
          • SJ.png1 kB
          • CX.png19 kB
          • BZ.png91 kB
          • MV.png3 kB
          • GA.png205 B
          • TV.png20 kB
          • KZ.png23 kB
          • SX.png28 kB
          • EE.png581 B
          • DG.png46 kB
          • CI.png239 B
          • AM.png679 B
          • US-MN.png41 kB
          • PA.png4 kB
          • HM.png20 kB
          • US-TN.png9 kB
          • US-CT.png61 kB
          • NE.png3 kB
          • VC.png1 kB
          • MG.png650 B
          • LI.png22 kB
          • TG.png3 kB
          • ES.png30 kB
          • SI.png12 kB
          • GB-SCT.png3 kB
          • JM.png1 kB
          • CW.png6 kB
          • US-WV.png62 kB
          • IO.png46 kB
          • BY.png5 kB
          • US-LA.png97 kB
          • MU.png851 B
          • US.png12 kB
          • KY.png72 kB
          • US-HI.png4 kB
          • ZW.png17 kB
          • CH.png1 kB
          • BJ.png1 kB
          • AL.png11 kB
          • GN.png242 B
          • MF.png648 B
          • HSB.png2 kB
          • TF.png648 B
          • ER.png14 kB
          • SH.png53 kB
          • CV.png5 kB
          • IN.png30 kB
          • US-NY.png93 kB
          • PN.png62 kB
          • AZ.png9 kB
          • ES-GA.png33 kB
          • NR.png1 kB
          • MT.png12 kB
          • LV.png257 B
          • TT.png18 kB
          • EC.png120 kB
          • SV.png39 kB
          • DE.png562 B
          • US-OH.png13 kB
          • CG.png1 kB
          • US-FL.png96 kB
          • US-NJ.png76 kB
          • BI.png7 kB
          • HK.png20 kB
          • NC.png14 kB
          • VA.png36 kB
          • GM.png584 B
          • ME.png32 kB
      • makedata.sh1 kB
      • _yue-feat
        • VerbForm.md170 B
        • Person.md145 B
        • Number.md145 B
        • Aspect.md145 B
        • Poss.md145 B
        • Gender.md145 B
        • Definite.md164 B
        • Negative.md180 B
        • Reflex.md148 B
        • PronType.md158 B
        • NumType.md153 B
        • Case.md139 B
        • Degree.md159 B
        • Tense.md142 B
        • Voice.md142 B
        • Mood.md139 B
        • Animacy.md148 B
      • _no-dep
        • name.md599 B
        • advcl.md785 B
        • nsubj.md844 B
        • vocative.md105 B
        • csubjpass.md465 B
        • mark.md460 B
        • compound-prt.md1 kB
        • xcomp.md948 B
        • remnant.md544 B
        • compound.md215 B
        • csubj.md542 B
        • reparandum.md121 B
        • mwe.md119 B
        • amod.md521 B
        • nmod.md1 kB
        • nummod.md423 B
        • dobj.md264 B
        • acl-relcl.md1 kB
        • iobj.md611 B
        • acl.md589 B
        • case.md620 B
        • punct.md853 B
        • advmod.md343 B
        • neg.md425 B
        • ccomp.md501 B
        • parataxis.md783 B
        • auxpass.md305 B
        • goeswith.md442 B
        • appos.md662 B
        • cc.md636 B
        • det.md700 B
        • discourse.md403 B
        • list.md104 B
        • cop.md602 B
        • aux_.md777 B
        • foreign.md302 B
        • root.md218 B
        • expl.md2 kB
        • dislocated.md119 B
        • nsubjpass.md570 B
        • conj.md663 B
        • dep.md237 B
      • _sa-dep
        • name.md140 B
        • advcl.md163 B
        • nsubj.md153 B
        • vocative.md152 B
        • csubjpass.md169 B
        • mark.md142 B
        • xcomp.md161 B
        • remnant.md161 B
        • compound.md152 B
        • csubj.md153 B
        • reparandum.md169 B
        • mwe.md155 B
        • amod.md155 B
        • nmod.md152 B
        • nummod.md156 B
        • dobj.md149 B
        • iobj.md151 B
        • acl.md158 B
        • case.md148 B
        • punct.md149 B
        • advmod.md158 B
        • neg.md151 B
        • ccomp.md156 B
        • parataxis.md155 B
        • auxpass.md159 B
        • goeswith.md153 B
        • appos.md159 B
        • cc.md156 B
        • det.md144 B
        • discourse.md163 B
        • list.md140 B
        • cop.md140 B
        • aux_.md311 B
        • foreign.md155 B
        • root.md140 B
        • expl.md145 B
        • dislocated.md167 B
        • nsubjpass.md169 B
        • conj.md144 B
        • dep.md156 B
      • _da-pos
        • PRON.md1 kB
        • CONJ.md329 B
        • ADV.md548 B
        • VERB.md612 B
        • NUM.md785 B
        • ADJ.md723 B
        • SCONJ.md389 B
        • SYM.md253 B
        • DET.md807 B
        • NOUN.md275 B
        • PROPN.md440 B
        • INTJ.md361 B
        • PUNCT.md309 B
        • ADP.md257 B
        • AUX_.md1 kB
        • X.md487 B
        • PART.md595 B
      • _pt-overview
        • feat-all.md667 B
        • syntax.md245 B
        • feat-index.md291 B
        • specific-syntax.md217 B
        • tokenization.md200 B
        • morphology.md214 B
        • introduction.md2 kB
        • pos-all.md663 B
        • dep-index.md296 B
        • dep-all.md671 B
        • pos-index.md288 B
      • _la-pos
        • CONJ.md158 B
        • PRON.md141 B
        • VERB.md138 B
        • ADV.md138 B
        • NUM.md139 B
        • ADJ.md141 B
        • SCONJ.md161 B
        • SYM.md138 B
        • DET.md142 B
        • NOUN.md138 B
        • PROPN.md147 B
        • INTJ.md146 B
        • PUNCT.md147 B
        • AUX_.md314 B
        • ADP.md142 B
        • X.md133 B
        • PART.md142 B
      • _hi-overview
        • feat-all.md667 B
        • syntax.md190 B
        • feat-index.md291 B
        • tokenization.md200 B
        • specific-syntax.md217 B
        • morphology.md214 B
        • introduction.md173 B
        • pos-all.md663 B
        • dep-index.md296 B
        • dep-all.md671 B
        • pos-index.md288 B
      • _nl-pos
        • PRON.md141 B
        • CONJ.md158 B
        • ADV.md138 B
        • VERB.md138 B
        • NUM.md139 B
        • ADJ.md141 B
        • SCONJ.md161 B
        • SYM.md138 B
        • DET.md142 B
        • NOUN.md138 B
        • PROPN.md147 B
        • INTJ.md146 B
        • ADP.md142 B
        • PUNCT.md147 B
        • AUX_.md314 B
        • X.md133 B
        • PART.md142 B
      • _ar-pos
        • CONJ.md158 B
        • PRON.md141 B
        • VERB.md138 B
        • ADV.md138 B
        • NUM.md139 B
        • ADJ.md141 B
        • SCONJ.md161 B
        • SYM.md138 B
        • DET.md142 B
        • NOUN.md138 B
        • PROPN.md147 B
        • INTJ.md146 B
        • AUX_.md314 B
        • PUNCT.md147 B
        • ADP.md142 B
        • X.md133 B
        • PART.md142 B
      • _vi-feat
        • VerbForm.md170 B
        • Person.md145 B
        • Number.md145 B
        • Aspect.md145 B
        • Poss.md145 B
        • Definite.md164 B
        • Gender.md145 B
        • Negative.md180 B
        • PronType.md158 B
        • Reflex.md148 B
        • NumType.md153 B
        • Case.md139 B
        • Degree.md159 B
        • Voice.md142 B
        • Tense.md142 B
        • Mood.md139 B
        • Animacy.md148 B
      • _en-pos
        • CONJ.md117 B
        • PRON.md563 B
        • VERB.md256 B
        • ADV.md206 B
        • NUM.md110 B
        • ADJ.md135 B
        • SCONJ.md530 B
        • SYM.md189 B
        • DET.md275 B
        • NOUN.md155 B
        • PROPN.md371 B
        • INTJ.md109 B
        • AUX_.md451 B
        • PUNCT.md209 B
        • ADP.md275 B
        • X.md235 B
        • PART.md551 B
      • _hr-feat
        • VerbForm.md170 B
        • Person.md145 B
        • Number.md145 B
        • Aspect.md145 B
        • Poss.md145 B
        • Gender.md145 B
        • Definite.md164 B
        • Negative.md180 B
        • Reflex.md148 B
        • PronType.md158 B
        • NumType.md153 B
        • Case.md139 B
        • Degree.md159 B
        • Tense.md142 B
        • Mood.md139 B
        • Voice.md142 B
        • Animacy.md148 B
      • _be-pos
        • CONJ.md158 B
        • PRON.md141 B
        • VERB.md138 B
        • ADV.md138 B
        • NUM.md139 B
        • ADJ.md141 B
        • SCONJ.md161 B
        • SYM.md138 B
        • DET.md142 B
        • NOUN.md138 B
        • PROPN.md147 B
        • INTJ.md146 B
        • AUX_.md314 B
        • PUNCT.md147 B
        • ADP.md142 B
        • X.md133 B
        • PART.md142 B
      • _v2
        • segmentation.md4 kB
        • language-specific.md7 kB
        • coordination.md3 kB
        • enhanced.md9 kB
        • index.md2 kB
        • features.md49 kB
        • summary.md8 kB
        • ellipsis.md8 kB
        • mwe.md4 kB
        • core-dependents.md10 kB
        • copula.md16 kB
        • sentence-mood.md80 B
        • postags.md17 kB
        • minimaldoc.md5 kB
        • function.md5 kB
        • conll-u.md3 kB
        • semantic-categories.md2 kB
      • _cu-dep
        • name.md140 B
        • advcl.md163 B
        • nsubj.md153 B
        • vocative.md152 B
        • csubjpass.md169 B
        • mark.md142 B
        • xcomp.md161 B
        • remnant.md161 B
        • compound.md152 B
        • csubj.md153 B
        • reparandum.md169 B
        • mwe.md155 B
        • amod.md155 B
        • nmod.md152 B
        • nummod.md156 B
        • dobj.md149 B
        • iobj.md151 B
        • acl.md158 B
        • case.md148 B
        • punct.md149 B
        • advmod.md158 B
        • neg.md151 B
        • ccomp.md156 B
        • parataxis.md155 B
        • auxpass.md159 B
        • goeswith.md153 B
        • appos.md159 B
        • cc.md156 B
        • det.md144 B
        • discourse.md163 B
        • list.md140 B
        • cop.md140 B
        • aux_.md311 B
        • foreign.md155 B
        • root.md140 B
        • expl.md145 B
        • dislocated.md167 B
        • nsubjpass.md169 B
        • conj.md144 B
        • dep.md156 B
      • _ca-feat
        • Definite.md164 B
        • Aspect.md145 B
        • Number.md145 B
        • AdvType.md148 B
        • Reflex.md148 B
        • Voice.md142 B
        • PrepCase.md151 B
        • Degree.md159 B
        • Tense.md142 B
        • Gender.md145 B
        • VerbForm.md170 B
        • Mood.md139 B
        • Poss.md145 B
        • Person.md145 B
        • NumType.md153 B
        • PronType.md158 B
        • AdpType.md148 B
        • NumForm.md148 B
        • Negative.md180 B
        • Animacy.md148 B
        • PunctType.md154 B
        • Polite.md145 B
        • Case.md139 B
        • Number-psor.md163 B
        • PunctSide.md154 B
      • _eu-pos
        • CONJ.md158 B
        • PRON.md141 B
        • ADV.md471 B
        • VERB.md765 B
        • NUM.md139 B
        • ADJ.md332 B
        • SCONJ.md161 B
        • SYM.md138 B
        • DET.md142 B
        • NOUN.md261 B
        • PROPN.md287 B
        • INTJ.md146 B
        • PUNCT.md147 B
        • ADP.md142 B
        • AUX_.md314 B
        • X.md133 B
        • PART.md142 B
      • _kmr-overview
        • feat-all.md671 B
        • syntax.md191 B
        • feat-index.md294 B
        • specific-syntax.md218 B
        • tokenization.md201 B
        • morphology.md215 B
        • introduction.md174 B
        • pos-all.md667 B
        • dep-index.md299 B
        • dep-all.md675 B
        • pos-index.md291 B
      • _de-dep
        • name.md1 kB
        • advcl.md163 B
        • nsubj.md511 B
        • vocative.md152 B
        • csubjpass.md321 B
        • mark.md142 B
        • compound-prt.md814 B
        • xcomp.md161 B
        • remnant.md161 B
        • compound.md152 B
        • csubj.md585 B
        • reparandum.md169 B
        • mwe.md155 B
        • amod.md326 B
        • nmod.md152 B
        • nummod.md324 B
        • dobj.md284 B
        • iobj.md285 B
        • acl-relcl.md942 B
        • acl.md542 B
        • case.md881 B
        • nmod-poss.md155 B
        • punct.md149 B
        • advmod.md158 B
        • neg.md471 B
        • ccomp.md618 B
        • parataxis.md155 B
        • auxpass.md159 B
        • goeswith.md153 B
        • appos.md605 B
        • cc.md156 B
        • det.md567 B
        • discourse.md163 B
        • list.md140 B
        • cop.md140 B
        • aux_.md311 B
        • foreign.md155 B
        • root.md140 B
        • expl.md145 B
        • dislocated.md167 B
        • nsubjpass.md251 B
        • conj.md144 B
        • dep.md156 B
      • _id-dep
        • name.md140 B
        • advcl.md163 B
        • nsubj.md153 B
        • vocative.md152 B
        • csubjpass.md169 B
        • mark.md142 B
        • xcomp.md161 B
        • remnant.md161 B
        • compound.md152 B
        • csubj.md153 B
        • reparandum.md169 B
        • mwe.md155 B
        • amod.md155 B
        • nmod.md152 B
        • nummod.md156 B
        • dobj.md149 B
        • iobj.md151 B
        • acl.md158 B
        • case.md148 B
        • punct.md149 B
        • advmod.md158 B
        • neg.md151 B
        • ccomp.md156 B
        • parataxis.md155 B
        • auxpass.md159 B
        • goeswith.md153 B
        • appos.md159 B
        • cc.md156 B
        • det.md144 B
        • discourse.md163 B
        • list.md140 B
        • cop.md140 B
        • aux_.md311 B
        • foreign.md155 B
        • root.md140 B
        • expl.md145 B
        • dislocated.md167 B
        • nsubjpass.md169 B
        • conj.md144 B
        • dep.md156 B
Icon
Name
ud-tools-v2.0.tgz
Size
192.19 KB
Format
application/x-gzip
Description
Tools
MD5
68c0c53a740a87eaeae18cd528a915c2
 Download file  Preview
 File Preview  
  • ud-tools-v2.0
    • conllu-stats.pl60 kB
    • conllu-w2t.py2 kB
    • validate.py32 kB
    • text_without_spaces.pl2 kB
    • compat
      • argparse.pyc65 kB
      • argparse.py85 kB
      • __init__.pyc140 B
      • __init__.py0 B
    • udlib.pm7 kB
    • mwtoken-stats.pl1 kB
    • overlap.py2 kB
    • example-data
      • tanl.conll471 B
    • survey_deprel_subtypes.pl7 kB
    • v2-conversion
      • convert.py3 kB
      • README.md1 kB
      • processors_en.py1 kB
      • processors_universal.py5 kB
      • depgraph_utils.py6 kB
      • nmod_obl_adjudication.py3 kB
    • README.txt5 kB
    • package_ud_release.sh4 kB
    • survey_features.pl8 kB
    • check_files.pl32 kB
    • conllu_to_conllx.pl2 kB
    • file_util.pyc2 kB
    • create_iso_639_3_symlinks.py175 kB
    • conllu-formconvert.py3 kB
    • runtests.sh1 kB
    • conllu-stats.py6 kB
    • test-cases
      • nonvalid
        • lowercase-postag.conllu141 B
        • lowercase-feature-value-in-empty.conllu251 B
        • empty-head.conllu114 B
        • invalid-deps-id.conllu173 B
        • misordered-feature.conllu243 B
        • invalid-deps-syntax.conllu194 B
        • missing-final-line.conllu155 B
        • empty-field.conllu100 B
        • head-not-0-deprel-root.conllu172 B
        • ambiguous-feature.conllu175 B
        • empty-sentence.conllu163 B
        • trailing-tab.conllu127 B
        • lowercase-postag-in-empty.conllu239 B
        • malformed_deps.conllu683 B
        • head-0-deprel-not-root.conllu133 B
        • head-not-empty-in-empty.conllu224 B
        • lowercase-feature.conllu238 B
        • duplicate-value.conllu137 B
        • tanl-broken.conllu690 B
        • misordered-layered-feature.conllu239 B
        • whitespace_nonv.conllu274 B
        • misplaced-comment-end.conllu429 B
        • dos-newlines.conllu698 B
        • duplicate-layered-feature.conllu182 B
        • cyclic-deps.conllu227 B
        • invalid-head.conllu164 B
        • id-with-extra-0.conllu140 B
        • duplicate-feature.conllu142 B
        • overlapping-multiword.conllu387 B
        • token_with_cols_filled.conllu385 B
        • self-cycle-deps.conllu164 B
        • nonsequential-id.conllu176 B
        • space-in-field.conllu127 B
        • extra-empty-line.conllu287 B
        • lowercase-feature-in-empty.conllu244 B
        • id-starting-from-2.conllu247 B
        • self-cycle-head.conllu190 B
        • invalid-range.conllu375 B
        • uppercase-deps-deprel.conllu206 B
        • multiword-with-pos.conllu407 B
        • empty-node-without-dependent.conllu251 B
        • uppercase-deprel.conllu184 B
        • invalid-deps-order.conllu409 B
        • missing-space-after.conllu176 B
        • extra-field.conllu130 B
        • invalid-range-format.conllu380 B
        • duplicate-id.conllu171 B
        • deprel-not-empty-in-empty.conllu233 B
        • misplaced-comment-mid.conllu401 B
        • misordered-multiword.conllu392 B
        • lowercase-value.conllu237 B
      • valid
        • maximal-empty-node.conllu273 B
        • whitespace.conllu294 B
        • multiple-features.conllu295 B
        • id_test_part2.conllu375 B
        • empty-nodes.conllu324 B
        • id_test_part1.conllu375 B
        • tanl.conllu673 B
        • empty-file.conllu0 B
        • minimal-empty-node.conllu239 B
        • layered-features.conllu272 B
    • validate_all.sh943 B
    • conllu_to_text.pl8 kB
    • LICENSE.txt17 kB
    • mergept.pl2 kB
    • check_sentence_ids.pl1 kB
    • conll_convert_tags_to_uposf.pl1 kB
    • data
      • deprel.swl20 B
      • deprel.cu93 B
      • feat_val.cu41 B
      • deprel.cs107 B
      • feat_val.cs435 B
      • feat_val.no_nynorsk0 B
      • deprel.no_nynorsk77 B
      • feat_val.en_esl0 B
      • feat_val.ru_syntagrus14 B
      • feat_val.sl_sst176 B
      • deprel.ca53 B
      • feat_val.ca268 B
      • tokens_w_space.lv54 B
      • cpos.ud79 B
      • feat_val.grc_proiel23 B
      • deprel.grc_proiel93 B
      • feat_val.fi_ftb304 B
      • deprel.bg31 B
      • feat_val.bg0 B
      • deprel.be40 B
      • feat_val.be14 B
      • deprel.en_lines109 B
      • deprel.sv_lines74 B
      • deprel.nl32 B
      • feat_val.nl368 B
      • tokens_w_space.kk1 kB
      • deprel.ar55 B
      • feat_val.es_ancora268 B
      • feat_val.ar73 B
      • deprel.zh145 B
      • feat_val.zh40 B
      • feat_val.fr_partut6 B
      • feat_val.grc23 B
      • feat_val.nl_lassysmall368 B
      • feat_val.bxr0 B
      • deprel.no_bokmaal77 B
      • feat_val.no_bokmaal0 B
      • deprel.ja_ktc0 B
      • feat_val.lv76 B
      • deprel.lv49 B
      • deprel.lt52 B
      • feat_val.lt139 B
      • tokens_w_space.vi3 B
      • feat_val.la13 B
      • deprel.la12 B
      • deprel.ru_syntagrus119 B
      • deprel.fr_partut83 B
      • feat_val.ko1 B
      • deprel.ko30 B
      • tokens_w_space.ud27 B
      • feat_val.kk177 B
      • deprel.kk70 B
      • feat_val.got30 B
      • deprel.en_esl76 B
      • deprel.es_ancora41 B
      • feat_val.la_proiel40 B
      • deprel.sl_sst101 B
      • feat_val.cop0 B
      • deprel.cs_cltt107 B
      • deprel.ja10 B
      • feat_val.ja0 B
      • feats.ud121 B
      • deprel.ar_nyuad30 B
      • tokens_w_space.sv96 B
      • deprel.vi10 B
      • feat_val.vi0 B
      • deprel.fi_ftb113 B
      • deprel.it117 B
      • feat_val.it26 B
      • feat_val.cs_cltt380 B
      • feat_val.en_lines0 B
      • deprel.pt_br85 B
      • deprel.grc12 B
      • deprel.nl_lassysmall53 B
      • deprel.ur11 B
      • feat_val.ur35 B
      • tokens_w_space.fr10 B
      • feat_val.pt_br0 B
      • deprel.id36 B
      • deprel.bxr9 B
      • feat_val.id0 B
      • deprel.uk100 B
      • feat_val.uk161 B
      • tokens_w_space.fi78 B
      • deprel.ug191 B
      • feat_val.ug395 B
      • feat_val.cs_cac435 B
      • deprel.ud237 B
      • feat_val.ud1 kB
      • feat_val.hu202 B
      • deprel.hu251 B
      • deprel.hr65 B
      • feat_val.hr90 B
      • feat_val.en_partut28 B
      • feat_val.sv_lines0 B
      • deprel.hi41 B
      • feat_val.hi34 B
      • deprel.tr56 B
      • feat_val.tr457 B
      • deprel.he155 B
      • feat_val.he235 B
      • feat_val.swl0 B
      • deprel.la_proiel93 B
      • deprel.ta36 B
      • feat_val.ta57 B
      • feat_val.gl0 B
      • deprel.gl0 B
      • feat_val.testsuite106 B
      • deprel.sv84 B
      • feat_val.sv15 B
      • feat_val.ga364 B
      • deprel.ga125 B
      • deprel.sl34 B
      • feat_val.sl176 B
      • deprel.sk107 B
      • feat_val.sk406 B
      • deprel.got93 B
      • feat_val.fr_sequoia0 B
      • deprel.fr_sequoia74 B
      • deprel.sa11 B
      • feat_val.sa0 B
      • feat_val.fr0 B
      • deprel.fr74 B
      • feat_val.it_partut26 B
      • feat_val.fo26 B
      • deprel.fo0 B
      • deprel.cop0 B
      • feat_val.gl_treegal60 B
      • deprel.gl_treegal43 B
      • feat_val.ru17 B
      • deprel.ru75 B
      • feat_val.fi499 B
      • deprel.fi146 B
      • feat_val.ro180 B
      • deprel.ro157 B
      • deprel.fa122 B
      • feat_val.fa0 B
      • feat_val.eu329 B
      • deprel.eu0 B
      • feat_val.et95 B
      • deprel.et77 B
      • feat_val.es26 B
      • deprel.es51 B
      • deprel.la_ittb53 B
      • feat_val.en0 B
      • deprel.en139 B
      • feat_val.ja_ktc0 B
      • feat_val.el0 B
      • deprel.el11 B
      • deprel.en_partut141 B
      • feat_val.la_ittb40 B
      • deprel.it_partut118 B
      • deprel.pt84 B
      • feat_val.pt54 B
      • deprel.de63 B
      • feat_val.de98 B
      • feat_val.pl120 B
      • deprel.da49 B
      • deprel.cs_cac107 B
      • deprel.pl107 B
      • feat_val.da111 B
      • feat_val.ar_nyuad73 B
    • file_util.py2 kB
    • csort.pm7 kB

Show simple item record