Show simple item record

 
dc.contributor.author Nivre, Joakim
dc.contributor.author Agić, Željko
dc.contributor.author Ahrenberg, Lars
dc.contributor.author Aranzabe, Maria Jesus
dc.contributor.author Asahara, Masayuki
dc.contributor.author Atutxa, Aitziber
dc.contributor.author Ballesteros, Miguel
dc.contributor.author Bauer, John
dc.contributor.author Bengoetxea, Kepa
dc.contributor.author Bhat, Riyaz Ahmad
dc.contributor.author Bick, Eckhard
dc.contributor.author Bosco, Cristina
dc.contributor.author Bouma, Gosse
dc.contributor.author Bowman, Sam
dc.contributor.author Candito, Marie
dc.contributor.author Cebiroğlu Eryiğit, Gülşen
dc.contributor.author Celano, Giuseppe G. A.
dc.contributor.author Chalub, Fabricio
dc.contributor.author Choi, Jinho
dc.contributor.author Çöltekin, Çağrı
dc.contributor.author Connor, Miriam
dc.contributor.author Davidson, Elizabeth
dc.contributor.author de Marneffe, Marie-Catherine
dc.contributor.author de Paiva, Valeria
dc.contributor.author Diaz de Ilarraza, Arantza
dc.contributor.author Dobrovoljc, Kaja
dc.contributor.author Dozat, Timothy
dc.contributor.author Droganova, Kira
dc.contributor.author Dwivedi, Puneet
dc.contributor.author Eli, Marhaba
dc.contributor.author Erjavec, Tomaž
dc.contributor.author Farkas, Richárd
dc.contributor.author Foster, Jennifer
dc.contributor.author Freitas, Cláudia
dc.contributor.author Gajdošová, Katarína
dc.contributor.author Galbraith, Daniel
dc.contributor.author Garcia, Marcos
dc.contributor.author Ginter, Filip
dc.contributor.author Goenaga, Iakes
dc.contributor.author Gojenola, Koldo
dc.contributor.author Gökırmak, Memduh
dc.contributor.author Goldberg, Yoav
dc.contributor.author Gómez Guinovart, Xavier
dc.contributor.author Gonzáles Saavedra, Berta
dc.contributor.author Grioni, Matias
dc.contributor.author Grūzītis, Normunds
dc.contributor.author Guillaume, Bruno
dc.contributor.author Habash, Nizar
dc.contributor.author Hajič, Jan
dc.contributor.author Hà Mỹ, Linh
dc.contributor.author Haug, Dag
dc.contributor.author Hladká, Barbora
dc.contributor.author Hohle, Petter
dc.contributor.author Ion, Radu
dc.contributor.author Irimia, Elena
dc.contributor.author Johannsen, Anders
dc.contributor.author Jørgensen, Fredrik
dc.contributor.author Kaşıkara, Hüner
dc.contributor.author Kanayama, Hiroshi
dc.contributor.author Kanerva, Jenna
dc.contributor.author Kotsyba, Natalia
dc.contributor.author Krek, Simon
dc.contributor.author Laippala, Veronika
dc.contributor.author Lê Hồng, Phương
dc.contributor.author Lenci, Alessandro
dc.contributor.author Ljubešić, Nikola
dc.contributor.author Lyashevskaya, Olga
dc.contributor.author Lynn, Teresa
dc.contributor.author Makazhanov, Aibek
dc.contributor.author Manning, Christopher
dc.contributor.author Mărănduc, Cătălina
dc.contributor.author Mareček, David
dc.contributor.author Martínez Alonso, Héctor
dc.contributor.author Martins, André
dc.contributor.author Mašek, Jan
dc.contributor.author Matsumoto, Yuji
dc.contributor.author McDonald, Ryan
dc.contributor.author Missilä, Anna
dc.contributor.author Mititelu, Verginica
dc.contributor.author Miyao, Yusuke
dc.contributor.author Montemagni, Simonetta
dc.contributor.author More, Amir
dc.contributor.author Mori, Shunsuke
dc.contributor.author Moskalevskyi, Bohdan
dc.contributor.author Muischnek, Kadri
dc.contributor.author Mustafina, Nina
dc.contributor.author Müürisep, Kaili
dc.contributor.author Nguyễn Thị, Lương
dc.contributor.author Nguyễn Thị Minh, Huyền
dc.contributor.author Nikolaev, Vitaly
dc.contributor.author Nurmi, Hanna
dc.contributor.author Ojala, Stina
dc.contributor.author Osenova, Petya
dc.contributor.author Øvrelid, Lilja
dc.contributor.author Pascual, Elena
dc.contributor.author Passarotti, Marco
dc.contributor.author Perez, Cenel-Augusto
dc.contributor.author Perrier, Guy
dc.contributor.author Petrov, Slav
dc.contributor.author Piitulainen, Jussi
dc.contributor.author Plank, Barbara
dc.contributor.author Popel, Martin
dc.contributor.author Pretkalniņa, Lauma
dc.contributor.author Prokopidis, Prokopis
dc.contributor.author Puolakainen, Tiina
dc.contributor.author Pyysalo, Sampo
dc.contributor.author Rademaker, Alexandre
dc.contributor.author Ramasamy, Loganathan
dc.contributor.author Real, Livy
dc.contributor.author Rituma, Laura
dc.contributor.author Rosa, Rudolf
dc.contributor.author Saleh, Shadi
dc.contributor.author Sanguinetti, Manuela
dc.contributor.author Saulīte, Baiba
dc.contributor.author Schuster, Sebastian
dc.contributor.author Seddah, Djamé
dc.contributor.author Seeker, Wolfgang
dc.contributor.author Seraji, Mojgan
dc.contributor.author Shakurova, Lena
dc.contributor.author Shen, Mo
dc.contributor.author Sichinava, Dmitry
dc.contributor.author Silveira, Natalia
dc.contributor.author Simi, Maria
dc.contributor.author Simionescu, Radu
dc.contributor.author Simkó, Katalin
dc.contributor.author Šimková, Mária
dc.contributor.author Simov, Kiril
dc.contributor.author Smith, Aaron
dc.contributor.author Suhr, Alane
dc.contributor.author Sulubacak, Umut
dc.contributor.author Szántó, Zsolt
dc.contributor.author Taji, Dima
dc.contributor.author Tanaka, Takaaki
dc.contributor.author Tsarfaty, Reut
dc.contributor.author Tyers, Francis
dc.contributor.author Uematsu, Sumire
dc.contributor.author Uria, Larraitz
dc.contributor.author van Noord, Gertjan
dc.contributor.author Varga, Viktor
dc.contributor.author Vincze, Veronika
dc.contributor.author Washington, Jonathan North
dc.contributor.author Žabokrtský, Zdeněk
dc.contributor.author Zeldes, Amir
dc.contributor.author Zeman, Daniel
dc.contributor.author Zhu, Hanzhi
dc.date.accessioned 2017-03-14T13:16:19Z
dc.date.available 2017-03-14T13:16:19Z
dc.date.issued 2017-03-13
dc.identifier.uri http://hdl.handle.net/11234/1-1983
dc.description Universal Dependencies is a project that seeks to develop cross-linguistically consistent treebank annotation for many languages, with the goal of facilitating multilingual parser development, cross-lingual learning, and parsing research from a language typology perspective. The annotation scheme is based on (universal) Stanford dependencies (de Marneffe et al., 2006, 2008, 2014), Google universal part-of-speech tags (Petrov et al., 2012), and the Interset interlingua for morphosyntactic tagsets (Zeman, 2008). This release is special in that the treebanks will be used as training/development data in the CoNLL 2017 shared task (http://universaldependencies.org/conll17/). Test data are not released, except for the few treebanks that do not take part in the shared task. 64 treebanks will be in the shared task, and they correspond to the following 45 languages: Ancient Greek, Arabic, Basque, Bulgarian, Catalan, Chinese, Croatian, Czech, Danish, Dutch, English, Estonian, Finnish, French, Galician, German, Gothic, Greek, Hebrew, Hindi, Hungarian, Indonesian, Irish, Italian, Japanese, Kazakh, Korean, Latin, Latvian, Norwegian, Old Church Slavonic, Persian, Polish, Portuguese, Romanian, Russian, Slovak, Slovenian, Spanish, Swedish, Turkish, Ukrainian, Urdu, Uyghur and Vietnamese. This release fixes a bug in http://hdl.handle.net/11234/1-1976. Changed files: ud-tools-v2.0.tgz (conllu_to_text.pl, conllu_to_conllx.pl; added text_without_spaces.pl), ud-treebanks-conll2017.tgz (fi_ftb-ud-train.txt, he-ud-train.txt, it-ud-train.txt, pt_br-ud-train.txt, es-ud-train.txt) and ud-treebanks-v2.0.tgz (fi_ftb-ud-train.txt, he-ud-train.txt, it-ud-train.txt, pt_br-ud-train.txt, es-ud-train.txt, ar_nyuad-ud-dev.txt, ar_nyuad-ud-test.txt, ar_nyuad-ud-train.txt, cop-ud-dev.txt, cop-ud-test.txt, cop-ud-train.txt, sa-ud-dev.txt, sa-ud-test.txt, sa-ud-train.txt).
dc.language.iso grc
dc.language.iso ara
dc.language.iso eus
dc.language.iso bul
dc.language.iso hrv
dc.language.iso ces
dc.language.iso dan
dc.language.iso nld
dc.language.iso eng
dc.language.iso est
dc.language.iso fin
dc.language.iso fra
dc.language.iso deu
dc.language.iso got
dc.language.iso ell
dc.language.iso heb
dc.language.iso hin
dc.language.iso hun
dc.language.iso ind
dc.language.iso gle
dc.language.iso ita
dc.language.iso jpn
dc.language.iso lat
dc.language.iso nor
dc.language.iso chu
dc.language.iso fas
dc.language.iso pol
dc.language.iso por
dc.language.iso ron
dc.language.iso slv
dc.language.iso spa
dc.language.iso swe
dc.language.iso tam
dc.language.iso cat
dc.language.iso zho
dc.language.iso glg
dc.language.iso kaz
dc.language.iso lav
dc.language.iso rus
dc.language.iso tur
dc.language.iso cop
dc.language.iso san
dc.language.iso slk
dc.language.iso ukr
dc.language.iso uig
dc.language.iso vie
dc.language.iso bel
dc.language.iso kor
dc.language.iso lit
dc.language.iso urd
dc.publisher Universal Dependencies Consortium
dc.relation.replaces http://hdl.handle.net/11234/1-1827
dc.rights Licence Universal Dependencies v2.0
dc.rights.uri https://lindat.mff.cuni.cz/repository/xmlui/page/licence-UD-2.0
dc.source.uri http://universaldependencies.org/
dc.subject treebank
dc.subject dependency
dc.subject syntax
dc.subject morphology
dc.subject harmonized annotation
dc.subject interset
dc.subject universal tagset
dc.subject stanford dependencies
dc.title Universal Dependencies 2.0
dc.type corpus
metashare.ResourceInfo#ContentInfo.mediaType text
dc.rights.label PUB
has.files yes
branding LINDAT / CLARIN
demo.uri http://bionlp-www.utu.fi/dep_search
demo.uri https://lindat.mff.cuni.cz/services/pmltq/#!/treebanks
contact.person Joakim Nivre joakim.nivre@lingfil.uu.se Uppsala University
contact.person Daniel Zeman zeman@ufal.mff.cuni.cz Charles University in Prague, UFAL
sponsor Grantová agentura České republiky 15-10472S Morphologically and Syntactically Annotated Corpora of Many Languages nationalFunds
size.info 11814230 tokens
size.info 12102983 words
size.info 630518 sentences
files.size 418607328
files.count 4
featuredService.pmltq Ancient Greek|http://lindat.mff.cuni.cz/services/pmltq/ud20_grc/
featuredService.pmltq Ancient Greek - PROIEL|http://lindat.mff.cuni.cz/services/pmltq/ud20_grc_proiel/
featuredService.pmltq Arabic|http://lindat.mff.cuni.cz/services/pmltq/ud20_ar/
featuredService.pmltq Basque|http://lindat.mff.cuni.cz/services/pmltq/ud20_eu/
featuredService.pmltq Belarusian|http://lindat.mff.cuni.cz/services/pmltq/ud20_be/
featuredService.pmltq Bulgarian|http://lindat.mff.cuni.cz/services/pmltq/ud20_bg/
featuredService.pmltq Catalan|http://lindat.mff.cuni.cz/services/pmltq/ud20_ca/
featuredService.pmltq Chinese|http://lindat.mff.cuni.cz/services/pmltq/ud20_zh/
featuredService.pmltq Coptic|http://lindat.mff.cuni.cz/services/pmltq/ud20_cop/
featuredService.pmltq Croatian|http://lindat.mff.cuni.cz/services/pmltq/ud20_hr/
featuredService.pmltq Czech|http://lindat.mff.cuni.cz/services/pmltq/ud20_cs/
featuredService.pmltq Czech - CAC|http://lindat.mff.cuni.cz/services/pmltq/ud20_cs_cac/
featuredService.pmltq Czech - CLTT (legal)|http://lindat.mff.cuni.cz/services/pmltq/ud20_cs_cltt/
featuredService.pmltq Danish|http://lindat.mff.cuni.cz/services/pmltq/ud20_da/
featuredService.pmltq Dutch|http://lindat.mff.cuni.cz/services/pmltq/ud20_nl/
featuredService.pmltq Dutch - LassySmall|http://lindat.mff.cuni.cz/services/pmltq/ud20_nl_lassy/
featuredService.pmltq English|http://lindat.mff.cuni.cz/services/pmltq/ud20_en/
featuredService.pmltq English - LinES|http://lindat.mff.cuni.cz/services/pmltq/ud20_en_lines/
featuredService.pmltq English - ParTUT|http://lindat.mff.cuni.cz/services/pmltq/ud20_en_partut/
featuredService.pmltq Estonian|http://lindat.mff.cuni.cz/services/pmltq/ud20_et/
featuredService.pmltq Finnish|http://lindat.mff.cuni.cz/services/pmltq/ud20_fi/
featuredService.pmltq Finnish - FTB|http://lindat.mff.cuni.cz/services/pmltq/ud20_fi_ftb/
featuredService.pmltq French|http://lindat.mff.cuni.cz/services/pmltq/ud20_fr/
featuredService.pmltq French - ParTUT|http://lindat.mff.cuni.cz/services/pmltq/ud20_fr_partut/
featuredService.pmltq French - Sequoia|http://lindat.mff.cuni.cz/services/pmltq/ud20_fr_sequoia/
featuredService.pmltq Galician|http://lindat.mff.cuni.cz/services/pmltq/ud20_gl/
featuredService.pmltq Galician - TreeGal|http://lindat.mff.cuni.cz/services/pmltq/ud20_gl_treegal/
featuredService.pmltq German|http://lindat.mff.cuni.cz/services/pmltq/ud20_de/
featuredService.pmltq Gothic|http://lindat.mff.cuni.cz/services/pmltq/ud20_got/
featuredService.pmltq Greek|http://lindat.mff.cuni.cz/services/pmltq/ud20_el/
featuredService.pmltq Hebrew|http://lindat.mff.cuni.cz/services/pmltq/ud20_he/
featuredService.pmltq Hindi|http://lindat.mff.cuni.cz/services/pmltq/ud20_hi/
featuredService.pmltq Hungarian|http://lindat.mff.cuni.cz/services/pmltq/ud20_hu/
featuredService.pmltq Indonesian|http://lindat.mff.cuni.cz/services/pmltq/ud20_id/
featuredService.pmltq Irish|http://lindat.mff.cuni.cz/services/pmltq/ud20_ga/
featuredService.pmltq Italian|http://lindat.mff.cuni.cz/services/pmltq/ud20_it/
featuredService.pmltq Italian - ParTUT|http://lindat.mff.cuni.cz/services/pmltq/ud20_it_partut/
featuredService.pmltq Japanese|http://lindat.mff.cuni.cz/services/pmltq/ud20_ja/
featuredService.pmltq Kazakh|http://lindat.mff.cuni.cz/services/pmltq/ud20_kk/
featuredService.pmltq Korean|http://lindat.mff.cuni.cz/services/pmltq/ud20_ko/
featuredService.pmltq Latin|http://lindat.mff.cuni.cz/services/pmltq/ud20_la/
featuredService.pmltq Latin - ITTB|http://lindat.mff.cuni.cz/services/pmltq/ud20_la_ittb/
featuredService.pmltq Latin - PROIEL|http://lindat.mff.cuni.cz/services/pmltq/ud20_la_proiel/
featuredService.pmltq Latvian|http://lindat.mff.cuni.cz/services/pmltq/ud20_lv/
featuredService.pmltq Lithuanian|http://lindat.mff.cuni.cz/services/pmltq/ud20_lt/
featuredService.pmltq Norwegian - Bokmaal|http://lindat.mff.cuni.cz/services/pmltq/ud20_no_bokmaal/
featuredService.pmltq Norwegian - Nynorsk|http://lindat.mff.cuni.cz/services/pmltq/ud20_no_nynorsk/
featuredService.pmltq Old Church Slavonic|http://lindat.mff.cuni.cz/services/pmltq/ud20_cu/
featuredService.pmltq Persian|http://lindat.mff.cuni.cz/services/pmltq/ud20_fa/
featuredService.pmltq Polish|http://lindat.mff.cuni.cz/services/pmltq/ud20_pl/
featuredService.pmltq Portuguese|http://lindat.mff.cuni.cz/services/pmltq/ud20_pt/
featuredService.pmltq Portuguese - BR|http://lindat.mff.cuni.cz/services/pmltq/ud20_pt_br/
featuredService.pmltq Romanian|http://lindat.mff.cuni.cz/services/pmltq/ud20_ro/
featuredService.pmltq Russian|http://lindat.mff.cuni.cz/services/pmltq/ud20_ru/
featuredService.pmltq Russian - SynTagRus|http://lindat.mff.cuni.cz/services/pmltq/ud20_ru_syntagrus/
featuredService.pmltq Sanskrit|http://lindat.mff.cuni.cz/services/pmltq/ud20_sa/
featuredService.pmltq Slovak|http://lindat.mff.cuni.cz/services/pmltq/ud20_sk/
featuredService.pmltq Slovenian|http://lindat.mff.cuni.cz/services/pmltq/ud20_sl/
featuredService.pmltq Slovenian - SST|http://lindat.mff.cuni.cz/services/pmltq/ud20_sl_sst/
featuredService.pmltq Spanish|http://lindat.mff.cuni.cz/services/pmltq/ud20_es/
featuredService.pmltq Spanish - AnCora|http://lindat.mff.cuni.cz/services/pmltq/ud20_es_ancora/
featuredService.pmltq Swedish|http://lindat.mff.cuni.cz/services/pmltq/ud20_sv/
featuredService.pmltq Swedish - LinES|http://lindat.mff.cuni.cz/services/pmltq/ud20_sv_lines/
featuredService.pmltq Tamil|http://lindat.mff.cuni.cz/services/pmltq/ud20_ta/
featuredService.pmltq Turkish|http://lindat.mff.cuni.cz/services/pmltq/ud20_tr/
featuredService.pmltq Ukrainian|http://lindat.mff.cuni.cz/services/pmltq/ud20_uk/
featuredService.pmltq Urdu|http://lindat.mff.cuni.cz/services/pmltq/ud20_ur/
featuredService.pmltq Uyghur|http://lindat.mff.cuni.cz/services/pmltq/ud20_ug/
featuredService.pmltq Vietnamese|http://lindat.mff.cuni.cz/services/pmltq/ud20_vi/


 Files in this item — Download all files in item (399.22 MB)

This item is
Publicly Available
and licensed under:
Licence Universal Dependencies v2.0
GNU General Public License, version 3.0; Distributed under Creative Commons
Icon
Name
ud-treebanks-v2.0.tgz
Size
180.67 MB
Format
application/x-gzip
Description
Treebank data
MD5
0fc5576ebade87a0733cc323d529d784
 Download file
Icon
Name
ud-treebanks-conll2017.tgz
Size
174.86 MB
Format
application/x-gzip
Description
Training and development data for the CoNLL 2017 shared task
MD5
f4869f28c376c360c740ef8caafdfffd
 Download file
Icon
Name
ud-documentation-v2.0.tgz
Size
43.5 MB
Format
application/x-gzip
Description
Documentation
MD5
fbe08dd83675da3ac1e54a5ee67d1a69
 Download file
Icon
Name
ud-tools-v2.0.tgz
Size
192.19 KB
Format
application/x-gzip
Description
Tools
MD5
68c0c53a740a87eaeae18cd528a915c2
 Download file

Show simple item record