Show simple item record

 
dc.contributor.author Savary, Agata
dc.contributor.author Ramisch, Carlos
dc.contributor.author Guillaume, Bruno
dc.contributor.author Hawwari, Abdelati
dc.contributor.author Walsh, Abigail
dc.contributor.author Fotopoulou, Aggeliki
dc.contributor.author Bielinskienė, Agnė
dc.contributor.author Estarrona, Ainara
dc.contributor.author Gatt, Albert
dc.contributor.author Butler, Alexandra
dc.contributor.author Rademaker, Alexandre
dc.contributor.author Maldonado, Alfredo
dc.contributor.author Villavicencio, Aline
dc.contributor.author Farrugia, Alison
dc.contributor.author Muscat, Amanda
dc.contributor.author Gatt, Anabelle
dc.contributor.author Antić, Anđela
dc.contributor.author De Santis, Anna
dc.contributor.author Raffone, Annalisa
dc.contributor.author Riccio, Anna
dc.contributor.author Pascucci, Antonio
dc.contributor.author Gurrutxaga, Antton
dc.contributor.author Bhatia, Archna
dc.contributor.author Vaidya, Ashwini
dc.contributor.author Miral, Ayşenur
dc.contributor.author QasemiZadeh, Behrang
dc.contributor.author Priego Sanchez, Belem
dc.contributor.author Griciūtė, Bernadeta
dc.contributor.author Erden, Berna
dc.contributor.author Parra Escartín, Carla
dc.contributor.author Herrero, Carlos
dc.contributor.author Carlino, Carola
dc.contributor.author Pasquer, Caroline
dc.contributor.author Liebeskind, Chaya
dc.contributor.author Wang, Chenweng
dc.contributor.author Ben Khelil, Chérifa
dc.contributor.author Bonial, Claire
dc.contributor.author Somers, Clarissa
dc.contributor.author Aceta, Cristina
dc.contributor.author Krstev, Cvetana
dc.contributor.author Bejček, Eduard
dc.contributor.author Lindqvist, Ellinor
dc.contributor.author Erenmalm, Elsa
dc.contributor.author Palka-Binkiewicz, Emilia
dc.contributor.author Rimkute, Erika
dc.contributor.author Petterson, Eva
dc.contributor.author Cap, Fabienne
dc.contributor.author Hu, Fangyuan
dc.contributor.author Sangati, Federico
dc.contributor.author Wick Pedro, Gabriela
dc.contributor.author Speranza, Giulia
dc.contributor.author Jagfeld, Glorianna
dc.contributor.author Blagus, Goranka
dc.contributor.author Berk, Gözde
dc.contributor.author Attard, Greta
dc.contributor.author Eryiğit, Gülşen
dc.contributor.author Finnveden, Gustav
dc.contributor.author Martínez Alonso, Héctor
dc.contributor.author de Medeiros Caseli, Helena
dc.contributor.author Elyovich, Hevi
dc.contributor.author Xu, Hongzhi
dc.contributor.author Xiao, Huangyang
dc.contributor.author Miranda, Isaac
dc.contributor.author Jaknić, Isidora
dc.contributor.author El Maarouf, Ismail
dc.contributor.author Aduriz, Itziar
dc.contributor.author Gonzalez, Itziar
dc.contributor.author Matas, Ivana
dc.contributor.author Stoyanova, Ivelina
dc.contributor.author Jazbec, Ivo-Pavao
dc.contributor.author Busuttil, Jael
dc.contributor.author Waszczuk, Jakub
dc.contributor.author Findlay, Jamie
dc.contributor.author Bonnici, Janice
dc.contributor.author Šnajder, Jan
dc.contributor.author Antoine, Jean-Yves
dc.contributor.author Foster, Jennifer
dc.contributor.author Chen, Jia
dc.contributor.author Nivre, Joakim
dc.contributor.author Monti, Johanna
dc.contributor.author McCrae, John
dc.contributor.author Kovalevskaitė, Jolanta
dc.contributor.author Jain, Kanishka
dc.contributor.author Simkó, Katalin
dc.contributor.author Yu, Ke
dc.contributor.author Azzopardi, Kirsty
dc.contributor.author Adalı, Kübra
dc.contributor.author Uria, Larraitz
dc.contributor.author Zilio, Leonardo
dc.contributor.author Boizou, Loïc
dc.contributor.author van der Plas, Lonneke
dc.contributor.author Galea, Luke
dc.contributor.author Sarlak, Mahtab
dc.contributor.author Buljan, Maja
dc.contributor.author Cherchi, Manuela
dc.contributor.author Tanti, Marc
dc.contributor.author Di Buono, Maria Pia
dc.contributor.author Todorova, Maria
dc.contributor.author Candito, Marie
dc.contributor.author Constant, Matthieu
dc.contributor.author Shamsfard, Mehrnoush
dc.contributor.author Jiang, Menghan
dc.contributor.author Boz, Mert
dc.contributor.author Spagnol, Michael
dc.contributor.author Onofrei, Mihaela
dc.contributor.author Li, Minli
dc.contributor.author Elbadrashiny, Mohamed
dc.contributor.author Diab, Mona
dc.contributor.author Rizea, Monica-Mihaela
dc.contributor.author Hadj Mohamed, Najet
dc.contributor.author Theoxari, Natasa
dc.contributor.author Schneider, Nathan
dc.contributor.author Tabone, Nicole
dc.contributor.author Ljubešić, Nikola
dc.contributor.author Vale, Oto
dc.contributor.author Cook, Paul
dc.contributor.author Yan, Peiyi
dc.contributor.author Gantar, Polona
dc.contributor.author Ehren, Rafael
dc.contributor.author Fabri, Ray
dc.contributor.author Ibrahim, Rehab
dc.contributor.author Ramisch, Renata
dc.contributor.author Walles, Rinat
dc.contributor.author Wilkens, Rodrigo
dc.contributor.author Urizar, Ruben
dc.contributor.author Sun, Ruilong
dc.contributor.author Malka, Ruth
dc.contributor.author Galea, Sara Anne
dc.contributor.author Stymne, Sara
dc.contributor.author Louizou, Sevasti
dc.contributor.author Hu, Sha
dc.contributor.author Taslimipoor, Shiva
dc.contributor.author Ratori, Shraddha
dc.contributor.author Srivastava, Shubham
dc.contributor.author Cordeiro, Silvio Ricardo
dc.contributor.author Krek, Simon
dc.contributor.author Liu, Siyuan
dc.contributor.author Zeng, Si
dc.contributor.author Yu, Songping
dc.contributor.author Arhar Holdt, Špela
dc.contributor.author Markantonatou, Stella
dc.contributor.author Papadelli, Stella
dc.contributor.author Leseva, Svetlozara
dc.contributor.author Kuzman, Taja
dc.contributor.author Kavčič, Teja
dc.contributor.author Lynn, Teresa
dc.contributor.author Lichte, Timm
dc.contributor.author Pickard, Thomas
dc.contributor.author Dimitrova, Tsvetana
dc.contributor.author Yih, Tsy
dc.contributor.author Güngör, Tunga
dc.contributor.author Dinç, Tutkum
dc.contributor.author Iñurrieta, Uxoa
dc.contributor.author Tajalli, Vahide
dc.contributor.author Stefanova, Valentina
dc.contributor.author Caruso, Valeria
dc.contributor.author Puri, Vandana
dc.contributor.author Foufi, Vassiliki
dc.contributor.author Barbu Mititelu, Verginica
dc.contributor.author Vincze, Veronika
dc.contributor.author Kovács, Viktória
dc.contributor.author Shukla, Vishakha
dc.contributor.author Giouli, Voula
dc.contributor.author Ge, Xiaomin
dc.contributor.author Ha-Cohen Kerner, Yaakov
dc.contributor.author Öztürk, Yağmur
dc.contributor.author Yarandi, Yalda
dc.contributor.author Parmentier, Yannick
dc.contributor.author Zhang, Yongchen
dc.contributor.author Zhao, Yun
dc.contributor.author Urešová, Zdeňka
dc.contributor.author Yirmibeşoğlu, Zeynep
dc.contributor.author Qin, Zhenzhen
dc.contributor.author Stank
dc.contributor.author Cristescu, Mihaela
dc.contributor.author Zgreabăn, Bianca-Mădălina
dc.contributor.author Bărbulescu, Elena-Andreea
dc.contributor.author Stanković, Ranka
dc.date.accessioned 2023-05-10T11:36:56Z
dc.date.available 2023-05-10T11:36:56Z
dc.date.issued 2023-05-10
dc.identifier.uri http://hdl.handle.net/11372/LRT-5124
dc.description This multilingual resource contains corpora in which verbal multiword expressions (VMWEs) have been manually annotated. VMWEs include idioms (let the cat out of the bag), light-verb constructions (make a decision), verb-particle constructions (give up), inherently reflexive verbs (help oneself), and multi-verb constructions (make do). This is the first release of the corpora without an associated shared task. The previous version (1.2) was associated with the PARSEME Shared Task on semi-supervised Identification of Verbal MWEs (2020). The data covers 26 languages, corresponding to the combination of the corpora from all three previous editions (1.0, 1.1 and 1.2). VMWEs were annotated according to the universal guidelines. The corpora are provided in the .cupt format, inspired by the CoNLL-U format. Morphological and syntactic information, including parts of speech, lemmas, morphological features and/or syntactic dependencies, is also provided. Depending on the language, the information comes from treebanks (e.g., Universal Dependencies) or from automatic parsers trained on treebanks (e.g., UDPipe). All corpora are split into training, development and test data, following the splitting strategy adopted for the PARSEME Shared Task 1.2. The annotation guidelines are available online: https://parsemefr.lis-lab.fr/parseme-st-guidelines/1.3 The .cupt format is detailed here: https://multiword.sourceforge.net/cupt-format/
dc.language.iso ara
dc.language.iso bul
dc.language.iso ces
dc.language.iso deu
dc.language.iso ell
dc.language.iso eng
dc.language.iso spa
dc.language.iso eus
dc.language.iso fas
dc.language.iso fra
dc.language.iso gle
dc.language.iso heb
dc.language.iso hin
dc.language.iso hrv
dc.language.iso hun
dc.language.iso lit
dc.language.iso ita
dc.language.iso mlt
dc.language.iso pol
dc.language.iso por
dc.language.iso ron
dc.language.iso slv
dc.language.iso srp
dc.language.iso swe
dc.language.iso tur
dc.language.iso zho
dc.publisher PARSEME
dc.relation.isreferencedby https://aclanthology.org/2023.mwe-1.6/
dc.relation.replaces http://hdl.handle.net/11234/1-3367
dc.rights PARSEME Corpora v. 1.3 - Licence Agreement
dc.rights.uri https://lindat.mff.cuni.cz/repository/xmlui/page/licence-mwe-1.3
dc.source.uri https://gitlab.com/parseme/corpora/-/wikis/home
dc.subject multiword expressions
dc.subject verbal multiword expressions
dc.subject light verb construction
dc.subject verb-particle constructions
dc.subject inherently reflexive verbs
dc.subject verbal idioms
dc.subject multi-verb constructions
dc.title PARSEME corpora annotated for verbal multiword expressions (version 1.3)
dc.type corpus
metashare.ResourceInfo#ContentInfo.mediaType text
dc.rights.label PUB
has.files yes
branding LRT + Open Submissions
demo.uri http://parseme.grew.fr/
contact.person Agata Savary agata.savary@universite-paris-saclay.fr Université Paris Saclay
contact.person Carlos Ramisch carlos.ramisch@lis-lab.fr Aix Marseille University
size.info 455629 sentences
size.info 9264811 tokens
size.info 127498 multiWordUnits
files.size 134135315
files.count 27


 Files in this item

 Download all files in item (127.92 MB)
This item is
Publicly Available
and licensed under:
PARSEME Corpora v. 1.3 - Licence Agreement
GNU General Public License, version 3.0 Distributed under Creative Commons
Icon
Name
README.md
Size
7.08 KB
Format
Unknown
Description
General README file
MD5
5902de46b35f82c79183b20d67ab13de
 Download file
Icon
Name
AR.tgz
Size
10.78 MB
Format
application/x-gzip
Description
Arabic files
MD5
73fe213c348928f5eb49a635a6f02a01
 Download file  Preview
 File Preview  
  • AR
    • README.md4 kB
    • split-stats.txt1010 B
    • test-stats.md177 B
    • test.cupt6 MB
    • dev.cupt2 MB
    • total-stats.md180 B
    • dev-stats.md161 B
    • train-stats.md183 B
    • train.cupt39 MB
Icon
Name
BG.tgz
Size
6.48 MB
Format
application/x-gzip
Description
Bulgarian files
MD5
7ccee1056d5621a9b509cf727a678525
 Download file  Preview
 File Preview  
  • BG
    • README.md3 kB
    • split-stats.txt1 kB
    • test-stats.md180 B
    • test.cupt8 MB
    • dev.cupt2 MB
    • total-stats.md182 B
    • dev-stats.md176 B
    • train-stats.md186 B
    • train.cupt30 MB
Icon
Name
CS.tgz
Size
12.86 MB
Format
application/x-gzip
Description
Czech files
MD5
9fe9764dc970e2c646049533a81ccda6
 Download file  Preview
 File Preview  
  • CS
    • README.md4 kB
    • split-stats.txt1 kB
    • test-stats.md147 B
    • test.cupt9 MB
    • dev.cupt2 MB
    • total-stats.md149 B
    • dev-stats.md142 B
    • train-stats.md153 B
    • train.cupt69 MB
Icon
Name
DE.tgz
Size
2.25 MB
Format
application/x-gzip
Description
German files
MD5
eaee4a615ce4abd74aab58ea72d5c12e
 Download file  Preview
 File Preview  
  • DE
    • README.md5 kB
    • split-stats.txt1 kB
    • test-stats.md201 B
    • test.cupt2 MB
    • dev.cupt881 kB
    • total-stats.md205 B
    • dev-stats.md198 B
    • train-stats.md210 B
    • train.cupt8 MB
Icon
Name
EL.tgz
Size
10.6 MB
Format
application/x-gzip
Description
Greek files
MD5
125789048de3a0ee764cc3d9f34bc854
 Download file  Preview
 File Preview  
  • EL
    • README.md8 kB
    • split-stats.txt1 kB
    • test-stats.md183 B
    • test.cupt7 MB
    • dev.cupt2 MB
    • total-stats.md199 B
    • dev-stats.md166 B
    • train-stats.md204 B
    • train.cupt53 MB
Icon
Name
EN.tgz
Size
1.59 MB
Format
application/x-gzip
Description
English files
MD5
b8c356eefeb174e0984f6c7b1188dba9
 Download file  Preview
 File Preview  
  • EN
    • README.md3 kB
    • split-stats.txt1 kB
    • test-stats.md217 B
    • test.cupt3 MB
    • dev.cupt1 MB
    • total-stats.md215 B
    • dev-stats.md211 B
    • train-stats.md216 B
    • train.cupt1 MB
Icon
Name
ES.tgz
Size
2.09 MB
Format
application/x-gzip
Description
Spanish files
MD5
588b050f3cd655d1dd6df000b0d702da
 Download file  Preview
 File Preview  
  • ES
    • README.md4 kB
    • split-stats.txt1 kB
    • test-stats.md211 B
    • test.cupt3 MB
    • dev.cupt1 MB
    • total-stats.md211 B
    • dev-stats.md187 B
    • train-stats.md198 B
    • train.cupt7 MB
Icon
Name
EU.tgz
Size
2.02 MB
Format
application/x-gzip
Description
Basque files
MD5
5b9d3da6fcdce7e800b1c1ea07eb6ef1
 Download file  Preview
 File Preview  
  • EU
    • README.md6 kB
    • split-stats.txt1 kB
    • test-stats.md152 B
    • test.cupt4 MB
    • dev.cupt1 MB
    • total-stats.md151 B
    • dev-stats.md149 B
    • train-stats.md153 B
    • train.cupt4 MB
Icon
Name
FA.tgz
Size
703.09 KB
Format
application/x-gzip
Description
Farsi files
MD5
d0459becd9d685241b241384ec79ad57
 Download file  Preview
 File Preview  
  • FA
    • README.md5 kB
    • split-stats.txt1 kB
    • test-stats.md127 B
    • test.cupt1 MB
    • dev.cupt358 kB
    • total-stats.md140 B
    • dev-stats.md125 B
    • train-stats.md145 B
    • train.cupt2 MB
Icon
Name
FR.tgz
Size
6.12 MB
Format
application/x-gzip
Description
French files
MD5
755009c7e5ba96e74cedc14ec802eb2b
 Download file  Preview
 File Preview  
  • FR
    • README.md7 kB
    • split-stats.txt1 kB
    • test-stats.md180 B
    • test.cupt7 MB
    • dev.cupt2 MB
    • total-stats.md181 B
    • dev-stats.md164 B
    • train-stats.md186 B
    • train.cupt23 MB
Icon
Name
GA.tgz
Size
494.12 KB
Format
application/x-gzip
Description
Irish files
MD5
cb2b193f7ce5bd60a77ba55efbd8232f
 Download file  Preview
 File Preview  
  • GA
    • README.md4 kB
    • split-stats.txt1 kB
    • test-stats.md202 B
    • test.cupt1 MB
    • dev.cupt546 kB
    • total-stats.md200 B
    • dev-stats.md195 B
    • train-stats.md197 B
    • train.cupt512 kB
Icon
Name
HE.tgz
Size
5.26 MB
Format
application/x-gzip
Description
Hebrew files
MD5
f2e883e1a108a3888fb2628d769b9c3c
 Download file  Preview
 File Preview  
  • HE
    • README.md5 kB
    • split-stats.txt1 kB
    • test-stats.md169 B
    • test.cupt5 MB
    • dev.cupt1 MB
    • total-stats.md172 B
    • dev-stats.md166 B
    • train-stats.md175 B
    • train.cupt21 MB
Icon
Name
HI.tgz
Size
469.3 KB
Format
application/x-gzip
Description
Hindi files
MD5
1d8dbf79b80326f797d517f3f993d04d
 Download file  Preview
 File Preview  
  • HI
    • README.md3 kB
    • split-stats.txt998 B
    • test-stats.md163 B
    • test.cupt1 MB
    • dev.cupt647 kB
    • total-stats.md161 B
    • dev-stats.md159 B
    • train-stats.md161 B
    • train.cupt824 kB
Icon
Name
HR.tgz
Size
1.98 MB
Format
application/x-gzip
Description
Croatian files
MD5
951cd6b5948ee8e1aa6a9a4a8bf41336
 Download file  Preview
 File Preview  
  • HR
    • README.md3 kB
    • split-stats.txt1 kB
    • test-stats.md198 B
    • test.cupt3 MB
    • dev.cupt1 MB
    • total-stats.md199 B
    • dev-stats.md176 B
    • train-stats.md182 B
    • train.cupt5 MB
Icon
Name
HU.tgz
Size
1.88 MB
Format
application/x-gzip
Description
Hungarian files
MD5
a1153a044795ee7a9151e0ad2f9e25c1
 Download file  Preview
 File Preview  
  • HU
    • README.md3 kB
    • split-stats.txt1 kB
    • test-stats.md192 B
    • test.cupt5 MB
    • dev.cupt1 MB
    • total-stats.md191 B
    • dev-stats.md189 B
    • train-stats.md193 B
    • train.cupt4 MB
Icon
Name
IT.tgz
Size
4.67 MB
Format
application/x-gzip
Description
Italian files
MD5
565fb5c73667b4ac55e8aacf20680501
 Download file  Preview
 File Preview  
  • IT
    • README.md10 kB
    • split-stats.txt1 kB
    • test-stats.md232 B
    • test.cupt5 MB
    • dev.cupt1 MB
    • total-stats.md251 B
    • dev-stats.md224 B
    • train-stats.md253 B
    • train.cupt16 MB
Icon
Name
LT.tgz
Size
2.98 MB
Format
application/x-gzip
Description
Lithuanian files
MD5
8f94517eebae1216e80ea6effc97a91a
 Download file  Preview
 File Preview  
  • LT
    • README.md5 kB
    • split-stats.txt1 kB
    • test-stats.md151 B
    • test.cupt10 MB
    • dev.cupt3 MB
    • total-stats.md148 B
    • dev-stats.md146 B
    • train-stats.md149 B
    • train.cupt3 MB
Icon
Name
MT.tgz
Size
2.78 MB
Format
application/x-gzip
Description
Maltese files
MD5
12ee7b2105eeac324386c859a7ef7816
 Download file  Preview
 File Preview  
  • MT
    • README.md6 kB
    • split-stats.txt864 B
    • test-stats.md174 B
    • test.cupt3 MB
    • dev.cupt1 MB
    • total-stats.md192 B
    • dev-stats.md139 B
    • train-stats.md169 B
    • train.cupt7 MB
Icon
Name
PL.tgz
Size
6.99 MB
Format
application/x-gzip
Description
Polish files
MD5
bdae0922e513f36c000b47360980ffc9
 Download file  Preview
 File Preview  
  • PL
    • README.md12 kB
    • split-stats.txt1 kB
    • test-stats.md166 B
    • test.cupt6 MB
    • dev.cupt2 MB
    • total-stats.md167 B
    • dev-stats.md163 B
    • train-stats.md172 B
    • train.cupt30 MB
Icon
Name
PT.tgz
Size
7.59 MB
Format
application/x-gzip
Description
Portuguese files
MD5
2c96f436546787f976e20a2022abf516
 Download file  Preview
 File Preview  
  • PT
    • README.md10 kB
    • split-stats.txt1 kB
    • test-stats.md180 B
    • test.cupt6 MB
    • dev.cupt2 MB
    • total-stats.md182 B
    • dev-stats.md162 B
    • train-stats.md184 B
    • train.cupt28 MB
Icon
Name
RO.tgz
Size
12.33 MB
Format
application/x-gzip
Description
Romanian files
MD5
7efcbd0b9902d925c11f014b6ccd3c18
 Download file  Preview
 File Preview  
  • RO
    • README.md4 kB
    • split-stats.txt1 kB
    • test-stats.md185 B
    • test.cupt29 MB
    • dev.cupt10 MB
    • total-stats.md184 B
    • dev-stats.md180 B
    • train-stats.md186 B
    • train.cupt35 MB
Icon
Name
SL.tgz
Size
8.35 MB
Format
application/x-gzip
Description
Slovenian files
MD5
6933ab467e6bef5e52d0656075e42618
 Download file  Preview
 File Preview  
  • SL
    • README.md3 kB
    • split-stats.txt1 kB
    • test-stats.md181 B
    • test.cupt15 MB
    • dev.cupt4 MB
    • total-stats.md180 B
    • dev-stats.md175 B
    • train-stats.md184 B
    • train.cupt24 MB
Icon
Name
SR.tgz
Size
1.11 MB
Format
application/x-gzip
Description
Serbian files
MD5
0ad8cad8ca462ea837445d2166bc722a
 Download file  Preview
 File Preview  
  • SR
    • README.md2 kB
    • split-stats.txt1 kB
    • test-stats.md165 B
    • test.cupt2 MB
    • dev.cupt1017 kB
    • total-stats.md162 B
    • dev-stats.md160 B
    • train-stats.md166 B
    • train.cupt2 MB
Icon
Name
SV.tgz
Size
1.44 MB
Format
application/x-gzip
Description
Swedish files
MD5
5c71eb09a2bb773b21141a13e8e40a88
 Download file  Preview
 File Preview  
  • SV
    • README.md4 kB
    • split-stats.txt1 kB
    • test-stats.md205 B
    • test.cupt3 MB
    • dev.cupt1 MB
    • total-stats.md203 B
    • dev-stats.md198 B
    • train-stats.md206 B
    • train.cupt4 MB
Icon
Name
TR.tgz
Size
4.55 MB
Format
application/x-gzip
Description
Turkish files
MD5
1c36bfd64fba1d93f9deca35e3272ed1
 Download file  Preview
 File Preview  
  • TR
    • README.md6 kB
    • split-stats.txt671 B
    • test-stats.md131 B
    • test.cupt5 MB
    • dev.cupt1 MB
    • total-stats.md144 B
    • dev-stats.md129 B
    • train-stats.md149 B
    • train.cupt20 MB
Icon
Name
ZH.tgz
Size
9.61 MB
Format
application/x-gzip
Description
Chinese files
MD5
362b4150e0fda49a0915130bc85a6712
 Download file  Preview
 File Preview  
  • ZH
    • README.md7 kB
    • split-stats.txt1 kB
    • test-stats.md183 B
    • test.cupt2 MB
    • dev.cupt952 kB
    • total-stats.md189 B
    • dev-stats.md181 B
    • train-stats.md193 B
    • train.cupt34 MB

Show simple item record