Available Languages
Each language in MuNeCo is set up as a corpus on its own. Therefore, you first have to select a language before you can search.
| | Language | Size | NLP Status | |
---|---|---|---|---|
select | Adyghe | 4M | ||
select | Afrikaans | 8.1M | tld | |
select | Albanian, Tosk | 8.9M | tl | |
select | Amharic | 1.4M | ||
select | Arabic, Standard | 16M | tld | |
select | Aragonese | 817k | ||
select | Armenian | 9.1M | tld | |
select | Asturian | 4.1M | xl | |
select | Aymara, Central | 2.2k | ||
select | Azerbaijani, Northern | 5M | ||
select | Bashkir | 3.4M | ||
select | Basque | 32M | tld | |
select | Belarusian | 11M | tld | |
select | Bengali | 1.3M | ||
select | Breton | 667k | ||
select | Bulgarian | 4.9M | tld | |
select | Buryat, Russian | 1.8M | ||
select | Catalan | 32M | tld | |
select | Chechen | 614k | ||
select | Chuvash | 2.5M | ||
select | Crimean Tatar | 199k | ||
select | Croatian | 16M | tld | |
select | Czech | 29M | tld | |
select | Danish | 12M | tld | |
select | Dari | 5.4M | ||
select | Dutch | 32M | tld | |
select | English | 32M | tld | |
select | Estonian | 5.6M | tld | |
select | Faroese | 4.9M | tld | |
select | Finnish | 7.9M | tld | |
select | French | 11M | tld | |
select | Frisian, West | 6M | tl | |
select | Friulian | 2.7M | ||
select | Gagauz | 320k | ||
select | Galician | 32M | tld | |
select | Ganda | 5.3M | ||
select | Georgian | 3.9M | ||
select | German | 15M | tld | |
select | Greek | 3.3M | tld | |
select | Guaraní, Paraguayan | 548k | ||
select | Haitian Creole | 252k | ||
select | Hausa | 4.4M | ||
select | Hebrew | 9.6M | tld | |
select | Hungarian | 14M | tld | |
select | Ibanag | 28k | ||
select | Icelandic | 24M | tld | |
select | Igbo | 1M | ||
select | Ilocano | 780k | ||
select | Indonesian | 2.5M | tld | |
select | Inuktitut, Greenlandic | 7.8M | ||
select | Irish | 3M | tld | |
select | Italian | 23M | tld | |
select | Kabuverdianu | 64k | ||
select | Kalmyk Oirat | 635k | ||
select | Karakalpak | 1.1M | ||
select | Kazakh | 3.1M | ||
select | Khmer, Central | 811k | ||
select | Kinyarwanda | 2.6M | ||
select | Kirghiz | 3M | ||
select | Korean | 4.2M | tld | |
select | Kurdish, Central | 585k | ||
select | Ladin | 1.4M | ||
select | Ladino | 753k | ||
select | Lao | 490k | ||
select | Latvian | 13M | tld | |
select | Lezghian | 565k | ||
select | Lithuanian | 1.5M | tld | |
select | Luxembourgish | 7.9M | ||
select | Macedonian | 4M | tl | |
select | Malayalam | 6.7M | ||
select | Malaysian, Standard | 5.8M | ||
select | Maltese | 14M | td | |
select | Maori | 2.4M | ||
select | Mauritian Kreol | 978k | ||
select | Maya, Yucatán | 287k | ||
select | Mirandese | 192k | ||
select | Moksha | 367k | ||
select | Mongolian | 1.6M | ||
select | Ndebele | 1.7M | ||
select | Ndonga | 4.7M | ||
select | Nenets | 513k | ||
select | Norwegian | 31M | tld | |
select | Nyanja | 559k | ||
select | Occitan | 3.5M | ||
select | Oromo | 290k | ||
select | Ossetian | 134k | ||
select | Pangasinan | 52k | ||
select | Papiamento | 3.2M | ||
select | Pashto, Northern | 2.3M | ||
select | Pnar | 1.1M | ||
select | Polish | 13M | tld | |
select | Portuguese | 32M | tld | |
select | Romani, Balkan | 23k | ||
select | Romanian | 5.3M | tld | |
select | Romansch | 3.3M | ||
select | Russian | 13M | tld | |
select | Sami, Northern | 1.5M | tld | |
select | Samoan | 2.9M | ||
select | Sardinian, Logudorese | 536k | ||
select | Serbian | 2.5M | tld | |
select | Shona | 1.8M | ||
select | Slovak | 25M | tld | |
select | Slovenian | 15M | tld | |
select | Somali | 3.3M | ||
select | Sorbian, Upper | 1.3M | ||
select | Spanish | 32M | tld | |
select | Swahili | 17M | ||
select | Swedish | 11M | tld | |
select | Tagalog | 7.4M | ||
select | Tajiki | 5.2M | ||
select | Tamil | 979k | tld | |
select | Tatar | 4.9M | ||
select | Tetun | 3.2M | ||
select | Tigrinya | 1.4M | ||
select | Tok Pisin | 349k | ||
select | Tongan | 263k | ||
select | Tswana | 1.4M | ||
select | Turkish | 3.1M | tld | |
select | Turkmen | 2.6M | ||
select | Tuva | 1M | ||
select | Ukrainian | 7.5M | tld | |
select | Udmurt | 720k | ||
select | Urdu | 5.5M | tld | |
select | Uyghur | 4.8M | tld | |
select | Uzbek, Northern | 2.5M | ||
select | Veps | 103k | ||
select | Vietnamese | 5.6M | tld | |
select | Võro | 374k | ||
select | Waray-Waray | 16k | ||
select | Welsh | 8.2M | tld | |
select | Wolof | 17k | ||
select | Yakut | 2.2M | ||
select | Yiddish, Eastern | 2M | ||
select | Yoruba | 1.1M | ||
select | Zazaki, Kirmanjki | 75k | ||
select | Zulu | 711k |
136 languages - 845M tokens in total