Soubory tohoto záznamu
Stáhnout všechny soubory záznamu (732.96 KB)
- Název
- w2c.tar.gz
- Velikost
- 165.85 KB
- Formát
- application/x-gzip
- Popis
- W2C toolset source code
- MD5
- 747d9fabca38d085e976950193029ca3
- w2c-src
- pipes
- vertical.sh 476 B
- Makefile 52 B
- strip-html-tags.php 423 B
- wiki-remove-interwiki.sh 409 B
- checkRequirements.sh 149 B
- frequencyList.sh 384 B
- ngrams.pl 1 kB
- wordLengths.pl 793 B
- textStats.pl 7 kB
- Makefile 1 kB
- tests
- config.sh 783 B
- boilerpipe
- Makefile 1 kB
- Makefile 112 B
- wac
- Makefile 2 kB
- create-url-list.sh 648 B
- langDetect
- Makefile 17 B
- test.sh 1 kB
- webAPI
- Makefile 60 B
- test.sh 1 kB
- README 396 B
- checkRequirements.sh 2 kB
- scripts
- lab
- copy-results-to-single-node.sh 2 kB
- web-mini-corpora-lab.sh 1 kB
- delete-created-files.sh 425 B
- process-file-wrapper.sh 1006 B
- copy-to-ufallab.sh 346 B
- process-results-fix-wrapper.sh 2 kB
- missing-languages.sh 565 B
- process-file.sh 1 kB
- info-words.sh 633 B
- process-results-fix.sh 7 kB
- fill-corpora-quotas-wrapper.sh 415 B
- process-results.sh 9 kB
- build-package-wrapper.sh 1 kB
- process-results-wrapper.sh 2 kB
- process-results-overview.sh 14 kB
- run-serial.sh 495 B
- run-parallel.sh 467 B
- build-package.sh 5 kB
- fill-corpora-quotas.sh 2 kB
- download-wikipedias.sh 1 kB
- ufallab
- config.sh 321 B
- copy-files-from-webhosting.sh 980 B
- merge-results.sh 4 kB
- generate-stats.sh 15 kB
- extract-data.sh 535 B
- lab
- data
- tools
- ethnologueParser
- extractInfo.pl 2 kB
- ethnologueParser.sh 1 kB
- fillDB.sh 1 kB
- splitter
- splitter.pl 2 kB
- splitter.sh 247 B
- webAPI
- webAPI.sh 2 kB
- crawlerSimple
- Makefile 61 B
- crawlerSimple.sh 4 kB
- internetSize
- internetSize.sh 6 kB
- corpusAnalysis
- generateCAHTML.sh 1007 B
- frequencyList.pl 1 kB
- config.sh 206 B
- corpusAnalysis.sh 969 B
- grep
- checkRequirements.sh 292 B
- Makefile 318 B
- search
- google.pl 2 kB
- ua.txt 1 kB
- langDetect
- train.pl 5 kB
- filter.pl 2 kB
- config.sh 109 B
- createLanguageModel.sh 3 kB
- train.sh 194 B
- detector.pl 2 kB
- detect.pl 3 kB
- detect.sh 196 B
- eval.pl 7 kB
- eval.sh 192 B
- convert.pl 1 kB
- wikiCorpora
- wikiCorpora.sh 2 kB
- wikiCorpora.pl 602 B
- aspellCoverage
- aspellCoverage.sh 3 kB
- README 208 B
- massExecute
- massExecute.sh 237 B
- massExecute.pl 1 kB
- Makefile 356 B
- langList
- config.sh 154 B
- Makefile 47 B
- langList.sh 201 B
- langInfo.sh 2 kB
- langList.pl 5 kB
- README 55 B
- languages.2010-12-12.all 161 kB
- languages.2010-12-12.wiki 14 kB
- fillLangDB
- fillFiles01.sh 26 kB
- fillLanguageInfo.sh 710 B
- Makefile 108 B
- fillAliases.sh 1 kB
- fillAspellDictionaries.sh 1 kB
- utils
- mergeAllStatistics.sh 7 kB
- cleanFile.sh 1 kB
- xpath
- xpath.sh 94 B
- xpath.pl 663 B
- normalize.pl 1 kB
- processFile.sh 2 kB
- processFiles.sh 2 kB
- expandSearch.pl 1 kB
- config.sh 2 kB
- wikiExternalLinks
- wikiExternalLinks.sh 2 kB
- wikiMiniCorpora
- Makefile 183 B
- wikiMiniCorpora.sh 2 kB
- ethnologueParser
- builder
- html2text.pl 795 B
- parser.pl 6 kB
- links.txt 349 B
- regenerateTexts.sh 2 kB
- create-corpora.pl 45 kB
- log-analyzer.pl 7 kB
- tidy.sh 339 B
- links.small.txt 43 B
- Makefile 1 kB
- create-corpora.sh 3 kB
- README 1 kB
- packData.sh 2 kB
- charter.sh 8 kB
- Common.pm 17 kB
- config.xml 489 B
- extract-results.sh 361 B
- controller.pl 5 kB
- keeper.sh 3 kB
- crawler.pl 9 kB
- create-corpora-local.sh 403 B
- detector.pl 7 kB
- lib
- WikiParser.pm 3 kB
- LanguageModel.pm 652 B
- Makefile 15 B
- t
- LangDetect.t 587 B
- Utils.pm 149 B
- LangDetect.pm 538 B
- experiments
- aspell-coverage-01.sh 6 kB
- word-lengths-01.sh 4 kB
- lang-stats-01.sh 4 kB
- web-mini-corpora.sh 562 B
- wiki-vs-internet-size-01.sh 7 kB
- visualizations
- splitter.pl 536 B
- transpose.sh 411 B
- table.pl 5 kB
- dataStats.sh 165 B
- dataStats.pl 784 B
- dataCombiner.pl 1 kB
- listDiffScore.pl 1 kB
- plot.sh 848 B
- bin
- wikiExternalLinks.sh 0 B
- normalize.pl 0 B
- splitter.pl 0 B
- crawlerSimple.sh 0 B
- langList.sh 0 B
- xpath.sh 0 B
- wikiCorpora.sh 0 B
- corpusAnalysis.sh 0 B
- aspellCoverage.sh 0 B
- wikiMiniCorpora.sh 0 B
- langInfo.sh 0 B
- webAPI.sh 0 B
- generateCAHTML.sh 0 B
- pipes
- Název
- tr46.pdf
- Velikost
- 567.11 KB
- Formát
- Popis
- Technical Report - documentation
- MD5
- 824ef862d75b40fc324d54b13a592ee1