DATA CLASSES
Perl Python Java
DOCUMENT
import the class use Udapi::Core::Document; from udapi.core.document import Document import cz.ufal.udapi.core.impl.DefaultDocument;
import cz.ufal.udapi.core.Document;
• construction my $doc = Udapi::Core::Document->new() doc = Document() Document doc = new DefaultDocument();
• load a document from a file $doc->load_conllu('test.conllu') doc.load_conllu('test.conllu') doc = new CoNLLUReader("test.conllu").readDocument();
• save a document to a file $doc->save_conllu('test.conllu') doc.store_conllu('test.conllu') CoNLLUWriter coNLLUWriter = new CoNLLUWriter();
coNLLUWriter.writeDocument(doc, Paths.get("test.conllu"));
• get all bundles in the document my @buns = $doc->bundles buns = doc.bundles # list version
for bundle in doc: # iterator version
List<Bundle> buns = doc.getBundles();
• create a new bundle and append it to the end of the document my $bun = $doc->create_bundle() bun = doc.create_bundle() Bundle bun = doc.createBundle();
• delete document (free memory) $doc->destroy() # not needed // not needed
BUNDLE
import the class use Udapi::Core::Bundle; from udapi.core.bundle import Bundle import cz.ufal.udapi.core.Bundle;
import cz.ufal.udapi.core.impl.DefaultBundle;
• construction my $bun = Udapi::Core::Bundle->new() bun = Bundle() Bundle bun = new DefaultBundle(doc);
• get all trees contained in the bundle my @trees = $bun->trees trees = bun.trees # list version
for tree in bun: # iterator version
List<Root> trees = bun.getTrees();
• get the tree with a given zone my $root = $bun->get_tree($zone) root = bun.get_tree(zone) Optional<Root> root = bun.getTree(zone);
• create a new tree in the bundle my $root = $bun->create_tree($zone) root = bun.create_tree(zone) bun.createTree();
• add an existing tree to the bundle $bun->add_tree($root) bun.add_tree(root) bun.addTree(root);
• document in which the bundle is contained $bun->document bun.document bun.getDocument();
• get id of the bundle (any string) my $id = $bun->id id = bun.id bun.getId();
• get 1-based index of the bundle within document my $index = $bun->number index = bun.number int index = bun.getNumber();
• remove (=delete) a bundle $bundle->remove() bundle.remove() bundle.remove();
NODE
import the class use Udapi::Core::Node; from udapi.core.node import Node import cz.ufal.udapi.core.Node;
import cz.ufal.udapi.core.impl.DefaultNode;
• constructor my $node = Udapi::Core::Node->new(); node = Node() Node node = new DefaultNode(tree);
• get an attribute value my $lemma = $node->lemma lemma = node.lemma String lemma = node.getLemma();
• write an attribute value $node->set_lemma($lemma); node.lemma = lemma node.setLemma(lemma);
• get the node's parent my $parent = $node->parent parent = node.parent Optional<Node> parent = node.getParent();
• set the node's parent $node->set_parent($new_parent) node.set_parent(new_parent) node.setParent(newParent);
• create a node (as the last node in the whole tree) my $child = $node->create_child() child = node.create_child() Node child = node.createChild();
• remove a node (delete it and all its descendants) $node->remove() node.remove() node.remove();
• get node's descendants (sorted by ord) my @nodes = $node->descendants() nodes = node.descendants() List<Node> nodes = node.getDescendants();
• get all node's children (sorted by ord) my @nodes = $node->children() nodes = node.children() List<Node> nodes = node.getChildren();
• get the last child or the node itself my $node = $node->children({last_only=>1, add_self=>1}) List<Node> nodes = node.getChildren(EnumSet.of(Node.ChildrenArg.ADD_SELF, Node.ChildrenArg.LAST_ONLY));
• shift (=reorder) a node (with its whole subtree) after a target node $node->shift_after_node($target) node.shift_after_node(target) node.shiftAfterNode(target);
• shift a node without its subtree before a target node's subtree $node->shift_before_subtree($target, {without_children=>1}) node.shift_before_subtree(target,without_children=1) node.shiftBeforeSubtree(target, EnumSet.of(Node.ShiftArg.WITHOUT_CHILDREN));
• Is the given node a descendant (transitive child) of a given target node my $bool = $node->is_descendant_of($target) bool = node.is_descendant_of(target) boolean bool = node.isDescendantOf(target);
• get full id of the node (root_address # node_id) my $address = $node->address address = node.address() node.getAddress();
other methods zone, bundle, root, document, is_root, next_node, prev_node, precedes($another_node), get_attrs(@names) getZone(), getBundle(), getRoot(), getDocument(), isRoot(), getNextNode(), getPrevNode(), precedes(anotherNode)
ROOT
import the class use Udapi::Core::Node::Root; from udapi.core.root import Root import cz.ufal.udapi.core.Root;
• get the bundle in which the tree is located my $bun = $root->bundle bun = root.bundle Bundle bun = root.getBundle();
• remove the whole tree from the bundle $root->remove() root.remove() root.remove();
• get sentence string (if stored) my $sen = $root->sentence sen = root.sentence String sen = root.getSentence();
• get zone of the tree my $zone = $root->zone zone = root.zone String zone = root.getZone();
• set zone of the tree $root->set_zone($zone) root.set_zone(zone) root.setZone(zone);
• copy tree my $new_root = $root->copy_tree() Root newRoot = root.copyTree();
• get full id of the root ("bundle_id/zone" or "bundle_id" if zone is empty) my $address = $root->address address = root.address() root.getAddress();
• create a node (as the last node in the whole tree) my $child = $root->create_child() child = root.create_child() Node child = root.createChild();
• access root's lemma (or form, upos, xpos, feats, deprel, deps), should return a placeholder '<ROOT>' my $lemma = $root->lemma lemma = root.lemma String lemma = root.getLemma();
• calling shift_* methods on root (e.g. shift_after_node) results in exception "Cannot call shift_* methods on root" Shift methods can be called on root.getNode()
all (other) methods of Node yes yes Root is composite, root.getNode() implements all the methods of the Node
PROCESSING CLASSES
Perl Python Java
BLOCK:
basic processing unit
use Udapi::Core::Block; from udapi.core.block import Block import cz.ufal.udapi.core.Block;
construction (of a derived class) use Udapi::Block::Dummy; from udapi.block.dummy import Dummy import cz.ufal.udapi.block.Dummy;
block methods process_(document|bundle|tree|node|start|end) _should_process_(bundle|tree) process_(document|bundle|tree|node|start|end) process(Document|Bundle|Tree|Node|Start|End) (before|after)Process(Bundle|Tree|Document)
block parameters zones
RUNNER: (support for the command-line execution) use Udapi::Core::Run; from udapi.core.run import Run import cz.ufal.udapi.core.Run;
COMMAND LINE EXECUTION
Perl Python Java
Util::Eval cat in.conllu | udapi.pl Read::CoNLLU Util::Eval node='say $.lemma' cat in.conllu | udapi.groovy Read::CoNLLU Util::Eval node='println c.lemma'
conllu to txt (Bash) cat in.conllu | udapi.pl Read::CoNLLU Write::TextModeTrees > out.txt cat in.conllu | udapi.groovy Read::CoNLLU Write::TextModeTrees > out.txt
conllu to txt (PowerShell) Use Lucida Sans Unicode in Powershell ISE
cmd.exe /c
echo [Console]::OutputEncoding = [System.Text.Encoding]::UTF8
$OutputEncoding = New-Object -typename System.Text.UTF8Encoding
Get-Content -encoding utf8 in.conllu | out-string |
  groovy .\bin\udapi.groovy
  Read::CoNLLU Write::TextModeTrees