Commit 681727718acccf4a63d6dca1cb4c682a05217abb
1 parent
a6478f31
Added Świgra
Showing
122 changed files
with
111222 additions
and
0 deletions
Too many changes to show.
To preserve performance only 2 of 122 files are displayed.
swigra/disambiguator-pcfg/README
0 → 100644
1 | +Katalog zawiera pliki źródłowe dezambiguatora dla lasów składniowych w formacie Świgra/Składnica oraz: | |
2 | + | |
3 | +pcfg-grammar.pkl - gramatyka pcfg służąca do dezambiguacji | |
4 | +skrypt make-model.py służy do stworzenia nowej gramatyki | |
5 | + | |
6 | +skrypt disamb-tree.py służy do automatycznej anotacji ujednoznacznionego drzewa | |
7 | + | |
8 | +sposób użycia: | |
9 | + | |
10 | + python disamb-tree.py sciezka/do/nazwa_pliku_wejsciowego.xml | |
11 | + skrypt utworzy plik sciezka/do/nazwa_pliku_wejsciowego-disamb.xml | |
0 | 12 | \ No newline at end of file |
... | ... |
swigra/disambiguator-pcfg/disamb-tree.py
0 → 100644
# -*- encoding: utf-8 -*-
"""Annotate a Swigra/Skladnica parse forest with its disambiguated tree.

Usage:
    python disamb-tree.py path/to/input_file.xml

The input XML is read twice: once with minidom (the document that is
updated and written back) and once with the SAX-based ``TreeParser`` (the
tree on which the PCFG disambiguation runs).  Every ``<node>`` element gets
a ``chosen`` attribute ("true" or "false"); a ``<children>`` element of a
chosen node gets ``chosen="true"`` when all of its ``<child>`` elements
refer to chosen nodes.  The result is saved next to the input as
``path/to/input_file-disamb.xml``.
"""
__author__ = 'nika'

from xml.sax import make_parser
from treeparser import TreeParser
import pickle
import sys
import xml.dom.minidom
import codecs

# Pickled PCFG grammar, resolved relative to the current working directory.
# Previously used grammar: "grammars/pcfg-tfw-130718.pkl".
GRAMMAR_PATH = "grammars/pcfg-tfw-150326.pkl"


def _load_grammar(path=GRAMMAR_PATH):
    """Return the grammar object unpickled from *path*."""
    # Pickles are binary data: open in "rb" and close the handle promptly.
    # NOTE(review): pickle.load can execute arbitrary code; only load
    # grammar files that come from a trusted source.
    with open(path, "rb") as grammar_file:
        return pickle.load(grammar_file)


def _disambiguate(filename):
    """Parse *filename* with TreeParser and return its disambiguated nodes.

    Returns whatever ``tree.getDisambNodes()`` yields; the caller only
    performs ``in`` membership tests against ``nid`` attribute values.
    """
    grammar = _load_grammar()
    parser = make_parser()
    handler = TreeParser(filename)
    parser.setContentHandler(handler)
    parser.parse(filename)
    tree = handler.getTree()
    tree.act_pcfg(grammar)
    return tree.getDisambNodes()


def _mark_chosen(nodes, disamb_nodes):
    """Set the ``chosen`` attribute on each element of *nodes* in place.

    A node whose ``nid`` is in *disamb_nodes* is marked "true", any other
    node "false".  Within a chosen node, each ``<children>`` element whose
    ``<child>`` entries all refer to chosen nodes is marked "true" as well
    (other ``<children>`` elements are left untouched).
    """
    for node in nodes:
        if node.attributes["nid"].value not in disamb_nodes:
            node.setAttribute("chosen", "false")
            continue
        node.setAttribute("chosen", "true")
        for children in node.getElementsByTagName("children"):
            if all(child.attributes["nid"].value in disamb_nodes
                   for child in children.getElementsByTagName("child")):
                children.setAttribute("chosen", "true")


def main(argv):
    """Run the disambiguator for the file named in ``argv[1]``."""
    if len(argv) < 2:
        # Report the usage error on stderr with a non-zero exit status.
        sys.exit("ERROR: no filename given")
    filename = argv[1]

    # parse xml file for future processing
    dom = xml.dom.minidom.parse(filename)
    nodes = dom.getElementsByTagName("node")

    if nodes:  # if there is anything to disambiguate
        _mark_chosen(nodes, _disambiguate(filename))

    new_f = filename.rsplit(".", 1)[0] + "-disamb.xml"
    # Single-argument form prints the same text on Python 2 and Python 3.
    print("saving in : " + new_f)
    # The payload is already UTF-8 encoded bytes, hence binary mode.
    with open(new_f, "wb") as out:
        out.write(codecs.encode(dom.toxml(), "utf-8"))


if __name__ == "__main__":
    main(sys.argv)
... | ... |