Commit 7688bb05efebbc741649dbd798183a3099f96234
1 parent
0bd70dd2
Uporządkowanie plików ze Świgrą
Showing
130 changed files
with
0 additions
and
118002 deletions
Too many changes to show.
To preserve performance only 2 of 130 files are displayed.
swigra/disambiguator-pcfg/README deleted
1 | -Katalog zawiera pliki źródłowe dezambiguatora dla lasów składniowych w formacie Świgra/Składnica oraz: | |
2 | - | |
3 | -pcfg-grammar.pkl - gramatyka pcfg służąca do dezambiguacji | |
4 | -skrypt make-model.py służy do stworzenia nowej gramatyki | |
5 | - | |
6 | -skrypt disamb-tree.py służy do automatycznej anotacji ujednoznacznionego drzewa | |
7 | - | |
8 | -sposób użycia: | |
9 | - | |
10 | - python disamb-tree.py sciezka/do/nazwa_pliku_wejsciowego.xml | |
11 | - skrypt utworzy plik sciezka/do/nazwa_pliku_wejsciowego-disamb.xml | |
12 | 0 | \ No newline at end of file |
swigra/disambiguator-pcfg/disamb-tree.py deleted
1 | -# -*- encoding: utf-8 -*- | |
2 | -__author__ = 'nika' | |
3 | - | |
4 | -from xml.sax import make_parser | |
5 | -from treeparser import TreeParser | |
6 | -import pickle | |
7 | -import sys | |
8 | -import xml.dom.minidom | |
9 | -import codecs | |
10 | - | |
11 | -try: | |
12 | - f = sys.argv[1] | |
13 | -except IndexError: | |
14 | - print("ERROR: no filename given") | |
15 | - exit() | |
16 | - | |
17 | -# parse xml file for future processing | |
18 | -dom = xml.dom.minidom.parse(sys.argv[1]) | |
19 | -nodes = dom.getElementsByTagName("node") | |
20 | - | |
21 | -if nodes: # if there is anything to disambiguate | |
22 | - # load grammar | |
23 | -# grammar = pickle.load(open("grammars/pcfg-tfw-130718.pkl")) | |
24 | - grammar = pickle.load(open("grammars/pcfg-tfw-150326.pkl")) | |
25 | - # make parser, parse tree | |
26 | - parser = make_parser() | |
27 | - handler = TreeParser(f) | |
28 | - parser.setContentHandler(handler) | |
29 | - parser.parse(f) | |
30 | - tree = handler.getTree() | |
31 | - | |
32 | - # disambiguation | |
33 | - tree.act_pcfg(grammar) | |
34 | - disamb_nodes = tree.getDisambNodes() | |
35 | - | |
36 | -# update chosen nodes (if any) | |
37 | -for node in nodes: | |
38 | - if node.attributes["nid"].value in disamb_nodes: | |
39 | - node.attributes["chosen"] = "true" | |
40 | - children_all = node.getElementsByTagName("children") | |
41 | - for children in children_all: | |
42 | - chosen = True | |
43 | - for child in children.getElementsByTagName("child"): | |
44 | - if child.attributes["nid"].value not in disamb_nodes: | |
45 | - chosen = False | |
46 | - if chosen: | |
47 | - children.attributes["chosen"] = "true" | |
48 | - else: | |
49 | - node.attributes["chosen"] = "false" | |
50 | - | |
51 | -new_f = sys.argv[1].rsplit(".", 1)[0] + "-disamb.xml" | |
52 | -print "saving in :", new_f | |
53 | -open(new_f, 'w').write(codecs.encode(dom.toxml(), 'utf-8')) |