Commit 681727718acccf4a63d6dca1cb4c682a05217abb

Authored by Jan Lupa
1 parent a6478f31

Added Świgra

Showing 122 changed files with 111222 additions and 0 deletions

Too many changes to show.

To preserve performance only 2 of 122 files are displayed.

swigra/disambiguator-pcfg/README 0 → 100644
  1 +Katalog zawiera pliki źródłowe dezambiguatora dla lasów składniowych w formacie Świgra/Składnica oraz:
  2 +
  3 +pcfg-grammar.pkl - gramatyka pcfg służąca do dezambiguacji
  4 +skrypt make-model.py służy do stworzenia nowej gramatyki
  5 +
  6 +skrypt disamb-tree.py służy do automatycznej anotacji ujednoznacznionego drzewa
  7 +
  8 +sposób użycia:
  9 +
  10 + python disamb-tree.py sciezka/do/nazwa_pliku_wejsciowego.xml
  11 + skrypt utworzy plik sciezka/do/nazwa_pliku_wejsciowego-disamb.xml
0 12 \ No newline at end of file
... ...
swigra/disambiguator-pcfg/disamb-tree.py 0 → 100644
# -*- encoding: utf-8 -*-
"""Annotate a parse forest XML file with the nodes chosen by a PCFG grammar.

Usage:
    python disamb-tree.py path/to/input.xml

The input file is read twice: once with ``xml.dom.minidom`` (this DOM is
what gets annotated and written back) and once through a SAX parser driven
by ``TreeParser``, whose tree performs the actual disambiguation.  The
annotated document is saved next to the input as ``<input stem>-disamb.xml``.
"""
__author__ = 'nika'

from xml.sax import make_parser
from treeparser import TreeParser
import os
import pickle
import sys
import xml.dom.minidom
import codecs

# Pickled grammar handed to ``tree.act_pcfg``.  The path is relative to the
# current working directory.  An earlier grammar file was
# grammars/pcfg-tfw-130718.pkl.
GRAMMAR_PATH = "grammars/pcfg-tfw-150326.pkl"


def _load_grammar(path):
    """Unpickle and return the grammar object stored at *path*."""
    # NOTE(review): pickle can run arbitrary code while loading; only point
    # this at grammar files from a trusted source.
    # Binary mode is required for pickle data; ``with`` closes the handle.
    with open(path, "rb") as grammar_file:
        return pickle.load(grammar_file)


def _disambiguate(path, grammar):
    """Build the tree for *path* via SAX/TreeParser and disambiguate it.

    Returns whatever ``tree.getDisambNodes()`` yields; the caller tests
    "nid" attribute values for membership in it.
    """
    parser = make_parser()
    handler = TreeParser(path)
    parser.setContentHandler(handler)
    parser.parse(path)
    tree = handler.getTree()

    tree.act_pcfg(grammar)
    return tree.getDisambNodes()


def _mark_chosen(nodes, disamb_nodes):
    """Set the "chosen" attribute on every <node> and on matching <children>.

    A <node> whose "nid" is in *disamb_nodes* gets chosen="true"; every other
    node gets chosen="false".  Inside a chosen node, a <children> element is
    marked chosen="true" only when all of its <child> elements refer to
    nids in *disamb_nodes*; other <children> elements are left untouched.
    """
    for node in nodes:
        if node.attributes["nid"].value not in disamb_nodes:
            node.setAttribute("chosen", "false")
            continue

        node.setAttribute("chosen", "true")
        for children in node.getElementsByTagName("children"):
            all_children_chosen = all(
                child.attributes["nid"].value in disamb_nodes
                for child in children.getElementsByTagName("child")
            )
            if all_children_chosen:
                children.setAttribute("chosen", "true")


def main(argv):
    """Run the disambiguator on ``argv[1]`` and return a process exit status."""
    if len(argv) < 2:
        print("ERROR: no filename given")
        # Non-zero status so calling scripts can detect the failure.
        return 1
    in_path = argv[1]

    # Parse the XML file; this DOM is the one that is annotated and saved.
    dom = xml.dom.minidom.parse(in_path)
    nodes = dom.getElementsByTagName("node")

    if nodes:  # only load the grammar if there is anything to disambiguate
        grammar = _load_grammar(GRAMMAR_PATH)
        _mark_chosen(nodes, _disambiguate(in_path, grammar))

    # splitext only strips an extension of the final path component, so a
    # dot inside a directory name (e.g. "./input") is left alone.
    out_path = os.path.splitext(in_path)[0] + "-disamb.xml"
    print("saving in : %s" % out_path)
    # The document is encoded to UTF-8 bytes, hence the binary-mode handle.
    with open(out_path, "wb") as out_file:
        out_file.write(codecs.encode(dom.toxml(), "utf-8"))
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))
... ...