(* test.ml 8.41 KB *)
(*
 *  ENIAM_LCGparser, a parser for Logical Categorial Grammar formalism
 *  Copyright (C) 2016 Wojciech Jaworski <wjaworski atSPAMfree mimuw dot edu dot pl>
 *  Copyright (C) 2016 Institute of Computer Science Polish Academy of Sciences
 *
 *  This library is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU Lesser General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  This library is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU Lesser General Public License for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public License
 *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *)

open ENIAM_LCGtypes
open Xstd

(** Lexical entry attached to a token in [examples].

    [create_chart] renders the two constructors differently:
    - [Basic s] goes through [ENIAM_LCGrenderer.make_symbol] and
      [ENIAM_LCGrenderer.make_term];
    - [Raised s] goes through [ENIAM_LCGrenderer.make_raised_symbol] and
      [ENIAM_LCGrenderer.make_raised_term], with an additional outer node. *)
type entry =
  | Basic of grammar_symbol
  | Raised of grammar_symbol

(* Hand-written test sentences.

   Each element of the list is a triple (name, tokens, last), which is the
   shape destructured by test_example:
   - name   : prefix used for the output file names;
   - tokens : list of (lnode, rnode, orth, lemma, pos, entry) tuples, the
              shape destructured by create_chart;
   - last   : node index passed to ENIAM_LCGchart.make and compared with
              rnode in create_chart to detect the right edge.

   Tuples are written without parentheses: inside a list literal the comma
   binds tighter than the semicolon, so every semicolon-separated item is one
   tuple.

   Commented-out token lines are alternative entries for the same token that
   are currently disabled. *)
let examples = [
  (* Tokens: Ala ma kota . *)
  "kot",[
    0, 1, "Ala","Ala","subst",   Basic(Tensor[Atom "np"; Atom "nom"]);
    1, 2, "ma","mieć","fin",     Basic(ImpSet(Tensor[Atom "ip"],[Both,Tensor[Atom "np"; Atom "nom"];Both,Tensor[Atom "np"; Atom "acc"]]));
    (* 1, 2, "ma","mieć","fin",     Basic(Imp(Imp(Tensor[Atom "ip"],Backward,Tensor[Atom "np"; Atom "nom"]),Forward,Tensor[Atom "np"; Atom "nom"])); *)
    2, 3, "kota","kot","subst",  Basic(Tensor[Atom "np"; Atom "acc"]);
    3, 4, ".",".","interp",      Basic(Imp(Tensor[Atom "<root>"],Backward,Tensor[Atom "ip"]));
  ],4;

  (* Tokens: Ala ma rudego kota .
     The adjective and the noun carry a case variable ranging over gen/acc. *)
  "rudy",[
    0, 1, "Ala","Ala","subst",   Basic(Tensor[Atom "np"; Atom "nom"]);
    1, 2, "ma","mieć","fin",     Basic(ImpSet(Tensor[Atom "ip"],[Both,Tensor[Atom "np"; Atom "nom"];Both,Tensor[Atom "np"; Atom "acc"]]));
    2, 3, "rudego","rudy","adj", Basic(WithVar("case",With[Atom "gen"; Atom "acc"],"A",Tensor[Atom "adjp"; AVar "case"]));
    3, 4, "kota","kot","subst",  Basic(WithVar("case",With[Atom "gen"; Atom "acc"],"B",ImpSet(Tensor[Atom "np"; AVar "case"],[Backward,Maybe(Tensor[Atom "adjp"; AVar "case"])])));
    (* 3, 4, "kota","kot","subst",  Basic(WithVar("case",With[Atom "gen"; Atom "acc"],"B",ImpSet(Tensor[Atom "np"; AVar "case"],[Backward,Tensor[Atom "adjp"; AVar "case"]]))); *)
    4, 5, ".",".","interp",      Basic(Imp(Tensor[Atom "<root>"],Backward,Tensor[Atom "ip"]));
    ],5;

  (* Tokens: Jakiego kota Ala ma ?
     The first token is the only Raised entry in this table. *)
  "jaki",[
    0, 1, "Jakiego","jaki","adj",Raised(WithVar("case",With[Atom "gen"; Atom "acc"],"A",ImpSet(ImpSet(Tensor[Atom "cp"; Atom "int"; Atom "jaki"],
                                                                                             [Forward,Imp(Tensor[Atom "ip"],Forward,Tensor[Atom "np"; AVar "case"])]),
                                                                                             [Forward,Imp(Tensor[Atom "np"; AVar "case"],Backward,Tensor[Atom "adjp"; AVar "case"])])));
    1, 2, "kota","kot","subst",  Basic(WithVar("case",With[Atom "gen"; Atom "acc"],"B",ImpSet(Tensor[Atom "np"; AVar "case"],[Backward,Maybe(Tensor[Atom "adjp"; AVar "case"])])));
    (* 1, 2, "kota","kot","subst",  Basic(WithVar("case",With[Atom "gen"; Atom "acc"],"B",Imp(Tensor[Atom "np"; AVar "case"],Backward,Maybe(Tensor[Atom "adjp"; AVar "case"])))); *)
    (* 1, 2, "kota","kot","subst",  Basic(WithVar("case",With[Atom "gen"; Atom "acc"],"B",ImpSet(Tensor[Atom "np"; AVar "case"],[Backward,Tensor[Atom "adjp"; AVar "case"]]))); *)
    2, 3, "Ala","Ala","subst",   Basic(Tensor[Atom "np"; Atom "nom"]);
    3, 4, "ma","mieć","fin",     Basic(ImpSet(Tensor[Atom "ip"],[Both,Tensor[Atom "np"; Atom "nom"];Both,Tensor[Atom "np"; Atom "acc"]]));
    4, 5, "?","?","interp",      Basic(Imp(Tensor[Atom "<root>"],Backward,Tensor[Atom "cp";Atom "int";Top]));
  ],5;

  (* Tokens: Wpłynąłem na suchego przestwór oceanu .
     NOTE(review): the adjective at 2-3 is not adjacent to the noun at 4-5
     that asks for an adjp argument; presumably this exercises a
     discontinuous dependency - confirm the intended outcome. *)
  "ocean",[
    0, 1, "Wpłynąłem","wpłynąć","praet",   Basic(Imp(Tensor[Atom "ip"],Forward,Tensor[Atom "prepnp"; Atom "acc"]));
    1, 2, "na","na","prep",     Basic(Imp(Tensor[Atom "prepnp";Atom "acc"],Forward,Tensor[Atom "np"; Atom "acc"]));
    2, 3, "suchego","suchy","adj", Basic(Tensor[Atom "adjp"; Atom "gen"]);
    3, 4, "przestwór","przestwór","subst",  Basic(Imp(Tensor[Atom "np"; Atom "acc"],Forward,Tensor[Atom "np"; Atom "gen"]));
    4, 5, "oceanu","ocean","subst",  Basic(Imp(Tensor[Atom "np"; Atom "gen"],Backward,Tensor[Atom "adjp"; Atom "gen"]));
    5, 6, ".",".","interp",      Basic(Imp(Tensor[Atom "<root>"],Backward,Tensor[Atom "ip"]));
    ],6;
]

(** [create_chart tokens last] creates a chart with
    [ENIAM_LCGchart.make last] and adds one bracketed entry per token.

    [tokens] is a list of [(lnode, rnode, orth, lemma, pos, entry)] tuples.
    For every token the renderer variable names are reset and the variable
    numbers advanced before the entry is rendered, so the order of the calls
    below matters. The entry is stored between [lnode] and [rnode] as
    [Bracket (left, right, syntax)], where [left] is true iff [lnode = 0] and
    [right] is true iff [rnode = last]. *)
let create_chart tokens last =
  ENIAM_LCGrenderer.reset_variable_numbers ();
  (* Render one lexical entry into a simplified (syntax, semantics) pair. *)
  let render orth lemma pos = function
    | Basic syntax ->
        let symbol = ENIAM_LCGrenderer.make_symbol syntax in
        let node = {ENIAM_LCGrenderer.empty_node with orth; lemma; pos; symbol} in
        ENIAM_LCGrenderer.simplify
          (syntax, ENIAM_LCGrenderer.make_term node syntax)
    | Raised syntax ->
        let symbol = ENIAM_LCGrenderer.make_raised_symbol syntax in
        let node = {ENIAM_LCGrenderer.empty_node with orth; lemma; pos; symbol} in
        (* The outer node keeps only the lemma; orth and pos are left empty. *)
        let outer_symbol = ENIAM_LCGrenderer.make_symbol syntax in
        let outer_node = {ENIAM_LCGrenderer.empty_node with
                          orth = ""; lemma; pos = ""; symbol = outer_symbol} in
        ENIAM_LCGrenderer.simplify
          (syntax, ENIAM_LCGrenderer.make_raised_term node outer_node syntax) in
  let add_token chart (lnode, rnode, orth, lemma, pos, entry) =
    ENIAM_LCGrenderer.reset_variable_names ();
    ENIAM_LCGrenderer.add_variable_numbers ();
    let syntax, semantics = render orth lemma pos entry in
    let at_left_edge = (lnode = 0) in
    let at_right_edge = (rnode = last) in
    ENIAM_LCGchart.add chart lnode rnode
      (Bracket (at_left_edge, at_right_edge, syntax), semantics) 0 in
  let empty_chart = ENIAM_LCGchart.make last in
  Xlist.fold tokens empty_chart add_token

(** [create_text_fragments tokens last] returns an array of [last] maps.

    Slot [i] is first seeded with [rnode -> orth] for every token whose
    [lnode] is [i]. Then, going from [last - 1] down to [0], every fragment
    of slot [i] that ends at [j] is combined with every fragment already
    stored in slot [j]: the concatenation (joined by a single space) is
    stored in slot [i] under the end node of the second fragment. *)
let create_text_fragments tokens last =
  let fragments = Array.make last IntMap.empty in
  Xlist.iter tokens (fun (lnode, rnode, orth, _, _, _) ->
    fragments.(lnode) <- IntMap.add fragments.(lnode) rnode orth);
  (* Append to [acc] every continuation of [prefix] that starts at node [j].
     The array has only [last] slots, so [j = last] has no slot to read. *)
  let extend acc j prefix =
    if j <> last then
      IntMap.fold fragments.(j) acc (fun acc k suffix ->
        IntMap.add acc k (prefix ^ " " ^ suffix))
    else acc in
  (* Descending order guarantees slot [j] is complete before slot [i < j]. *)
  Int.iter_down 0 (last - 1) (fun i ->
    let extended = IntMap.fold fragments.(i) fragments.(i) extend in
    fragments.(i) <- extended);
  fragments

(** [test_example (name, tokens, last)] runs the whole pipeline on one
    example and writes every intermediate result under the "results/"
    directory, using [name] as the file-name prefix:
    chart (1), lazified chart and references (2), parsed chart and
    references (3), linear term and reduced dependency tree (4), labelled
    tree (5), tree with cuts removed plus its graph renderings (6).

    Prints "not parsed" when the chart is not parsed and "not reduced" when
    the dependency tree is not reduced; later stages are skipped then. *)
let test_example (name,tokens,last) =
  let dir = "results/" in
  let out suffix = name ^ suffix in
  ENIAM_LCGreductions.reset_variant_label ();
  let initial_chart = create_chart tokens last in
  let text_fragments = create_text_fragments tokens last in
  ENIAM_LCGlatexOf.print_chart dir (out "1_chart") "a3" text_fragments initial_chart;
  let lazy_chart, references = ENIAM_LCGchart.lazify initial_chart in
  ENIAM_LCGlatexOf.print_chart dir (out "2_chart") "a4" text_fragments lazy_chart;
  ENIAM_LCGlatexOf.print_references dir (out "2_references") "a4" references;
  (* note: parsing implicitly mutates references in place *)
  let parsed_chart = ENIAM_LCGchart.parse lazy_chart references 30. Sys.time in
  ENIAM_LCGlatexOf.print_chart dir (out "3_chart") "a4" text_fragments parsed_chart;
  ENIAM_LCGlatexOf.print_references dir (out "3_references") "a4" references;
  if not (ENIAM_LCGchart.is_parsed parsed_chart) then print_endline "not parsed"
  else begin
    let term = ENIAM_LCGchart.get_parsed_term parsed_chart in
    Xlatex.latex_file_out dir (out "4_term") "a4" false (fun file ->
      Printf.fprintf file "\\[%s\\]\n" (ENIAM_LCGlatexOf.linear_term 0 term));
    Xlatex.latex_compile_and_clean dir (out "4_term");
    let dependency_tree = ENIAM_LCGreductions.reduce term references in
    ENIAM_LCGlatexOf.print_dependency_tree dir (out "4_dependency_tree") "a0" dependency_tree;
    if not (ENIAM_LCGreductions.is_reduced_dependency_tree dependency_tree) then
      print_endline "not reduced"
    else begin
      (* note: assign_labels implicitly mutates dependency_tree in place *)
      ENIAM_LCGreductions.assign_labels dependency_tree;
      ENIAM_LCGlatexOf.print_dependency_tree dir (out "5_dependency_tree") "a4" dependency_tree;
      (* note: remove_cuts implicitly mutates dependency_tree in place *)
      ENIAM_LCGreductions.remove_cuts dependency_tree;
      ENIAM_LCGlatexOf.print_dependency_tree dir (out "6_dependency_tree") "a4" dependency_tree;
      ENIAM_LCGgraphOf.print_dependency_tree dir (out "6_dependency_tree") dependency_tree;
      ENIAM_LCGgraphOf.print_simplified_dependency_tree dir (out "6_simple_dependency_tree") dependency_tree
    end
  end

(* Entry point: run every example in declaration order. *)
let () = Xlist.iter examples test_example