// Instances.java 8.2 KB
package is2.data;

import java.util.BitSet;

import is2.io.CONLLReader09;
import is2.util.DB;



public class Instances  {

	
	/** Encoder used by the setters to map strings to integer ids; assigned in {@code init}. */
	public IEncoder m_encoder;

	
	/** Number of instances created so far; advanced by {@code createInstance09}, overwritten by {@code setSize}. */
	protected int size=0;
	
	/** Number of instance slots allocated by {@code init}. */
	protected int capacity;
	
	// All per-token arrays below are indexed [instance][token position]; the inner
	// arrays are allocated by createInstance09.
	// NOTE(review): the p/g prefixes presumably mean "predicted"/"gold" - confirm.

	/** Word-form ids (encoder type {@code PipeGen.WORD}), written by {@code setForm}. */
	public int[][] forms;

	/** Lemma ids ({@code PipeGen.WORD}), written by {@code setLemma}. */
	public int[][] plemmas;
	/** Lemma ids ({@code PipeGen.WORD}), written by {@code setGLemma}. */
	public int[][] glemmas;


	/** Head positions, written by {@code setHead}. */
	public short[][] heads;
	/** Head positions, written by {@code setPHead}. */
	public short[][] pheads;

	/** Relation-label ids ({@code PipeGen.REL}), written by {@code setRel}. */
	public short[][] labels;
	/** Relation-label ids ({@code PipeGen.REL}), written by {@code setPRel}. */
	public short[][] plabels;

	/** Part-of-speech ids ({@code PipeGen.POS}), written by {@code setGPos}. */
	public short[][] gpos;
	/** Part-of-speech ids ({@code PipeGen.POS}), written by {@code setPPoss}. */
	public short[][] pposs;


	/** Per-token feature ids ({@code PipeGen.FEAT}), [instance][token][k]; a token entry is null when {@code setFeats} received null. */
	public short[][][] feats;


	/** Predicate ids ({@code PipeGen.PRED}), [instance][predicate index]; allocated and filled by {@code createSem}. */
	public int[][] predicat;


	/** Predicate sense ids ({@code PipeGen.SENSE}), [instance][predicate index]; filled by {@code createSem}. */
	public short[][] predicateId;


	/** Values copied from {@code SentenceData09.semposition}, [instance][predicate index]; filled by {@code createSem}. */
	public short[][] semposition;


	/** Argument ids ({@code PipeGen.ARG}), [instance][predicate index][argument index]; filled by {@code createSem}. */
	public short[][][] arg;


	/** Values copied from {@code SentenceData09.argposition}, same indexing as {@code arg}. */
	public short[][][] argposition;


	/** One bit set per instance, allocated by {@code createInstance09}; {@code predCount} returns its cardinality. Not written in this class. */
	public BitSet[] pfill;


	/** Feature ids ({@code PipeGen.FFEATS}), written by {@code setFeature}. */
	public short[][] gfeats;
	/** Feature ids ({@code PipeGen.FFEATS}), written by {@code setPFeature}. */
	public short[][] pfeats;


	/**
	 * Creates an empty container. No arrays are allocated here; that is done
	 * by {@code init}.
	 */
	public Instances() {
		// intentionally empty
	}

		
	

	// The following statistics are static, i.e. shared by all Instances objects.

	/** Number of {@code setForm} calls for which the encoder returned -1. */
	public static int m_unkown = 0;
	/** Total number of {@code setForm} calls. */
	public static int m_count = 0;


	/** When true, {@code setForm} prints every form the encoder does not know. */
	public static boolean m_report;
	/** Set to true by {@code setForm} as soon as one unknown form has been seen. */
	public static boolean m_found =false;

	
	/**
	 * Encodes a word form and stores its id for one token.
	 * Forms for which the encoder yields -1 are counted in {@code m_unkown},
	 * flagged through {@code m_found} and, if {@code m_report} is set, printed.
	 * Every call increments {@code m_count}.
	 *
	 * @param i instance index
	 * @param p token position inside the instance
	 * @param x word form to encode
	 */
	final public void setForm(int i, int p, String x) {
		final int[] row = forms[i];
		row[p] = m_encoder.getValue(PipeGen.WORD, x);

		final boolean known = row[p] != -1;
		if (!known) {
			if (m_report) {
				System.out.println("unkwrd "+x);
			}
			m_unkown++;
			m_found = true;
		}
		m_count++;
	}
	


	/**
	 * Encodes a relation label ({@code PipeGen.REL}) and stores it in {@code labels}.
	 *
	 * @param i instance index
	 * @param p token position inside the instance
	 * @param x relation label to encode
	 */
	final public void setRel(int i, int p, String x) {
		final short[] row = labels[i];
		row[p] = (short) m_encoder.getValue(PipeGen.REL, x);
	}
	

	/**
	 * Stores the head position of a token in {@code heads}.
	 *
	 * @param i instance index
	 * @param c position of the token whose head is set
	 * @param p head position; narrowed to {@code short}
	 */
	final public void setHead(int i, int c, int p) {
		final short[] row = heads[i];
		row[c] = (short) p;
	}

	/**
	 * @return the current value of the instance counter {@code size}
	 */
	final public int size() {
		return this.size;
	}
	/**
	 * Overwrites the instance counter. No arrays are resized or cleared.
	 *
	 * @param n new value of {@code size}
	 */
	public void setSize(int n) {
		this.size = n;
	}




	/**
	 * Allocates storage for {@code ic} instances; delegates to the
	 * three-argument overload with version {@code -1}.
	 *
	 * @param ic number of instance slots to allocate
	 * @param mf encoder used by the setters
	 */
	public void init(int ic, IEncoder mf) {
		final int defaultVersion = -1;
		this.init(ic, mf, defaultVersion);
	}


	/**
	 * Allocates the outer (per-instance) dimension of every array and stores
	 * the encoder. The inner arrays are created later by
	 * {@code createInstance09} and {@code createSem}.
	 *
	 * @param ic      number of instance slots to allocate
	 * @param mf      encoder used by the setters
	 * @param version not used by this implementation
	 */
	public void init(int ic, IEncoder mf, int version) {
		this.capacity = ic;
		this.m_encoder = mf;

		// lexical columns
		this.forms = new int[ic][];
		this.plemmas = new int[ic][];
		this.glemmas = new int[ic][];

		// part-of-speech columns
		this.pposs = new short[ic][];
		this.gpos = new short[ic][];

		// dependency columns
		this.labels = new short[ic][];
		this.heads = new short[ic][];
		this.plabels = new short[ic][];
		this.pheads = new short[ic][];

		// feature columns
		this.feats = new short[ic][][];
		this.gfeats = new short[ic][];
		this.pfeats = new short[ic][];

		// semantic columns
		this.predicat = new int[ic][];
		this.predicateId = new short[ic][];
		this.semposition = new short[ic][];
		this.arg = new short[ic][][];
		this.argposition = new short[ic][][];

		this.pfill = new BitSet[ic];
	}


	/**
	 * @param i instance index
	 * @return the number of token slots of instance {@code i}, taken from {@code forms}
	 */
	public int length(int i) {
		final int[] row = forms[i];
		return row.length;
	}


	/**
	 * Allocates the per-token arrays for a new instance in the next free slot
	 * and advances {@code size}. The semantic arrays are not allocated here
	 * (see {@code createSem}).
	 *
	 * @param length number of token slots of the new instance
	 * @return index of the newly created instance
	 */
	public int createInstance09(int length) {
		final int slot = size;

		forms[slot] = new int[length];
		plemmas[slot] = new int[length];
		glemmas[slot] = new int[length];

		pposs[slot] = new short[length];
		gpos[slot] = new short[length];

		labels[slot] = new short[length];
		heads[slot] = new short[length];

		pfill[slot] = new BitSet(length);

		// only the outer feature array is created; rows are filled by setFeats
		feats[slot] = new short[length][];
		gfeats[slot] = new short[length];
		pfeats[slot] = new short[length];
		plabels[slot] = new short[length];
		pheads[slot] = new short[length];

		size = slot + 1;
		return slot;
	}

/*
	public final void setPPos(int i, int p, String x) {
		ppos[i][p] = (short)m_encoder.getValue(PipeGen.POS,x);
		
	}
*/

	/**
	 * Encodes a part-of-speech tag ({@code PipeGen.POS}) and stores it in {@code pposs}.
	 *
	 * @param i instance index
	 * @param p token position inside the instance
	 * @param x tag to encode
	 */
	public final void setPPoss(int i, int p, String x) {
		final short[] row = pposs[i];
		row[p] = (short) m_encoder.getValue(PipeGen.POS, x);
	}


	/**
	 * Encodes a part-of-speech tag ({@code PipeGen.POS}) and stores it in {@code gpos}.
	 *
	 * @param i instance index
	 * @param p token position inside the instance
	 * @param x tag to encode
	 */
	public final void setGPos(int i, int p, String x) {
		final short[] row = gpos[i];
		row[p] = (short) m_encoder.getValue(PipeGen.POS, x);
	}


	/**
	 * Encodes a lemma ({@code PipeGen.WORD}) and stores it in {@code plemmas}.
	 *
	 * @param i instance index
	 * @param p token position inside the instance
	 * @param x lemma to encode
	 */
	public void setLemma(int i, int p, String x) {
		final int[] row = plemmas[i];
		row[p] = m_encoder.getValue(PipeGen.WORD, x);
	}


	/**
	 * Encodes a lemma ({@code PipeGen.WORD}) and stores it in {@code glemmas}.
	 *
	 * @param i instance index
	 * @param p token position inside the instance
	 * @param x lemma to encode
	 */
	public void setGLemma(int i, int p, String x) {
		final int[] row = glemmas[i];
		row[p] = m_encoder.getValue(PipeGen.WORD, x);
	}


	/**
	 * Encodes the features of one token ({@code PipeGen.FEAT}) and stores them
	 * in {@code feats}. A null input clears the token's entry.
	 *
	 * @param i   instance index
	 * @param p   token position inside the instance
	 * @param fts feature strings of the token, or null for none
	 */
	public void setFeats(int i, int p, String[] fts) {
		final short[][] row = feats[i];

		if (fts == null) {
			row[p] = null;
			return;
		}

		row[p] = new short[fts.length];
		final short[] encoded = row[p];

		int k = 0;
		for (String ft : fts) {
			encoded[k] = (short) m_encoder.getValue(PipeGen.FEAT, ft);
			k++;
		}
	}


	/**
	 * Encodes a feature string ({@code PipeGen.FFEATS}) and stores it in
	 * {@code gfeats}. A null feature leaves the entry untouched.
	 *
	 * @param i       instance index
	 * @param p       token position inside the instance
	 * @param feature feature string to encode, may be null
	 */
	public void setFeature(int i, int p, String feature) {
		if (feature != null) {
			final short[] row = this.gfeats[i];
			row[p] = (short) m_encoder.getValue(PipeGen.FFEATS, feature);
		}
	}
	/**
	 * Encodes a feature string ({@code PipeGen.FFEATS}) and stores it in
	 * {@code pfeats}. A null feature leaves the entry untouched.
	 *
	 * @param i       instance index
	 * @param p       token position inside the instance
	 * @param feature feature string to encode, may be null
	 */
	public void setPFeature(int i, int p, String feature) {
		if (feature != null) {
			final short[] row = this.pfeats[i];
			row[p] = (short) m_encoder.getValue(PipeGen.FFEATS, feature);
		}
	}


	/**
	 * Looks up the id the encoder assigns to a string of type {@code PipeGen.WORD}.
	 *
	 * @param v string to look up
	 * @return the value returned by the encoder
	 */
	public int getWValue(String v) {
		final int code = m_encoder.getValue(PipeGen.WORD, v);
		return code;
	}


	/**
	 * Encodes a relation label ({@code PipeGen.REL}) and stores it in {@code plabels}.
	 *
	 * @param i instance index
	 * @param p token position inside the instance
	 * @param x relation label to encode
	 */
	public final void setPRel(int i, int p, String x) {
		final short[] row = plabels[i];
		row[p] = (short) m_encoder.getValue(PipeGen.REL, x);
	}


	/**
	 * Stores the head position of a token in {@code pheads}.
	 *
	 * @param i instance index
	 * @param c position of the token whose head is set
	 * @param p head position; narrowed to {@code short}
	 */
	public final void setPHead(int i, int c, int p) {
		final short[] row = pheads[i];
		row[c] = (short) p;
	}

/*
	public String toString(int c) {
		StringBuffer s = new StringBuffer();
		for(int i=0;i<length(c);i++) {
			s.append(i).append('\t').append(forms[c][i]).append("\t_\t").append(ppos[c][i]).append('\t').
			append('\t').append(heads[c][i]).append('\n');
		}
		
		return s.toString();
	}
*/

	/*
	public void setPos(int i, int p, String x) {
		ppos[i][p] = (short)m_encoder.getValue(PipeGen.POS,x);
		
	}
*/

	/**
	 * Creates the encoded semantic (predicate/argument) representation of one
	 * instance from the sentence data.
	 * <p>
	 * For every entry of {@code it.sem} the text before the first '.' (if the
	 * dot is not the first character) is encoded as predicate
	 * ({@code PipeGen.PRED}) and the text after it as sense
	 * ({@code PipeGen.SENSE}); entries without such a dot are encoded as a
	 * whole with the empty string as sense. Arguments are encoded with
	 * {@code PipeGen.ARG}. Predicates without an argument list keep a null
	 * entry in {@code arg} and {@code argposition}.
	 * <p>
	 * If {@code it.sem} is null nothing is allocated or changed.
	 *
	 * @param inst index of the instance to fill
	 * @param it   sentence whose {@code sem}, {@code semposition}, {@code arg}
	 *             and {@code argposition} fields are read
	 * @return always {@code false}; no error condition is reported at present
	 */
	public boolean createSem(int inst, SentenceData09 it) {

		// sentence without predicates: leave the semantic arrays untouched
		if (it.sem==null) return false;

		final int numPreds = it.sem.length;

		predicat[inst] = new int[numPreds];
		semposition[inst] = new short[numPreds];
		predicateId[inst] = new short[numPreds];
		arg[inst] = new short[numPreds][];
		argposition[inst] = new short[numPreds][];

		for(int i=0;i<numPreds;i++) {

			final String sem = it.sem[i];
			final int dot = sem.indexOf('.');

			final String pred;
			final String sense;
			if (dot>0) {
				pred = sem.substring(0, dot);
				sense = sem.substring(dot+1);
			} else {
				pred = sem;
				sense = "";
			}

			// the sense is encoded before the predicate, as it always was
			final short predSense = (short)m_encoder.getValue(PipeGen.SENSE, sense);
			predicat[inst][i] = m_encoder.getValue(PipeGen.PRED, pred);
			predicateId[inst][i] = predSense;

			semposition[inst][i]=(short)it.semposition[i];

			// None of these cases is an error:
			//  - no predicate of the sentence has arguments (it.arg == null),
			//  - the last predicate(s) have no argument list (it.arg is shorter),
			//  - this particular predicate has no arguments (it.arg[i] == null).
			if (it.arg==null || it.arg.length<=i || it.arg[i]==null) {
				continue;
			}

			final int argCount = it.arg[i].length;
			arg[inst][i] = new short[argCount];
			argposition[inst][i] = new short[argCount];

			// encode each argument label and copy its position
			for(int a=0;a<argCount;a++) {
				arg[inst][i][a]=(short)m_encoder.getValue(PipeGen.ARG, it.arg[i][a]);
				argposition[inst][i][a]=(short)it.argposition[i][a];
			}
		}

		return false;
	}


	/**
	 * @param n instance index
	 * @return the number of bits set in {@code pfill[n]}
	 */
	public int predCount(int n) {
		final BitSet filled = pfill[n];
		return filled.cardinality();
	}


	/**
	 * Renders one instance as tab-separated text, one line per token, showing
	 * the encoded ids (not the original strings).
	 * <p>
	 * Columns: position, form, glemma, plemma, gpos, pposs, gfeats, first
	 * entry of feats, label, head, plabel, predicat. Cells whose backing
	 * array is missing or too short are printed as {@code null}.
	 *
	 * @param pscnt index of the instance to print
	 * @return the textual table, each row terminated by a newline
	 */
	public String print(int pscnt) {
		StringBuilder s = new StringBuilder();

		final short[] lab = labels[pscnt];
		final short[] plab = plabels[pscnt];
		// predicat is sized by the number of predicates (see createSem) and stays
		// null when the sentence has none, so it must be guarded like the labels.
		final int[] preds = predicat[pscnt];

		final int tokens = this.length(pscnt);
		for(int i=0;i<tokens;i++) {
			final short[] tokenFeats = this.feats[pscnt][i];

			final String firstFeat = (tokenFeats!=null && tokenFeats.length>0) ? Short.toString(tokenFeats[0]) : "null";
			final String label = (lab!=null && lab.length>i) ? Short.toString(lab[i]) : "null";
			final String plabel = (plab!=null && plab.length>i) ? Short.toString(plab[i]) : "null";
			final String pred = (preds!=null && preds.length>i) ? Integer.toString(preds[i]) : "null";

			s.append(i+"\t"+forms[pscnt][i]+"\t"+this.glemmas[pscnt][i]+"\t"+this.plemmas[pscnt][i]+"\t"+this.gpos[pscnt][i]+"\t"
					+this.pposs[pscnt][i]+"\t"+this.gfeats[pscnt][i]+"\t"+firstFeat+
					"\t l "+label+"\t"+
					"\t"+heads[pscnt][i]+"\t"+
					plabel+
					"\t"+pred+"\n");
		}
		return s.toString();
	}

	/**
	 * Renders a reduced view of one instance as tab-separated text, one line
	 * per token: position, form, plemma, pposs, label, head and plabel (all as
	 * encoded ids). Label cells whose backing array is missing or too short
	 * are printed as {@code null}.
	 *
	 * @param pscnt index of the instance to print
	 * @return the textual table, each row terminated by a newline
	 */
	public String print1(int pscnt) {
		final StringBuilder out = new StringBuilder();

		for (int tok = 0; tok < this.length(pscnt); tok++) {
			out.append(tok).append("\t");
			out.append(forms[pscnt][tok]).append("\t").append("\t");
			out.append(this.plemmas[pscnt][tok]).append("\t");
			out.append((int) this.pposs[pscnt][tok]);

			out.append("\t l ");
			if (labels[pscnt] != null && labels[pscnt].length > tok) {
				out.append((int) labels[pscnt][tok]);
			} else {
				out.append("null");
			}
			out.append("\t").append("\t");

			out.append((int) heads[pscnt][tok]).append("\t");

			if (plabels[pscnt] != null && plabels[pscnt].length > tok) {
				out.append((int) plabels[pscnt][tok]);
			} else {
				out.append("null");
			}
			out.append("\n");
		}
		return out.toString();
	}

	
	
	
}