// MFO.java 8.43 KB
package is2.data;

import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map.Entry;

import is2.util.DB;

/**
 * Maps symbolic features to integers and packs several of them into one
 * {@code long}; the resulting long is not mapped further to an integer.
 * <p>
 * For every feature type (a string) the class keeps a table from value string
 * to integer id, a counter with the number of ids handed out for that type, and
 * the bit width computed for the type by {@link #calculateBits()}. All three
 * tables are static and therefore shared by every instance of this class. No
 * synchronization is performed here.
 *
 * @author Bernd Bohnet, 20.09.2009
 */
final public class MFO implements IEncoderPlus {

	/** Feature type -> (value -> integer id). */
	static private final HashMap<String, HashMap<String, Integer>> m_featureSets = new HashMap<String, HashMap<String, Integer>>();

	/** Feature type -> number of ids handed out for that type (including {@link #NONE}). */
	static private final HashMap<String, Integer> m_featureCounters = new HashMap<String, Integer>();

	/** Feature type -> bit width, filled by {@link #calculateBits()}. */
	static final HashMap<String, Integer> m_featureBits = new HashMap<String, Integer>();

	/** Value that is registered with id 0 for every feature type. */
	final public static String NONE = "<None>";

	/**
	 * Scratch record for the {@code calcN} methods: up to ten values
	 * {@code v0..v9}, the number of bit positions {@code a0..a9} by which the
	 * shift is advanced after each value, and the running {@code shift}.
	 */
	final public static class Data4 {
		public int shift;
		public short a0, a1, a2, a3, a4, a5, a6, a7, a8, a9;
		public int v0, v1, v2, v3, v4, v5, v6, v7, v8, v9;

		/**
		 * Appends one more value to an already packed long.
		 *
		 * @param b
		 *            number of bit positions to advance {@link #shift} by
		 * @param v
		 *            the value to OR into {@code l} at the current shift
		 * @param l
		 *            the packed value so far; a negative value is returned
		 *            unchanged and leaves {@link #shift} untouched
		 * @return {@code l} with {@code v << shift} ORed in
		 */
		final public long calcs(int b, long v, long l) {
			if (l < 0) {
				return l;
			}
			long packed = l | (v << shift);
			shift += b;
			return packed;
		}

	}

	public MFO() {
	}

	/**
	 * Registers a feature type if it does not exist yet and adds a value to it.
	 * A new type starts with {@link #NONE} mapped to 0, so the first other
	 * value of a type receives id 1.
	 *
	 * @param a
	 *            the feature type
	 * @param v
	 *            the value to register for that type
	 * @return the id of {@code v}: the existing id if the value is already
	 *         known, otherwise the newly assigned one
	 */
	@Override
	final public int register(String a, String v) {

		HashMap<String, Integer> fs = getFeatureSet().get(a);
		if (fs == null) {
			fs = new HashMap<String, Integer>();
			fs.put(NONE, 0);
			getFeatureSet().put(a, fs);
			getFeatureCounter().put(a, 1);
		}

		Integer known = fs.get(v);
		if (known != null) {
			return known;
		}

		// The counter is always the next free id of this type.
		int id = getFeatureCounter().get(a);
		fs.put(v, id);
		getFeatureCounter().put(a, id + 1);
		return id;
	}

	/**
	 * Computes the bit width of every feature type as
	 * {@code ceil(log2(n + 1))}, where {@code n} is the type's counter, and
	 * stores it in the bit-width table.
	 */
	public void calculateBits() {

		for (Entry<String, Integer> e : getFeatureCounter().entrySet()) {
			int bits = (int) Math.ceil((Math.log(e.getValue() + 1) / Math.log(2)));
			m_featureBits.put(e.getKey(), bits);
		}
	}

	/**
	 * Lists one line per feature type in the form {@code type count:bits}.
	 * If no bit width has been computed for a type yet, {@code ?} is printed
	 * in place of the width.
	 */
	@Override
	public String toString() {

		StringBuilder content = new StringBuilder();
		for (Entry<String, Integer> e : getFeatureCounter().entrySet()) {
			content.append(e.getKey()).append(' ').append(e.getValue());
			content.append(':');

			// Look the width up directly: getFeatureBits throws for types
			// registered after the last calculateBits() call.
			Integer bits = m_featureBits.get(e.getKey());
			if (bits != null) {
				content.append((short) bits.intValue());
			} else {
				content.append('?');
			}
			content.append('\n');
		}
		return content.toString();
	}

	/**
	 * Static counterpart of {@link Data4#calcs(int, long, long)}.
	 *
	 * @param d
	 *            holder of the running shift
	 * @param b
	 *            number of bit positions to advance {@code d.shift} by
	 * @param v
	 *            the value to OR into {@code l} at the current shift
	 * @param l
	 *            the packed value so far; a negative value is returned
	 *            unchanged and leaves {@code d.shift} untouched
	 * @return {@code l} with {@code v << d.shift} ORed in
	 */
	static final public long calcs(Data4 d, int b, long v, long l) {
		if (l < 0) {
			return l;
		}
		long packed = l | (v << d.shift);
		d.shift += b;
		return packed;
	}

	/**
	 * Returns the bit width stored for a feature type.
	 *
	 * @param a
	 *            the feature type
	 * @return the stored width, narrowed to {@code short}
	 * @throws NullPointerException
	 *             if no width is stored for {@code a}
	 */
	static final public short getFeatureBits(String a) {
		Integer bits = m_featureBits.get(a);
		if (bits == null) {
			throw new NullPointerException("no bit width stored for feature type '" + a + "'");
		}
		return (short) bits.intValue();
	}

	/**
	 * Get the integer place holder of the string value v of the type t
	 *
	 * @param t
	 *            the type
	 * @param v
	 *            the value
	 * @return the integer place holder of v, or -1 if the type or the value
	 *         is unknown
	 */
	@Override
	final public int getValue(String t, String v) {
		return getValueS(t, v);
	}

	/**
	 * Static version of getValue
	 *
	 * @param a
	 *            the type
	 * @param v
	 *            the value
	 * @return the integer place holder of v, or -1 if the type or the value
	 *         is unknown
	 * @see #getValue(String, String)
	 */
	static final public int getValueS(String a, String v) {

		HashMap<String, Integer> values = m_featureSets.get(a);
		if (values == null) {
			return -1;
		}
		Integer id = values.get(v);
		return id == null ? -1 : id.intValue();
	}

	/**
	 * Looks up the id of a value.
	 *
	 * @param a
	 *            the type
	 * @param v
	 *            the value
	 * @return the id of v, or -1 if the type or the value is unknown
	 */
	public int hasValue(String a, String v) {
		return getValueS(a, v);
	}

	/**
	 * Packs {@code v0} and {@code v1} into one long: {@code v0} occupies the
	 * lowest bits and {@code v1} is shifted left by {@code a0}. The values are
	 * ORed together without masking. Afterwards {@code d.shift} is
	 * {@code a0 + a1}.
	 *
	 * @param d
	 *            the values and widths; {@code d.shift} is overwritten
	 * @return the packed long, or -1 (with {@code d.shift} untouched) if one
	 *         of the used values is negative
	 */
	final public long calc2(Data4 d) {

		if (d.v0 < 0 || d.v1 < 0) {
			return -1;
		}

		long l = d.v0;
		int shift = d.a0;
		l |= (long) d.v1 << shift;
		d.shift = shift + d.a1;

		return l;
	}

	/**
	 * Packs {@code v0..v2} into one long; each value is shifted left by the
	 * sum of the widths of the values before it and ORed in without masking.
	 * Afterwards {@code d.shift} is {@code a0 + a1 + a2}.
	 *
	 * @param d
	 *            the values and widths; {@code d.shift} is overwritten
	 * @return the packed long, or -1 (with {@code d.shift} untouched) if one
	 *         of the used values is negative
	 */
	final public long calc3(Data4 d) {

		if (d.v0 < 0 || d.v1 < 0 || d.v2 < 0) {
			return -1;
		}

		long l = d.v0;
		int shift = d.a0;
		l |= (long) d.v1 << shift;
		shift += d.a1;
		l |= (long) d.v2 << shift;
		d.shift = shift + d.a2;

		return l;
	}

	/**
	 * Packs {@code v0..v3} into one long; each value is shifted left by the
	 * sum of the widths of the values before it and ORed in without masking.
	 * Afterwards {@code d.shift} is {@code a0 + ... + a3}.
	 *
	 * @param d
	 *            the values and widths; {@code d.shift} is overwritten
	 * @return the packed long, or -1 (with {@code d.shift} untouched) if one
	 *         of the used values is negative
	 */
	final public long calc4(Data4 d) {

		if (d.v0 < 0 || d.v1 < 0 || d.v2 < 0 || d.v3 < 0) {
			return -1;
		}

		long l = d.v0;
		int shift = d.a0;
		l |= (long) d.v1 << shift;
		shift += d.a1;
		l |= (long) d.v2 << shift;
		shift += d.a2;
		l |= (long) d.v3 << shift;
		d.shift = shift + d.a3;

		return l;
	}

	/**
	 * Packs {@code v0..v4} into one long; each value is shifted left by the
	 * sum of the widths of the values before it and ORed in without masking.
	 * Afterwards {@code d.shift} is {@code a0 + ... + a4}.
	 *
	 * @param d
	 *            the values and widths; {@code d.shift} is overwritten
	 * @return the packed long, or -1 (with {@code d.shift} untouched) if one
	 *         of the used values is negative
	 */
	final public long calc5(Data4 d) {

		if (d.v0 < 0 || d.v1 < 0 || d.v2 < 0 || d.v3 < 0 || d.v4 < 0) {
			return -1;
		}

		long l = d.v0;
		int shift = d.a0;
		l |= (long) d.v1 << shift;
		shift += d.a1;
		l |= (long) d.v2 << shift;
		shift += d.a2;
		l |= (long) d.v3 << shift;
		shift += d.a3;
		l |= (long) d.v4 << shift;
		d.shift = shift + d.a4;

		return l;
	}

	/**
	 * Packs {@code v0..v5} into one long; each value is shifted left by the
	 * sum of the widths of the values before it and ORed in without masking.
	 * Afterwards {@code d.shift} is {@code a0 + ... + a5}.
	 *
	 * @param d
	 *            the values and widths; {@code d.shift} is overwritten
	 * @return the packed long, or -1 (with {@code d.shift} untouched) if one
	 *         of the used values is negative
	 */
	static final public long calc6(Data4 d) {

		if (d.v0 < 0 || d.v1 < 0 || d.v2 < 0 || d.v3 < 0 || d.v4 < 0 || d.v5 < 0) {
			return -1;
		}

		long l = d.v0;
		int shift = d.a0;
		l |= (long) d.v1 << shift;
		shift += d.a1;
		l |= (long) d.v2 << shift;
		shift += d.a2;
		l |= (long) d.v3 << shift;
		shift += d.a3;
		l |= (long) d.v4 << shift;
		shift += d.a4;
		l |= (long) d.v5 << shift;
		d.shift = shift + d.a5;

		return l;
	}

	/**
	 * Packs {@code v0..v6} into one long; each value is shifted left by the
	 * sum of the widths of the values before it and ORed in without masking.
	 * Afterwards {@code d.shift} is {@code a0 + ... + a6}.
	 *
	 * @param d
	 *            the values and widths; {@code d.shift} is overwritten
	 * @return the packed long, or -1 (with {@code d.shift} untouched) if one
	 *         of the used values is negative
	 */
	final public long calc7(Data4 d) {

		if (d.v0 < 0 || d.v1 < 0 || d.v2 < 0 || d.v3 < 0 || d.v4 < 0 || d.v5 < 0 || d.v6 < 0) {
			return -1;
		}

		long l = d.v0;
		int shift = d.a0;
		l |= (long) d.v1 << shift;
		shift += d.a1;
		l |= (long) d.v2 << shift;
		shift += d.a2;
		l |= (long) d.v3 << shift;
		shift += d.a3;
		l |= (long) d.v4 << shift;
		shift += d.a4;
		l |= (long) d.v5 << shift;
		shift += d.a5;
		l |= (long) d.v6 << shift;
		d.shift = shift + d.a6;

		return l;
	}

	/**
	 * Packs {@code v0..v7} into one long; each value is shifted left by the
	 * sum of the widths of the values before it and ORed in without masking.
	 * Afterwards {@code d.shift} is {@code a0 + ... + a7}.
	 *
	 * @param d
	 *            the values and widths; {@code d.shift} is overwritten
	 * @return the packed long, or -1 (with {@code d.shift} untouched) if one
	 *         of the used values is negative
	 */
	final public long calc8(Data4 d) {

		if (d.v0 < 0 || d.v1 < 0 || d.v2 < 0 || d.v3 < 0 || d.v4 < 0 || d.v5 < 0 || d.v6 < 0 || d.v7 < 0) {
			return -1;
		}

		long l = d.v0;
		int shift = d.a0;
		l |= (long) d.v1 << shift;
		shift += d.a1;
		l |= (long) d.v2 << shift;
		shift += d.a2;
		l |= (long) d.v3 << shift;
		shift += d.a3;
		l |= (long) d.v4 << shift;
		shift += d.a4;
		l |= (long) d.v5 << shift;
		shift += d.a5;
		l |= (long) d.v6 << shift;
		shift += d.a6;
		l |= (long) d.v7 << shift;
		d.shift = shift + d.a7;

		return l;
	}

	/**
	 * Public counter that is neither read nor written inside this class.
	 * NOTE(review): presumably maintained by callers as a statistic - confirm.
	 */
	static public int misses = 0;

	/**
	 * Public counter that is neither read nor written inside this class.
	 * NOTE(review): presumably maintained by callers as a statistic - confirm.
	 */
	static public int good = 0;

	/**
	 * Writes all feature tables: the number of feature types, then for each
	 * type its name, the number of values, and every value with its id.
	 *
	 * @param dos
	 *            the stream to write to
	 * @throws IOException
	 *             if writing fails
	 */
	public void writeData(DataOutputStream dos) throws IOException {
		dos.writeInt(getFeatureSet().size());
		for (Entry<String, HashMap<String, Integer>> type : getFeatureSet().entrySet()) {
			HashMap<String, Integer> values = type.getValue();
			dos.writeUTF(type.getKey());
			dos.writeInt(values.size());

			for (Entry<String, Integer> value : values.entrySet()) {

				// A null value cannot be written; report which entry is
				// affected before writeUTF fails on it.
				if (value.getKey() == null) {
					DB.println("key " + value.getKey() + " value " + value.getValue() + " e -key " + type.getKey());
				}
				dos.writeUTF(value.getKey());
				dos.writeInt(value.getValue());
			}
		}
	}

	/**
	 * Reads feature tables in the layout produced by
	 * {@link #writeData(DataOutputStream)}. A type that is already present is
	 * replaced; its counter is set to the number of values read. Bit widths
	 * are recomputed at the end.
	 *
	 * @param din
	 *            the stream to read from
	 * @throws IOException
	 *             if reading fails
	 */
	public void read(DataInputStream din) throws IOException {

		int typeCount = din.readInt();
		for (int i = 0; i < typeCount; i++) {
			String type = din.readUTF();
			int valueCount = din.readInt();

			HashMap<String, Integer> values = new HashMap<String, Integer>();
			getFeatureSet().put(type, values);
			for (int j = 0; j < valueCount; j++) {
				String value = din.readUTF();
				int id = din.readInt();
				values.put(value, id);
			}
			getFeatureCounter().put(type, valueCount);
		}

		calculateBits();
	}

	/**
	 * Clears the feature tables, the counters and the bit widths.
	 */
	static public void clearData() {
		m_featureSets.clear();
		m_featureCounters.clear();
		m_featureBits.clear();
	}

	/**
	 * @return the shared (static) table of counters per feature type; the
	 *         table itself is returned, not a copy
	 */
	@Override
	public HashMap<String, Integer> getFeatureCounter() {
		return m_featureCounters;
	}

	/**
	 * @return the shared (static) table of value ids per feature type; the
	 *         table itself is returned, not a copy
	 */
	static public HashMap<String, HashMap<String, Integer>> getFeatureSet() {
		return m_featureSets;
	}

	/**
	 * Inverts a value table into an array indexed by id.
	 *
	 * @param v
	 *            a table from value to id; every id must lie in
	 *            {@code 0..v.size()-1}, otherwise the array access fails
	 * @return an array of length {@code v.size()} whose element at index
	 *         {@code id} is the value mapped to that id
	 */
	static public String[] reverse(HashMap<String, Integer> v) {
		String[] byId = new String[v.size()];
		for (Entry<String, Integer> e : v.entrySet()) {
			byId[e.getValue()] = e.getKey();
		}
		return byId;
	}

}