RandomIndex.java 4.81 KB
/**
 * 
 */
package is2.data;

import java.util.BitSet;

import is2.util.DB;


/**
 * @author Dr. Bernd Bohnet, 20.05.2011
 * 
 * 
 */
public class RandomIndex implements Long2IntInterface {
	
	
	final int[] prims = {52349171,199951347,89990,5001,32891,17,19,23,29,31,37,47,53,59,61,67,71};
//	final int[] prims = {1,3,5,7,11,17,19,23,29,31,37,47,53,59,61,67,71};
	
    final long hashFunctionModifiers[]; 

    final int kbit,lbit; 
    final int hsize ; // maximal size of hash
    
    final int bits; // available bits
    final int moves; // needed moves to put a number into  
 
    
	/**
	 * Creates the random functions.
	 * 
	 * @param kbit The bits to be mapped
	 * @param lbit The left shift of the bits
	 * @param hsize The size of the featurs space (not included in the original algorithm)
	 * @param numberFunctions The number of the hash functions
	 */
	public RandomIndex(int kbit, int lbit, int hsize, int numberFunctions) {
		
		
		this.kbit =kbit;
		this.lbit =lbit;

		
		if (hsize<=0) this.hsize = 67000001; // default value 
		else 		  this.hsize = hsize;

		bits = (int) Math.ceil(Math.log(this.hsize)/Math.log(2));
		
		moves = (int) Math.ceil(64f/(float)bits);

		
		
		DB.println("moves "+moves+" bits "+bits+" hsize "+hsize);
		
		hashFunctionModifiers = new long[numberFunctions];
		
		for (int f = 0;f<numberFunctions;f++) hashFunctionModifiers[f] = prims[f];  		
	}
	
	

    public int[] hash(long x)
    {
    	int[] hvals = new int[hashFunctionModifiers.length];
    	
    	for(int k=0;k<hashFunctionModifiers.length;k++) {
    	
    		// the original function: value = ((x+1) * hashFunctionModifiers[k] & m ) >> n;
    		
    		// the first part of the original function
    		long value = (x+1) * hashFunctionModifiers[k];

    		// do the above >> n with a maximal size of the available hash values
    		// Shift all bits until they have been each xor-ed (^) in the range of the hash
    		// in order the have all information potentially represented there.

    		for(int j=1;j<= moves;j++) value = value ^ (value >> (bits*j));
    		
    		// Map the value to the range of the available space should be the same as (value & m) . 
    		hvals[k] = Math.abs((int)value % hsize);
    	}
        return hvals;
    }
    
    public int[] hashU(long x)
    {
    	int[] hvals = new int[hashFunctionModifiers.length];
    	
    	long y = Long.reverse(x);
    	for(int k=0;k<hashFunctionModifiers.length;k++) {
    	
    		// the original function: value = ((x+1) * hashFunctionModifiers[k] & m ) >> n;
    		
    		// the first part of the original function
    		long value1 = (((y+1) * hashFunctionModifiers[k]) /* % 2 pow 64 */  ) >> (kbit-lbit);
    	
    		// I get probably only the first part lets get the second part too
    	//	long value2 = (((y+1>>20) * hashFunctionModifiers[k]) /* % 2 pow 64 */  ) >> (kbit-lbit);
    	
    	
    		// the modulo (%)  2 pow 64 is done since the long number can not be larger than 2 pow 64.  
    	//	System.out.println("value "+value+" shift "+(lbit-kbit));
    		hvals[k] = Math.abs((int)value1);
    	}
        return hvals;
    }
    
    /*
     (defun generate-hash-fn (&key (k-bit 32)
                              (l-bit 8)
                              verbosep constants (count 4))

  (labels ((random-constant ()
             (let ((a (+ (random (- (expt 2 k-bit) 1)) 1)))
               (logior a 1)))) ;; inclusive OR ensures odd number.                                                                                                                            
    (let ((pdiff (- (- k-bit l-bit)));; neg. sign to do a rightshift, see ash()                                                                                                               
          (sub1 (- (expt 2 k-bit) 1))
          (constants (copy-list constants)))
      (unless constants
	(loop ;; a = odd number a where 0 < a < u.                                                                                                                                            
            until (= count (length constants))
            do (pushnew (random-constant) constants)))
      (when verbosep
        (format t "~&generate-hash-fn(): using random constants: ~a~%"
		constants))
      (values
       #'(lambda (x)
           (loop
               for a in constants
               ;;; always add 1 to x to avoid f(0)=0.                                                                                                                                         
               collect (ash (logand (* (+ 1 x) a) sub1) pdiff)))
       constants))))

     */
    
    
    
    
    
    /* (non-Javadoc)
	 * @see is2.data.Long2IntInterface#l2i(long)
	 */
	@Override
	public int l2i(long l) {
		// TODO Auto-generated method stub
		return 0;
	}

	/* (non-Javadoc)
	 * @see is2.data.Long2IntInterface#size()
	 */
	@Override
	public int size() {
		return hsize;
	}

}