0 1 matrix balancing - algorithm

On wikipedia http://en.wikipedia.org/wiki/Dynamic_programming#A_type_of_balanced_0.E2.80.931_matrix, counting the number of 0 1 balanced matrices is given as an example of dynamic programming. But I found it really hard to implement the algorithm given there. Is there a better algorithm?
If not, then can anyone kindly explain the algorithm presented there in a way that makes it easier to implement. Like what would be the recurrence relation in this algorithm? Because once I have found it, it would be easy to do memoization.
Also can any one tell that why this particular problem seems so much harder than all the other problems given on that page.

Dynamic programming would be faster, but here is a simple way for enumeration.
Balanced matrix: bicolorable 0-1 matrix. Here s is the dimension of the 0-1 balanced square matrix, L[p][q] is an entry of the matrix. Initially call enumerate(s,1,1).
int enumerate(int s, int p,int q){
if(p>s) {
printmatrix(L);
return 0;
}
if(p>=3 && q>=3){
int min = p;if(p>q){min=q;}
if L[1...min][1...min] is not a balanced matrix, then return 0;
}
if(q<=s) {
L[p][q] = 1;
enumerate(s,p,q+1);
if(p!=q){
L[p][q] = 0;
enumerate(s,p,q+1);
}
}
if(q>s) {
enumerate(s,p+1,1);
}
return 0;
}

Dynamic programming solution
import java.util.HashMap;
import java.util.Map;
public class Balanced01Matrix {
//Variable to hold all possible row permutation
private int[][] rowPerms;
//Mapping function f((n/2,n/2),(n/2,n/2)......(n/2,n/2))
Map<String, Integer> arrangeFunction;
int rowCounter = 0;
/**
* #param args
*/
public static void main(String[] args) {
Balanced01Matrix bm = new Balanced01Matrix();
int n = 4;
int rows = bm.combination(n, n/2);
bm.rowPerms = new int[rows][n];
//Getting all the row permuation with n/2 '0' and n/2 '1'
bm.getAllCombination(n/2, n/2, n, new int[n]);
//bm.printAllCombination();
bm.arrangeFunction = new HashMap<String, Integer>();
//Variable to hold vector ((n/2,n/2),(n/2,n/2),......(n/2,n/2))
int[][] digitsRemaining = new int[n][2];
for(int i=0;i<n;i++){
digitsRemaining[i][0]=n/2;
digitsRemaining[i][1]=n/2;
}
//Printing total number of combination possible for nxn balanced matrix
System.out.println(bm.possibleCombinations(digitsRemaining, n));
}
/**
* Method to get all permutation of a row with n/2 zero and n/2 one
* #param oneCount
* #param zeroCount
* #param totalCount
* #param tmpArr
*/
private void getAllCombination(int oneCount, int zeroCount, int totalCount, int[] tmpArr){
if(totalCount>0){
if(oneCount > 0){
tmpArr[totalCount-1] = 1;
getAllCombination(oneCount-1, zeroCount, totalCount-1, tmpArr);
}
if(zeroCount > 0){
tmpArr[totalCount-1] = 0;
getAllCombination(oneCount, zeroCount-1, totalCount-1, tmpArr);
}
}else{
rowPerms[rowCounter++] = tmpArr.clone();
}
}
/**
* Recursive function to calculate all combination possible for a given vector and level
* #param digitsRemaining
* #param level
* #return
*/
private int possibleCombinations(int[][] digitsRemaining, int level){
//Using memoization
if(arrangeFunction.containsKey(getStringForDigitsRemaining(digitsRemaining))){
return arrangeFunction.get(getStringForDigitsRemaining(digitsRemaining));
}
int totalCombination = 0;
for(int[] row: rowPerms){
int i=0;
int[][] tmpArr = createCopy(digitsRemaining);
for(;i<row.length;i++){
if(row[i]==0){
if(tmpArr[i][0] - 1 >= 0){
tmpArr[i][0] -= 1;
}else
break;
}else{
if(tmpArr[i][1] - 1 >= 0){
tmpArr[i][1] -= 1;
}else
break;
}
}
//If row permutation is successfully used for this level
//else try next permuation
if(i==row.length){
//If last row of matrix return 1
if(level == 1){
return 1;
}else{
int combinations = possibleCombinations(tmpArr, level-1);
arrangeFunction.put(getStringForDigitsRemaining(tmpArr), combinations);
totalCombination += combinations;
}
}
}
return totalCombination;
}
/**
* Creating deep copy of 2 dimensional array
* #param arr
* #return
*/
private int[][] createCopy(int[][] arr){
int[][] newArr = new int[arr.length][arr[0].length];
for(int i=0;i<arr.length;i++){
for(int j=0;j<arr[0].length;j++){
newArr[i][j] = arr[i][j];
}
}
return newArr;
}
private void printRow(int[] row){
for(int i: row)
System.out.print(i);
}
private String getStringForDigitsRemaining(int[][] digitsRemaining){
StringBuilder sb = new StringBuilder();
for(int i=0;i<digitsRemaining.length;i++){
sb.append(digitsRemaining[i][0]);
sb.append(digitsRemaining[i][1]);
}
return sb.toString();
}
/**
* Calculates x choose y
* #param x
* #param y
*/
private int combination(int x, int y){
if(x>0 && y>0 && x>y)
return factorial(x)/(factorial(y)*factorial(x-y));
else
return 0;
}
private int factorial(int x){
if(x==0)
return 1;
return x*factorial(x-1);
}
private void printAllCombination(){
for(int[] arr: rowPerms){
for(int i: arr)
System.out.print(i);
System.out.println();
}
}
}

Related

Using minHash to compare more than 2 sets

I have a class called FindSimilar which uses minHash to find similarities between 2 sets (and for this goal, it works great). My problem is that I need to compare more than 2 sets, more specifically, I need to compare a given set1 with an unknown amount of other sets. Here is the class:
import java.util.HashSet;
import java.util.Map;
import java.util.Random;
import java.util.Set;
public class FindSimilar<T>
{
private int hash[];
private int numHash;
public FindSimilar(int numHash)
{
this.numHash = numHash;
hash = new int[numHash];
Random r = new Random(11);
for (int i = 0; i < numHash; i++)
{
int a = (int) r.nextInt();
int b = (int) r.nextInt();
int c = (int) r.nextInt();
int x = hash(a * b * c, a, b, c);
hash[i] = x;
}
}
public double similarity(Set<T> set1, Set<T> set2)
{
int numSets = 4;
Map<T, boolean[]> bitMap = buildBitMap(set1, set2);
int[][] minHashValues = initializeHashBuckets(numSets, numHash);
computeFindSimilarForSet(set1, 0, minHashValues, bitMap);
computeFindSimilarForSet(set2, 1, minHashValues, bitMap);
return computeSimilarityFromSignatures(minHashValues, numHash);
}
private static int[][] initializeHashBuckets(int numSets,
int numHashFunctions)
{
int[][] minHashValues = new int[numSets][numHashFunctions];
for (int i = 0; i < numSets; i++)
{
for (int j = 0; j < numHashFunctions; j++)
{
minHashValues[i][j] = Integer.MAX_VALUE;
}
}
return minHashValues;
}
private static double computeSimilarityFromSignatures(
int[][] minHashValues, int numHashFunctions)
{
int identicalFindSimilares = 0;
for (int i = 0; i < numHashFunctions; i++)
{
if (minHashValues[0][i] == minHashValues[1][i])
{
identicalFindSimilares++;
}
}
return (1.0 * identicalFindSimilares) / numHashFunctions;
}
private static int hash(int x, int a, int b, int c)
{
int hashValue = (int) ((a * (x >> 4) + b * x + c) & 131071);
return Math.abs(hashValue);
}
private void computeFindSimilarForSet(Set<T> set, int setIndex,
int[][] minHashValues, Map<T, boolean[]> bitArray)
{
int index = 0;
for (T element : bitArray.keySet())
{
/*
* for every element in the bit array
*/
for (int i = 0; i < numHash; i++)
{
/*
* for every hash
*/
if (set.contains(element))
{
/*
* if the set contains the element
*/
int hindex = hash[index];
if (hindex < minHashValues[setIndex][index])
{
/*
* if current hash is smaller than the existing hash in
* the slot then replace with the smaller hash value
*/
minHashValues[setIndex][i] = hindex;
}
}
}
index++;
}
}
public Map<T, boolean[]> buildBitMap(Set<T> set1, Set<T> set2)
{
Map<T, boolean[]> bitArray = new HashMap<T, boolean[]>();
for (T t : set1)
{
bitArray.put(t, new boolean[] { true, false });
}
for (T t : set2)
{
if (bitArray.containsKey(t))
{
// item is present in set1
bitArray.put(t, new boolean[] { true, true });
}
else if (!bitArray.containsKey(t))
{
// item is not present in set1
bitArray.put(t, new boolean[] { false, true });
}
}
return bitArray;
}
public static void main(String[] args)
{
Set<String> set1 = new HashSet<String>();
set1.add("FRANCISCO");
set1.add("abc");
set1.add("SAN");
Set<String> set2 = new HashSet<String>();
set2.add("b");
set2.add("a");
set2.add("SAN");
set2.add("USA");
FindSimilar<String> minHash = new FindSimilar<String>(set1.size() + set2.size());
System.out.println("Set1 : " + set1);
System.out.println("Set2 : " + set2);
System.out.println("Similarity between two sets: "
+ minHash.similarity(set1, set2));
}
}
I need to use the similarity method on more than 2 sets. The problem is that I can't find a way to go over all of them. If I create a for, I can't say I want to compare set1 and seti. I am not sure if I am making sense, I must admit I am a bit confused.
The goal of the program is to compare users. A user has a list of contacts (other users) and similar users have similar contacts. Each set is a user and the contents of the sets will be their contacts.
In implementations of set similarity join algorithms, sets are usually converted to an array of integers. Each integer represents a set element, and the conversion is typically done with a hash map. The arrays are sorted, such that the overlap between two sets can be computed in a merge like manner. If you are interested in these algorithms and their pruning techniques, the paper at http://ssjoin.dbresearch.uni-salzburg.at/ could be a good start.
I have found a (not sure if) cheesy solution for my problem by placing all sets inside an ArrayList structure and then converting it to an actual array:
ArrayList<Set<String>> list = new ArrayList<Set<String>>();
for(int i = 0; i < numPeople; i++){
Set<String> set1 = new HashSet<String>();
list.add(set1);
//another for goes here later on
}
Set<String>[] bs = list.toArray(new Set[0]);
.
.
.
public static void main(String[] args)
{
.
.
.
for(int i = 1; i<bs.length; i++){
System.out.format("Set %d: ", i+1);
System.out.println(bs[0]);
System.out.println("Similarity between two sets: "
+ minHash.similarity(bs[0], bs[i]));
}
}
This gives off a The expression of type Set[] needs unchecked conversion to conform to Set<String>[] warning, but runs fine. This does exactly what I wanted it to (I still need a for to put data inside the sets, but that shouldn't be hard. If anyone could tell me if this solution should be used or if there is a better alternative, I'd like to hear it, since I am still learning and any info would be useful.

unable to create second deck from same code for a queue

I have to set up a queue class that implements from a deque class. I need to use this to set up two deck cards with a random order. I have the code below, it works when the first deck is created but for some reason it does not work with the second deck, its the same code that im reusing.
Exception in thread "main" java.lang.ArrayIndexOutOfBoundsException: 5
at prog.pkg4.Deque.insertOnBack(Prog4.java:93)
at prog.pkg4.Queue.insert(Prog4.java:153)
at prog.pkg4.Prog4.createDeck(Prog4.java:465)
at prog.pkg4.Prog4.topTrump(Prog4.java:444)
at prog.pkg4.Prog4.main(Prog4.java:287)
initiates the two decks
Queue player = new Queue();
Queue computer = new Queue();
player = createDeck(player, cards);
computer = createDeck(computer, cards);
code to create random deck
public static Queue createDeck(Queue queue, GreekHero[] cards){
Random rand = new Random();
int temp = 0;
int r;
for(int i = 0; i < 30; i++){
r = rand.nextInt(30);
cards[temp] = cards[i];
cards[i] = cards[r];
cards[r] = cards[temp];
}
for(int i = 0; i < 29; i++){
queue.insert(cards[i]);
System.out.println(queue.insertions());
System.out.println(queue);
}
return queue;
}
class Queue{
private Deque queue;
public Queue(){
queue = new Deque();
}
public void insert(Object o){
queue.insertOnBack(o);
}
public Object delete(){
return queue.deleteFromFront();
}
public boolean isEmpty(){
return queue.isEmpty();
}
public String toString(){
return queue.toString();
}
public int insertions(){
return queue.getInsertions();
}
}
i've tested out the deque code several times i know it works, as demonstrated by the first deck that is created, im just not sure what could be causing the problem for the second deck.
EDIT: I've added the Deque class code below, the way i have this set up is that if the number of insertions equals the size of the array, it should double in size, as mentioned before it works with the first deque but on the second deque it stops at size of array - 1. I've increased the size to test out and I could make it bigger to satisfy this project but I need to create a deque with an increasing array.
class Deque{
private Object[] arrayObject;
private int beggining; //tracks first element in array
private int insertions; //counts the items in the array
private static int SIZE = 30; //size of array
public Deque(){
arrayObject = new Object[SIZE];
beggining = 0;
insertions = 0;
}
// displays position of first element in circular array
public Object getBeggining(){
int temp = beggining + 1;
if(temp == SIZE)
temp = 0;
return temp;
}
public int getInsertions(){
return insertions;
}
public Object indexOne(){
int temp = beggining + 1;
if(temp == SIZE)
temp = 0;
return arrayObject[temp];
}
public String toString(){
if(isEmpty())
return "Empty";
int temp = beggining + 1;
if( temp >= SIZE)
temp = 0;
String s = "Current Index:\n[("+arrayObject[temp]+")";
int loops = 0;
for(int i = temp + 1; loops < insertions - 1; i++){
if(i >= SIZE)
i = 0;
s += ", ("+arrayObject[i]+")";
loops++;
}
s += "]";
return s;
}
public String toStore(){
String s = "Store Index:\n[(1: "+arrayObject[1]+")";
for(int i = 1; i <= SIZE - 1; i++)
s += ", ("+(i+1)+": "+arrayObject[i]+")";
s += "]";
return s;
}
public void insertOnFront(Object o){
if(insertions == SIZE)
arrayObject = increaseArray();
arrayObject[beggining] = o;
beggining--;
if(beggining < 0)
beggining = SIZE - 1;
insertions++;
}
public Object deleteFromFront(){
if(isEmpty())
return null;
int count = beggining + 1;
if(count >= SIZE)
count = 0;
Object temp = arrayObject[count];
beggining += 1;
insertions--;
if(insertions > 0)
insertions = 0;
return temp;
}
public void insertOnBack(Object o){
int temp = beggining + insertions + 1;
if(insertions == SIZE - 1)
arrayObject = increaseArray();
if(temp >= SIZE)
temp = 0 + (temp - SIZE);
arrayObject[temp] = o;
insertions++;
}
public Object deleteFromBack(){
if(isEmpty())
return null;
int count = beggining + insertions;
Object temp = arrayObject[count];
insertions--;
if(insertions >= 0)
insertions = 0;
return temp;
}
public boolean isEmpty(){
if(insertions > 0)
return false;
else
return true;
}
public Object[] increaseArray(){
SIZE *= 2;
int loops = 0;
int j = beggining;
Object[] newArray = new Object[SIZE];
for(int i = j; loops <= SIZE/2; i++){
if(j >= SIZE/2)
j = 0;
newArray[i] = arrayObject[j];
loops++;
j++;
}
return newArray;
}
}
I solved the issue by moving the SIZE variable as an instance variable of the class and removed static from it. I don't know why the issue popped in on the second iteration rather than on the first try, ill look it up later, if anyone knows please post it here.

Finding the index of the first word starting with a given alphabet form a alphabetically sorted list

Based on the current implementation, I will get an arraylist which contains some 1000 unique names in the alphabetically sorted order(A-Z or Z-A) from some source.
I need to find the index of the first word starting with a given alphabet.
So to be more precise, when I select an alphabet, for eg. "M", it should give me the index of the first occurrence of the word starting in "M" form the sorted list.
And that way I should be able to find the index of all the first words starting in each of the 26 alphabets.
Please help me find a solution which doesn't compromise on the speed.
UPDATE:
Actually after getting the 1000 unique names, the sorting is also done by one of my logics.
If this can be done while doing the sorting itself, I can avoid the reiteration on the list after sorting to find the indices for the alphabets.
Is that possible?
Thanks,
Sen
I hope this little piece of code will help you. I guessed the question is related to Java, because you mentioned ArrayList.
String[] unsorted = {"eve", "bob", "adam", "mike", "monica", "Mia", "marta", "pete", "Sandra"};
ArrayList<String> names = new ArrayList<String>(Arrays.asList(unsorted));
String letter = "M"; // find index of this
class MyComp implements Comparator<String>{
String first = "";
String letter;
MyComp(String letter){
this.letter = letter.toUpperCase();
}
public String getFirst(){
return first;
}
#Override
public int compare(String s0, String s1) {
if(s0.toUpperCase().startsWith(letter)){
if(s0.compareTo(first) == -1 || first.equals("")){
first = s0;
}
}
return s0.toUpperCase().compareTo(s1.toUpperCase());
}
};
MyComp mc = new MyComp(letter);
Collections.sort(names, mc);
int index = names.indexOf(mc.getFirst()); // the index of first name starting with letter
I'm not sure if it's possible to also store the index of the first name in the comparator without much overhead. Anyway, if you implement your own version of sorting algorithm e.g. quicksort, you should know about the index of the elements and could calculate the index while sorting. This depends on your chosen sorting algorithm and implementation. In fact if I know how your sorting is implemented, we could insert the index calculation.
So I came up with my own solution for this.
package test.binarySearch;
import java.util.Random;
/**
*
* Binary search to find the index of the first starting in an alphabet
*
* #author Navaneeth Sen <navaneeth.sen#multichoice.co.za>
*/
class SortedWordArray
{
private final String[] a; // ref to array a
private int nElems; // number of data items
public SortedWordArray(int max) // constructor
{
a = new String[max]; // create array
nElems = 0;
}
public int size()
{
return nElems;
}
public int find(String searchKey)
{
return recFind(searchKey, 0, nElems - 1);
}
String array = null;
int arrayIndex = 0;
private int recFind(String searchKey, int lowerBound,
int upperBound)
{
int curIn;
curIn = (lowerBound + upperBound) / 2;
if (a[curIn].startsWith(searchKey))
{
array = a[curIn];
if ((curIn == 0) || !a[curIn - 1].startsWith(searchKey))
{
return curIn; // found it
}
else
{
return recFind(searchKey, lowerBound, curIn - 1);
}
}
else if (lowerBound > upperBound)
{
return -1; // can't find it
}
else // divide range
{
if (a[curIn].compareTo(searchKey) < 0)
{
return recFind(searchKey, curIn + 1, upperBound);
}
else // it's in lower half
{
return recFind(searchKey, lowerBound, curIn - 1);
}
} // end else divide range
} // end recFind()
public void insert(String value) // put element into array
{
int j;
for (j = 0; j < nElems; j++) // find where it goes
{
if (a[j].compareTo(value) > 0) // (linear search)
{
break;
}
}
for (int k = nElems; k > j; k--) // move bigger ones up
{
a[k] = a[k - 1];
}
a[j] = value; // insert it
nElems++; // increment size
} // end insert()
public void display() // displays array contents
{
for (int j = 0; j < nElems; j++) // for each element,
{
System.out.print(a[j] + " "); // display it
}
System.out.println("");
}
} // end class OrdArray
class BinarySearchWordApp
{
static final String AB = "12345aqwertyjklzxcvbnm";
static Random rnd = new Random();
public static String randomString(int len)
{
StringBuilder sb = new StringBuilder(len);
for (int i = 0; i < len; i++)
{
sb.append(AB.charAt(rnd.nextInt(AB.length())));
}
return sb.toString();
}
public static void main(String[] args)
{
int maxSize = 100000; // array size
SortedWordArray arr; // reference to array
int[] indices = new int[27];
arr = new SortedWordArray(maxSize); // create the array
for (int i = 0; i < 100000; i++)
{
arr.insert(randomString(10)); //insert it into the array
}
arr.display(); // display array
String searchKey;
for (int i = 97; i < 124; i++)
{
searchKey = (i == 123)?"1":Character.toString((char) i);
long time_1 = System.currentTimeMillis();
int result = arr.find(searchKey);
long time_2 = System.currentTimeMillis() - time_1;
if (result != -1)
{
indices[i - 97] = result;
System.out.println("Found " + result + "in "+ time_2 +" ms");
}
else
{
if (!(i == 97))
{
indices[i - 97] = indices[i - 97 - 1];
}
System.out.println("Can't find " + searchKey);
}
}
for (int i = 0; i < indices.length; i++)
{
System.out.println("Index [" + i + "][" + (char)(i+97)+"] = " + indices[i]);
}
} // end main()
}
All comments welcome.

Can someone with GA experience examine my fitness function?

Here is my problem, I'm modifying code I found for Genetic Algorithms to do numerical optimization of a function. Essentially, given a function F and our Desired Value, the program uses GA to searches for values of x and y which provide the appropriate Desired Value.
I keep tinkering with my fitness function, which I feel is the root of the issue.
The basic code break down is:
Generate a random chromosome population
Use a bubble sort based on each chromosomes fitness
Check if any of them happen to solve the function
If one solves it, then stop and print it
Else,
Generate children based on the parents
Sort, check the best answer, loop
I hope someone can point me in the right direction I'm going to dissect it again some more tonight but I seem to have hit a snag on this. For more complex functions than that I have hard coded, it seems to converge around a random percentage (usually less than 20)... but it should be much closer to 0. The simple coded function keeps returning around 99% difference... so I'm not 100% on whats up.
import java.util.*;
import java.util.Comparator;
import java.util.TreeMap;
/**
* Modified from a file created Jul 9, 2003
* Original #author Fabian Jones
* Modified #author Cutright
* #version 2
*/
public class ScratchGA
{
private static int NUM_CHROMOSOMES = 100; //num of chromosomes in population
private static double MUTATE = .01; //chance of a mutation i.e. 88.8%
private static int desiredValue = 60466176; //desired value of function
private static int cutoff = 1000; // number of iterations before cut off
private static int longPrint = 0; //1 means print out each iteration of the population
private boolean done = false;
private Chromosome[] population;
int iteration = 0;
/**
* Constructor for objects of class ScratchGA
*/
public ScratchGA()
{
generateRandomPopulation(NUM_CHROMOSOMES);
printPopulation();
}
/**
* Generate a random population of chromosomes - WORKS
*
*/
private void generateRandomPopulation(int pop)
{
System.out.println("Generating random population of " + pop + ", now." +"\n");
population = new Chromosome[pop];
for(int i=0; i<pop; i++)
{
int rand = (int)(Math.random()*4095); // Range 0 to 4095
population[i] = (new Chromosome(rand, 12));
}
}
/**
* Codesaver for generating a new line in the output
*/
private void newLine()
{
System.out.println("\n");
}
/**
* Prints the population (the chromosomes)
*/
private void printPopulation()
{
int x=1; // variable to print 10 chromosomes on a line
if (iteration==0)
{
System.out.println("Initial population: " + "\n" );
}
else
{
if (longPrint ==1)
{
System.out.println("Population " + iteration + " :" + "\n");
for(int i=0; i<=(NUM_CHROMOSOMES-1); i++)
{
System.out.print(population[i] + " ");
if(x == 10)
{
newLine();
x=1;
}
else
{
x++;
}
}
newLine();
}
else
{
System.out.println("Best answer for iteration " + iteration + " is: " + population[0] + " with a % difference of " +population[0].getFitness());
newLine();
}
}
}
/**
* Start
* Bubblesort initial population by their fitness, see if the first chromosome
* in the sorted array satisfies our constraint.
* IF done ==true or max num of iterations
* Print best solution and its fitness
* ELSE
* generate new population based on old one, and continue on
*/
public void start()
{
// System.out.println("Starting bubblesort... Please Wait.");
bubbleSort();
//System.out.println("After Bubblesort: " );
//printPopulation();
topFitness();
if(done || iteration==cutoff){
System.out.println("DONE!!");
System.out.println("Best solution: " + population[0] + " % Difference: " + population[0].getFitness());
}
else{
iteration++;
generateNewPopulation();
printPopulation();
start();
}
}
/**
* If the top chromosomes fitness (after being sorted by bubblesort) is 100%
* done == true
*/
private void topFitness()
{
if (population[0].getFitness() == 0)
{
done = true;
}
}
/**
* Called from chromosome,
* Tests the x and y values in the function and returns their output
*/
public static double functionTest(int x, int y)
{
return (3*x)^(2*y); // From our desired value we're looking for x=2, y=5
}
/**
* Returns the desired outcome of the function, with ideal x and y
* Stored above in a private static
*/
public static int getDesired()
{
return desiredValue;
}
/**
* Sort Chromosome array, based on fitness
* utilizes a bubblesort
*/
private void bubbleSort()
{
Chromosome temp;
for(int i=0; i<NUM_CHROMOSOMES; i++){
for(int j=1; j<(NUM_CHROMOSOMES-i); j++){
if(population[j-1].getFitness() > population[j].getFitness())
{
//swap elements
temp = population[j-1];
population[j-1] = population[j];
population[j] = temp;
}
}
}
}
/**
* Top 30: Elitism
* Next 60: Offspring of Elitists
* Next 10: Random
*/
private void generateNewPopulation(){
System.out.println("***Generating New Population");
Chromosome[] temp = new Chromosome[100];
for (int i = 0; i < 30; i++)
{
Chromosome x = population[i];
if (shouldMutate())
mutate(x);
temp[i]=x;
}
for (int i = 0; i < 30; i++)
{
temp[i+30] =cross1(population[i], population[i+1]);
temp[i+60] = cross2(population[i], population[i+1]);
}
for (int i = 90; i<100; i++)
{
int rand = (int)(Math.random()*4095); // Range 0 to 4095
Chromosome x = new Chromosome(rand, 12);
temp[i] = x;
}
population = temp;
}
/**
* First cross type, with two parents
*/
private Chromosome cross1(Chromosome parent1, Chromosome parent2){
String bitS1 = parent1.getBitString();
String bitS2 = parent2.getBitString();
int length = bitS1.length();
int num = (int)(Math.random()*length); // number from 0 to length-1
String newBitString = bitS2.substring(0, num) + bitS1.substring(num, length);
Chromosome offspring = new Chromosome();
offspring.setBitString(newBitString);
if(shouldMutate()){
mutate(offspring);
}
return offspring;
}
/**
* Second cross type, parents given in same order as first, but reverses internal workings
*/
private Chromosome cross2(Chromosome parent1, Chromosome parent2){
String bitS1 = parent1.getBitString();
String bitS2 = parent2.getBitString();
int length = bitS1.length();
int num = (int)(Math.random()*length); // number from 0 to length-1
String newBitString = bitS2.substring(0, num) + bitS1.substring(num, length);
Chromosome offspring = new Chromosome();
offspring.setBitString(newBitString);
if(shouldMutate()){
mutate(offspring);
}
return offspring;
}
/**
* Returns a boolean of whether a character should mutate based on the mutation value at top
*/
private boolean shouldMutate(){
double num = Math.random()*100;
return (num <= MUTATE);
}
/**
* Returns a boolean of whether a character should mutate based on the mutation value at top
*/
private void mutate(Chromosome offspring){
String s = offspring.getBitString();
int num = s.length();
int index = (int) (Math.random()*num);
String newBit = flip(s.substring(index, index+1));
String newBitString = s.substring(0, index) + newBit + s.substring(index+1, s.length());
offspring.setBitString(newBitString);
}
/**
* Flips bits in a string 1 to 0, 0 to 1
*/
private String flip(String s){
return s.equals("0")? "1":"0";
}
}
import java.lang.Comparable;
import java.math.*;
/**
* Modified from a file created on Jul 9, 2003
* Unsure of original author
*
*/
public class Chromosome implements Comparable
{
protected String bitString;
/**
* Constructor for objects of class Chromosome
*/
public Chromosome()
{
}
public Chromosome(int value, int length)
{
bitString = convertIntToBitString(value, length);
}
public void setBitString(String s)
{
bitString = s;
}
public String getBitString()
{
return bitString;
}
public int compareTo(Object o)
{
Chromosome c = (Chromosome) o;
int num = countOnes(this.bitString) - countOnes(c.getBitString());
return num;
}
public double getFitness()
{
String working = bitString;
int x1 = Integer.parseInt(working.substring(0,6),2);
int x2 = Integer.parseInt(working.substring(6),2);
double result = ScratchGA.functionTest(x1,x2);
double percentDiff = ((ScratchGA.getDesired() - result)/ScratchGA.getDesired())*100;
if (percentDiff >= 0)
{
return percentDiff;
}
else
{
return -percentDiff;
}
}
public boolean equals(Object o)
{
if(o instanceof Chromosome)
{
Chromosome c = (Chromosome) o;
return c.getBitString().equals(bitString);
}
return false;
}
public int hashCode()
{
return bitString.hashCode();
}
public String toString()
{
return bitString;
}
public static int countOnes(String bits)
{
int sum = 0;
for(int i = 0; i < bits.length(); ++ i){
String test = bits.substring(i, i+1);
if(test.equals("1")){
sum = sum + 1;
}
}
return sum;
}
public static String convertIntToBitString(int val, int length)
{
int reval = val;
StringBuffer bitString = new StringBuffer(length);
for(int i = length-1; i >=0; --i ){
if( reval - (Math.pow(2, i)) >= 0 ){
bitString.append("1");
reval = (int) (reval - Math.pow(2, i));
}
else{
bitString.append("0");
}
}
return bitString.toString();
}
public static void main(String[] args
){
//System.out.println(convertIntToBitString(2046, 10));
Chromosome c = new Chromosome(1234, 10);
//System.out.println(c.fitness());
}
}
Actually, it was a simple error that eluded me, that I should have caught. The major issue was in using return (3*x)^(2*y); ^ is a bitwise XOR in java, but an exponent. (Whoops) The problem was rectified using Math.pow(3*x,2*y); ...and a little double check of the fitness function had it up and running with some other minor changes :)

Finding the longest repeated substring

What would be the best approach (performance-wise) in solving this problem?
I was recommended to use suffix trees. Is this the best approach?
Check out this link: http://introcs.cs.princeton.edu/java/42sort/LRS.java.html
/*************************************************************************
* Compilation: javac LRS.java
* Execution: java LRS < file.txt
* Dependencies: StdIn.java
*
* Reads a text corpus from stdin, replaces all consecutive blocks of
* whitespace with a single space, and then computes the longest
* repeated substring in that corpus. Suffix sorts the corpus using
* the system sort, then finds the longest repeated substring among
* consecutive suffixes in the sorted order.
*
* % java LRS < mobydick.txt
* ',- Such a funny, sporty, gamy, jesty, joky, hoky-poky lad, is the Ocean, oh! Th'
*
* % java LRS
* aaaaaaaaa
* 'aaaaaaaa'
*
* % java LRS
* abcdefg
* ''
*
*************************************************************************/
import java.util.Arrays;
public class LRS {
// return the longest common prefix of s and t
public static String lcp(String s, String t) {
int n = Math.min(s.length(), t.length());
for (int i = 0; i < n; i++) {
if (s.charAt(i) != t.charAt(i))
return s.substring(0, i);
}
return s.substring(0, n);
}
// return the longest repeated string in s
public static String lrs(String s) {
// form the N suffixes
int N = s.length();
String[] suffixes = new String[N];
for (int i = 0; i < N; i++) {
suffixes[i] = s.substring(i, N);
}
// sort them
Arrays.sort(suffixes);
// find longest repeated substring by comparing adjacent sorted suffixes
String lrs = "";
for (int i = 0; i < N - 1; i++) {
String x = lcp(suffixes[i], suffixes[i+1]);
if (x.length() > lrs.length())
lrs = x;
}
return lrs;
}
// read in text, replacing all consecutive whitespace with a single space
// then compute longest repeated substring
public static void main(String[] args) {
String s = StdIn.readAll();
s = s.replaceAll("\\s+", " ");
StdOut.println("'" + lrs(s) + "'");
}
}
Have a look at http://en.wikipedia.org/wiki/Suffix_array as well - they are quite space-efficient and have some reasonably programmable algorithms to produce them, such as "Simple Linear Work Suffix Array Construction" by Karkkainen and Sanders
Here is a simple implementation of longest repeated substring using simplest suffix tree. Suffix tree is very easy to implement in this way.
#include <iostream>
#include <vector>
#include <unordered_map>
#include <string>
using namespace std;
class Node
{
public:
char ch;
unordered_map<char, Node*> children;
vector<int> indexes; //store the indexes of the substring from where it starts
Node(char c):ch(c){}
};
int maxLen = 0;
string maxStr = "";
void insertInSuffixTree(Node* root, string str, int index, string originalSuffix, int level=0)
{
root->indexes.push_back(index);
// it is repeated and length is greater than maxLen
// then store the substring
if(root->indexes.size() > 1 && maxLen < level)
{
maxLen = level;
maxStr = originalSuffix.substr(0, level);
}
if(str.empty()) return;
Node* child;
if(root->children.count(str[0]) == 0) {
child = new Node(str[0]);
root->children[str[0]] = child;
} else {
child = root->children[str[0]];
}
insertInSuffixTree(child, str.substr(1), index, originalSuffix, level+1);
}
int main()
{
string str = "banana"; //"abcabcaacb"; //"banana"; //"mississippi";
Node* root = new Node('#');
//insert all substring in suffix tree
for(int i=0; i<str.size(); i++){
string s = str.substr(i);
insertInSuffixTree(root, s, i, s);
}
cout << maxLen << "->" << maxStr << endl;
return 1;
}
/*
s = "mississippi", return "issi"
s = "banana", return "ana"
s = "abcabcaacb", return "abca"
s = "aababa", return "aba"
*/
the LRS problem is one that is best solved using either a suffix tree or a suffix array. Both approaches have a best time complexity of O(n).
Here is an O(nlog(n)) solution to the LRS problem using a suffix array. My solution can be improved to O(n) if you have a linear construction time algorithm for the suffix array (which is quite hard to implement). The code was taken from my library. If you want more information on how suffix arrays work make sure to check out my tutorials
/**
* Finds the longest repeated substring(s) of a string.
*
* Time complexity: O(nlogn), bounded by suffix array construction
*
* #author William Fiset, william.alexandre.fiset#gmail.com
**/
import java.util.*;
public class LongestRepeatedSubstring {
// Example usage
public static void main(String[] args) {
String str = "ABC$BCA$CAB";
SuffixArray sa = new SuffixArray(str);
System.out.printf("LRS(s) of %s is/are: %s\n", str, sa.lrs());
str = "aaaaa";
sa = new SuffixArray(str);
System.out.printf("LRS(s) of %s is/are: %s\n", str, sa.lrs());
str = "abcde";
sa = new SuffixArray(str);
System.out.printf("LRS(s) of %s is/are: %s\n", str, sa.lrs());
}
}
class SuffixArray {
// ALPHABET_SZ is the default alphabet size, this may need to be much larger
int ALPHABET_SZ = 256, N;
int[] T, lcp, sa, sa2, rank, tmp, c;
public SuffixArray(String str) {
this(toIntArray(str));
}
private static int[] toIntArray(String s) {
int[] text = new int[s.length()];
for(int i=0;i<s.length();i++)text[i] = s.charAt(i);
return text;
}
// Designated constructor
public SuffixArray(int[] text) {
T = text;
N = text.length;
sa = new int[N];
sa2 = new int[N];
rank = new int[N];
c = new int[Math.max(ALPHABET_SZ, N)];
construct();
kasai();
}
private void construct() {
int i, p, r;
for (i=0; i<N; ++i) c[rank[i] = T[i]]++;
for (i=1; i<ALPHABET_SZ; ++i) c[i] += c[i-1];
for (i=N-1; i>=0; --i) sa[--c[T[i]]] = i;
for (p=1; p<N; p <<= 1) {
for (r=0, i=N-p; i<N; ++i) sa2[r++] = i;
for (i=0; i<N; ++i) if (sa[i] >= p) sa2[r++] = sa[i] - p;
Arrays.fill(c, 0, ALPHABET_SZ, 0);
for (i=0; i<N; ++i) c[rank[i]]++;
for (i=1; i<ALPHABET_SZ; ++i) c[i] += c[i-1];
for (i=N-1; i>=0; --i) sa[--c[rank[sa2[i]]]] = sa2[i];
for (sa2[sa[0]] = r = 0, i=1; i<N; ++i) {
if (!(rank[sa[i-1]] == rank[sa[i]] &&
sa[i-1]+p < N && sa[i]+p < N &&
rank[sa[i-1]+p] == rank[sa[i]+p])) r++;
sa2[sa[i]] = r;
} tmp = rank; rank = sa2; sa2 = tmp;
if (r == N-1) break; ALPHABET_SZ = r + 1;
}
}
// Use Kasai algorithm to build LCP array
private void kasai() {
lcp = new int[N];
int [] inv = new int[N];
for (int i = 0; i < N; i++) inv[sa[i]] = i;
for (int i = 0, len = 0; i < N; i++) {
if (inv[i] > 0) {
int k = sa[inv[i]-1];
while( (i + len < N) && (k + len < N) && T[i+len] == T[k+len] ) len++;
lcp[inv[i]-1] = len;
if (len > 0) len--;
}
}
}
// Finds the LRS(s) (Longest Repeated Substring) that occurs in a string.
// Traditionally we are only interested in substrings that appear at
// least twice, so this method returns an empty set if this is not the case.
// #return an ordered set of longest repeated substrings
public TreeSet <String> lrs() {
int max_len = 0;
TreeSet <String> lrss = new TreeSet<>();
for (int i = 0; i < N; i++) {
if (lcp[i] > 0 && lcp[i] >= max_len) {
// We found a longer LRS
if ( lcp[i] > max_len )
lrss.clear();
// Append substring to the list and update max
max_len = lcp[i];
lrss.add( new String(T, sa[i], max_len) );
}
}
return lrss;
}
public void display() {
System.out.printf("-----i-----SA-----LCP---Suffix\n");
for(int i = 0; i < N; i++) {
int suffixLen = N - sa[i];
String suffix = new String(T, sa[i], suffixLen);
System.out.printf("% 7d % 7d % 7d %s\n", i, sa[i],lcp[i], suffix );
}
}
}
public class LongestSubString {
public static void main(String[] args) {
String s = findMaxRepeatedString("ssssssssssss this is a ddddddd word with iiiiiiiiiis and loads of these are ppppppppppppps");
System.out.println(s);
}
private static String findMaxRepeatedString(String s) {
Processor p = new Processor();
char[] c = s.toCharArray();
for (char ch : c) {
p.process(ch);
}
System.out.println(p.bigger());
return new String(new char[p.bigger().count]).replace('\0', p.bigger().letter);
}
static class CharSet {
int count;
Character letter;
boolean isLastPush;
boolean assign(char c) {
if (letter == null) {
count++;
letter = c;
isLastPush = true;
return true;
}
return false;
}
void reassign(char c) {
count = 1;
letter = c;
isLastPush = true;
}
boolean push(char c) {
if (isLastPush && letter == c) {
count++;
return true;
}
return false;
}
#Override
public String toString() {
return "CharSet [count=" + count + ", letter=" + letter + "]";
}
}
static class Processor {
Character previousLetter = null;
CharSet set1 = new CharSet();
CharSet set2 = new CharSet();
void process(char c) {
if ((set1.assign(c)) || set1.push(c)) {
set2.isLastPush = false;
} else if ((set2.assign(c)) || set2.push(c)) {
set1.isLastPush = false;
} else {
set1.isLastPush = set2.isLastPush = false;
smaller().reassign(c);
}
}
CharSet smaller() {
return set1.count < set2.count ? set1 : set2;
}
CharSet bigger() {
return set1.count < set2.count ? set2 : set1;
}
}
}
I had an interview and I needed to solve this problem. This is my solution:
public class FindLargestSubstring {
public static void main(String[] args) {
String test = "ATCGATCGA";
System.out.println(hasRepeatedSubString(test));
}
private static String hasRepeatedSubString(String string) {
Hashtable<String, Integer> hashtable = new Hashtable<>();
int length = string.length();
for (int subLength = length - 1; subLength > 1; subLength--) {
for (int i = 0; i <= length - subLength; i++) {
String sub = string.substring(i, subLength + i);
if (hashtable.containsKey(sub)) {
return sub;
} else {
hashtable.put(sub, subLength);
}
}
}
return "No repeated substring!";
}}
There are way too many things that affect performance for us to answer this question with only what you've given us. (Operating System, language, memory issues, the code itself)
If you're just looking for a mathematical analysis of the algorithm's efficiency, you probably want to change the question.
EDIT
When I mentioned "memory issues" and "the code" I didn't provide all the details. The length of the strings you will be analyzing are a BIG factor. Also, the code doesn't operate alone - it must sit inside a program to be useful. What are the characteristics of that program which impact this algorithm's use and performance?
Basically, you can't performance tune until you have a real situation to test. You can make very educated guesses about what is likely to perform best, but until you have real data and real code, you'll never be certain.

Resources