Related
A set of integers is given as input. You have to return the subset of that set so that the mean - median is maximum for that subset.
Example 1
Input
{1,2,3,4}
Output
{1,2,4}
Example 2
Input
{1,2,2,3,3}
Output
{2,2,3}
package subsetMean_Median;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
public class MySolution {
public static void main(String[] args) {
int[] arr=
{2,3,2,1,3};
// {1,3,2,4};
Arrays.sort(arr);
int[] outp=meanMedian(arr);
for(int e:outp) {
System.out.print(e+"\t");
}
}
protected static int[] meanMedian(int[] arr) {
double median=findMedian(arr);
double mean=findMean(arr);
double diff=median-mean;
int MAXINDEX=0;
int n=arr.length;
double sets=(1<<n);
System.out.println("sets:"+sets);
for(int i=1;i<=sets;i++) {
int[] subset=findSubset(i,arr);
mean=findMean(subset);
median=findMedian(subset);
if(mean -median>diff) {
diff=mean-median;MAXINDEX=i;
}
}
System.out.println("mean: "+mean+"\tmedian: "+median+"\tdiff: "+diff);
return findSubset(MAXINDEX,arr);
}
protected static int[] findSubset(int counter, int[] arr) {
int n=arr.length;
List<Integer> ls=new ArrayList<Integer>();
for(int j=0;j<n;j++) {
if((counter & (1<<j))>0) {
ls.add(arr[j]);
}
}
int[] output= new int[ls.size()];
for(int j=0;j<ls.size();j++) {
output[j]=ls.get(j);
}
return output;
}
protected static double findMean(int[] arr) {
int n=arr.length;
double sum=0;
if(n==0) return 0;
for(int i=0;i<n;i++)
sum +=arr[i];
return (sum/n);
}
protected static double findMedian(int[] arr) {
int n=arr.length;
if(n%2==1)
return arr[(n/2)];
else if(n>=2)
return 0.5*(arr[((n-2)/2)]+arr[n/2]);
else return 0;
}
}
For every possible median:
lllllmrrrrr
Sort both parts L and R, then start choosing in pair lr maximal elements from both parts and with addition of every next element recompute mean, store arrangement with the best difference. Then the same for minimal elements.
There are about N possible medians, sorting takes O(N*lgN), on every iteration you need to compute up to N means, you can do it in O(N). So, overall complexity is O(N^3*LgN), but most likely you can avoid sorting on every iteration, instead sort whole array only once and update parts in O(1) on every iteration. With such an improvements it is O(N^2).
The most important thing in this problem is to find the Subset.
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
public class MeanMedian {
public static void main(String[] args) {
int[] arr = { 1, 2, 3 };// { 1, 2, 2, 3, 3 };// { 1, 2, 3, 4 };
returnMaxMeanMedian(arr);
}
private static void returnMaxMeanMedian(int[] arr) {
double max = -999.9;
List<Integer[]> subArr = subSet(arr);
Integer[] maxArr = new Integer[1];
for (Integer[] sub : subArr) {
double newMax = calcDiff(sub);
if (max <= newMax) {
max = newMax;
maxArr = sub;
}
}
System.out.println(Arrays.toString(maxArr));
}
private static double calcDiff(Integer[] sub) {
// calc. mean
double sum = 0;
for (int i = 0; i < sub.length; i++) {
sum += sub[i];
}
sum = sum / sub.length;
// calc. median
double median = 0;
if (sub.length % 2 == 0)
median = (double) (sub[(sub.length / 2) - 1] + sub[sub.length / 2]) / 2;
else
median = sub[sub.length / 2];
double diff = sum - median;
return diff;
}
private static List<Integer[]> subSet(int[] arr) {
List<Integer[]> subArr = new ArrayList<Integer[]>();
int n = arr.length;
// Run a loop until 2^n
// subsets one by one
for (int i = 0; i < (1 << n); i++) {
String subSet = "";
// Print current subset
for (int j = 0; j < n; j++)
if ((i & (1 << j)) > 0)
subSet += arr[j] + " ";
subArr.add(convertToInt(subSet.trim().split(" ")));
}
return subArr;
}
private static Integer[] convertToInt(String[] arr) {
if (arr[0] == "")
return new Integer[] { 0 };
Integer[] intArr = new Integer[arr.length];
for (int i = 0; i < arr.length; i++) {
intArr[i] = Integer.parseInt(arr[i].trim());
}
return intArr;
}
}
Sort the list in O(n log n).
Deleting any element to the left of the median (center element or pair) has the same effect on the median, but affect the mean differently. Ditto for elements to the right.
That means that if anything will improve (mean - median), one of these will improve it the most:
the smallest element in the array
the smallest element to the right of the median
one of the element(s) that comprises the median
I.e., for each possible new median, how can we achieve the largest mean?
Repeatedly check these 3-4 for improving mean-median, deleting whatever improves the most. Each operation is O(1), as is recalculating the mean and median. You have to do this at most O(n) times.
The running time is O(n log n) if the list is unsorted, otherwise O(n).
Is this question only for a positive sequence of numbers? If yes, there's this efficient piece of code I wrote:
import java.util.Scanner;
public class MeanMedian {
public static void main(String[] args) {
// TODO Auto-generated method stub
Scanner sc = new Scanner(System.in);
int i;
int j;
int k;
int in_length;
int mid_loc;
int sum_arr;
float median = 0.0f;
float mean = 0.0f;
float delta = 0.0f;
float incremental_delta = 0.0f;
float MEDIAN_FOR_MAX_DELTA = 0.0f;
float MEAN_FOR_MAX_DELTA = 0.0f;
float MAX_DELTA = -1.0f;
int MAX_SEQ_LENGTH = 0;
System.out.print("Enter the length of input: ");
in_length = sc.nextInt();
int in_arr[]= new int [in_length+1];
int out_arr[] = new int [in_length+1]; //This is the maximum size of the output array.
int MAX_DELTA_ARR[] = new int [in_length+1];
// STAGE-1: Accept the input sequence
for (i = 1; i <= in_length; i++) {
System.out.print("Enter the input #" + i + ": ");
in_arr[i] = sc.nextInt();
}
// STAGE-1 completed.
// STAGE-2: Sort the array (Bubble sort in Ascending order)
for (j = 1; j < in_length; j++) {
for (i = in_length; i > j; i--) {
if (in_arr[i-1] > in_arr[i]) {
k = in_arr[i];
in_arr[i] = in_arr[i-1];
in_arr[i-1] = k;
}
}
}
// STAGE-2 completed.
// STAGE-3: Compute Max Delta
MAX_DELTA = -99999; //Store as large -ve number as float data type can hold.
for (i = in_length; i > 2; i--) {
// STAGE-3a: Optional - Clear the out_arr[]
for (j = 1; j <= in_length; j++) {
out_arr [j] = 0;
}
// STAGE-3a completed.
// STAGE-3b: Determine the index of the median for the sequence of length i
if (i % 2 == 1) {
mid_loc = (i + 1)/2;
}
else {
mid_loc = (i / 2) + 1;
}
// STAGE-3b completed.
// STAGE-3c: Create the selection that gives the min median and max mean.
// STAGE-3c1: Create left side of mid point.
for (j = mid_loc; j > 0; j--) {
out_arr[j] = in_arr[j];
}
// STAGE-3c1 completed.
// STAGE-3c2: Create right side of mid point.
k = in_length;
for (j = i; j > mid_loc; j--) {
out_arr[j] = in_arr[k];
k = k - 1;
}
// STAGE-3c2 completed.
// STAGE-3c3: Do the SHIFT TEST.
//for (; k <= mid_loc + in_length - i; k++) {
for (k = mid_loc + 1; k <= mid_loc + in_length - i; k++) {
if (i % 2 == 1) {
incremental_delta = ((float)in_arr[k] - (float)out_arr[1])/i - ((float)in_arr[k] - (float)out_arr[mid_loc]);
}
else {
incremental_delta = ((float)in_arr[k] - (float)out_arr[1])/i - (((float)in_arr[k] - (float)out_arr[mid_loc]/2));
}
if (incremental_delta >= 0 ) {
//Insert this new element
for(j = 1; j < mid_loc; j++) {
out_arr[j] = out_arr[j+1];
}
out_arr[mid_loc] = in_arr[k];
}
}
// STAGE-3c3 completed.
// STAGE-3d: Find the median of the present sequence.
if(i % 2 == 1) {
median = out_arr[mid_loc];
}
else {
median = ((float)out_arr[mid_loc] + (float)out_arr[mid_loc - 1])/2;
}
// STAGE-3d completed.
// STAGE-3e: Find the mean of the present sequence.
sum_arr = 0;
for(j=1; j <= i ; j++) {
sum_arr = sum_arr + out_arr[j];
}
mean = (float)sum_arr / i;
// STAGE-3e completed.
// STAGE-3f: Find the delta for the present sequence and compare with previous MAX_DELTA. Store the result.
delta = mean - median;
if(delta > MAX_DELTA) {
MAX_DELTA = delta;
MEAN_FOR_MAX_DELTA = mean;
MEDIAN_FOR_MAX_DELTA = median;
MAX_SEQ_LENGTH = i;
for (j = 1; j <= MAX_SEQ_LENGTH; j++) {
MAX_DELTA_ARR[j] = out_arr[j];
}
}
// STAGE-3f completed.
}
// STAGE-4: Print the result.
System.out.println("--- RESULT ---");
System.out.print("The given input sequence is: ");
System.out.print("{ ");
for(i=1; i <= in_length; i++) {
System.out.print(in_arr[i]);
System.out.print(" ");
}
System.out.print("}");
System.out.println("");
System.out.print("The sequence with maximum difference between mean and median is: ");
System.out.print("{ ");
for(i=1; i <= MAX_SEQ_LENGTH; i++) {
System.out.print(MAX_DELTA_ARR[i]);
System.out.print(" ");
}
System.out.print("}");
System.out.println("");
System.out.println("The mean for this sequence is: " + MEAN_FOR_MAX_DELTA);
System.out.println("The median for this sequence is: " + MEDIAN_FOR_MAX_DELTA);
System.out.println("The maximum difference between mean and median for this sequence is: " + MAX_DELTA);
}
}
This code has order O(n) (if we ignore the necessity to sort the input array).
In case, -ve inputs are also expected - the only way out is by evaluating each subset. The downside to this approach is that the algorithm has exponential order: O(2^n).
As a compromise you could use both types of algorithm in your code and switch between the two by evaluating the input sequence. By the way, where did you come across this question?
from itertools import combinations
[Verfication of the code][1]
# function to generate all subsets possible, there will be 2^n - 1 subsets(combinations)
def subsets(arr):
temp = []
for i in range(1, len(arr)+1):
comb = combinations(arr, i)
for j in comb:
temp.append(j)
return temp
# function to calculate median
def median(arr):
mid = len(arr)//2
if(len(arr)%2==0):
median = (arr[mid] + arr[mid-1])/2
else:`
median = arr[mid]
return median
# function to calculate median
def mean(arr):
temp = 0
for i in arr:
temp = temp + i
return temp/len(arr)
# function to solve given problem
def meanMedian(arr):
sets = subsets(arr)
max_value = 0
for i in sets:
mean_median = mean(i)-median(i)
if(mean_median>max_value):
max_value = mean_median
needed_set = i
return needed_set
[1]: https://i.stack.imgur.com/Mx4pc.png
So I tried a little on the problem and here is a code that might help you. Its written in a way that should be easy to read, and if not, do let me know. Maybe you need to take array input from the user as I have taken a fixed array. That shouldn't be much of a problem I am sure.
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
class MeanMinusMedian
{
private static float mean = 0;
private static float median = 0;
private static float meanMinusMedian = 0;
private static List<Integer> meanMinusMedianList = null;
private static void formMeanMinusMedianArr(int data[], int sumOfData)
{
findMean(data, sumOfData);
findMedian(data);
if ((mean - median) > meanMinusMedian) {
meanMinusMedian = mean - median;
meanMinusMedianList = new ArrayList<Integer>();
Arrays.stream(data)
.forEach(e->meanMinusMedianList.add(e));
}
}
/**
* #param data
*/
private static void findMedian(int[] data) {
int dataLen = data.length;
median = data.length % 2 == 0 ? ((float)data[dataLen / 2] + (float)data[dataLen / 2 - 1]) / 2 : data[dataLen / 2];
}
/**
* #param data
* #param sumOfData
*/
private static void findMean(int[] data, int sumOfData) {
mean = ((float)sumOfData /(float) data.length);
}
/**
*
* #param arr
* #param data
* #param start
* #param end
* #param index
* #param runningVal
*/
private static void combinationUtil(int arr[], int data[], int start, int end, int index, int runningVal) {
// Current combination is ready to be printed, print it
if (index == runningVal) {
formMeanMinusMedianArr(data, Arrays.stream(data) // Step 1
.sum());
return;
}
// replace index with all possible elements. The condition
// "end-i+1 >= r-index" makes sure that including one element
// at index will make a combination with remaining elements
// at remaining positions
for (int i = start; i <= end && end - i + 1 >= runningVal - index; i++) {
data[index] = arr[i];
combinationUtil(arr, data, i + 1, end, index + 1, runningVal);
}
}
/**
*
* #param arr
* #param n
* #param runningVal
*/
private static void printCombination(int arr[], int n, int runningVal) {
int data[] = new int[runningVal];
// Print all combination using temporary array 'data[]'
combinationUtil(arr, data, 0, n - 1, 0, runningVal);
}
public static void main(String[] args) {
int arr[] = { 1, 2, 2, 3, 3 };
int runningVal = 1;//Running value
int len = arr.length;
for (int i = 1; i < arr.length; i++) {
printCombination(arr, len, runningVal + i);
}
System.out.println(meanMinusMedianList);
}
}
Taking reference of answer of Bhaskar13 https://stackoverflow.com/a/59386801/3509609 , I solved it without using the bit shift operators, to add more readability.
package array;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
public class MeanMinusMedianMax {
public static void main(String[] args) {
System.out.println(Arrays.toString(maxDiffrenceSubSet(4, new int[] { 4, 2, 3, 1 })));
System.out.println(Arrays.toString(maxDiffrenceSubSet(4, new int[] { 1, 2, 2, 3, 3 })));
}
public static int[] maxDiffrenceSubSet(int n, int[] input2) {
int totalSubsets = (int) Math.pow(2, n);
Map<Integer, ArrayList<Integer>> subsetsMap = new HashMap<Integer, ArrayList<Integer>>();
Integer maxKey = null;
double maxDiff = 0;
for (int i = 0; i < totalSubsets; i++) {
String binaryString = Integer.toBinaryString(i);
while (binaryString.length() < 4) {
binaryString = "0" + binaryString;
}
char[] currentPick = binaryString.toCharArray();
ArrayList<Integer> currentList = new ArrayList<Integer>();
for (int x = 0; x < currentPick.length; x++) {
if ((currentPick[x]) == '1') {
currentList.add(input2[x]);
}
}
Collections.sort(currentList);
subsetsMap.put(i, currentList);
double mean = findMean(currentList);
double median = findMedian(currentList);
double currentDifference = mean - median;
if (currentDifference > maxDiff) {
maxDiff = currentDifference;
maxKey = i;
}
}
return subsetsMap.get(maxKey).stream().mapToInt(i -> i).toArray();
}
static double findMean(ArrayList<Integer> arr) {
int n = arr.size();
double sum = 0;
if (n == 0)
return 0;
for (int i = 0; i < n; i++)
sum += arr.get(i);
return (sum / n);
}
static double findMedian(ArrayList<Integer> arr) {
int n = arr.size();
if (n % 2 == 1)
return arr.get((n / 2));
else if (n >= 2)
return 0.5 * (arr.get(((n - 2) / 2)) + arr.get(n / 2));
else
return 0;
}
}
class UserMainCode (object):
def meanmedian(cls,ip1,ip2=[]):
s = []
s = ip2
lst = []
final = []
op = []
max_val = 0
diff = 0
for i in range(1,ip1+1):
n=i
lst = list(itertools.combinations(s,n))
final = final +lst
for i in range(len(final)):
men = statistics.mean(final[i])
med = statistics.median(final[i])
diff = men - med
if max_val < diff:
op = final[i]
max_val = diff
return op
There is an integer array d which does not contain more than two elements of the same value. How many distinct ascending triples (d[i] < d[j] < d[k], i < j < k) are present?
Input format:
The first line contains an integer N denoting the number of elements in the array. This is followed by a single line containing N integers separated by a single space with no leading/trailing spaces
Output format:
A single integer that denotes the number of distinct ascending triples present in the array
Constraints:
N <= 10^5
Every value in the array is present at most twice
Every value in the array is a 32-bit positive integer
Sample input:
6
1 1 2 2 3 4
Sample output:
4
Explanation:
The distinct triplets are
(1,2,3)
(1,2,4)
(1,3,4)
(2,3,4)
Another test case:
Input:
10
1 1 5 4 3 6 6 5 9 10
Output:
28
I tried to solve using DP. But out of 15 test cases only 7 test cases passed.
Please help solve this problem.
You should note that you only need to know the number of elements that are smaller/larger than a particular element to know how many triples it serves as the middle point for. Using this you can calculate the number of triples quite easily, the only remaining problem is to get rid of duplicates, but given that you are limited to at most 2 of the same element, this is trivial.
I solved using a Binary Index Tree http://community.topcoder.com/tc?module=Static&d1=tutorials&d2=binaryIndexedTrees.
I also did a small write up, http://www.kesannmcclean.com/?p=223.
package com.jai;
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.util.Arrays;
import java.util.HashMap;
public class Triplets {
int[] lSmaller, rLarger, treeArray, dscArray, lFlags, rFlags;
int size, count = 0;
Triplets(int aSize, int[] inputArray) {
size = aSize;
lSmaller = new int[size];
rLarger = new int[size];
dscArray = new int[size];
int[] tmpArray = Arrays.copyOf(inputArray, inputArray.length);
Arrays.sort(tmpArray);
HashMap<Integer, Integer> tmpMap = new HashMap<Integer, Integer>(size);
for (int i = 0; i < size; i++) {
if (!tmpMap.containsKey(tmpArray[i])) {
count++;
tmpMap.put(tmpArray[i], count);
}
}
count++;
treeArray = new int[count];
lFlags = new int[count];
rFlags = new int[count];
for (int i = 0; i < size; i++) {
dscArray[i] = tmpMap.get(inputArray[i]);
}
}
void update(int idx) {
while (idx < count) {
treeArray[idx]++;
idx += (idx & -idx);
}
}
int read(int index) {
int sum = 0;
while (index > 0) {
sum += treeArray[index];
index -= (index & -index);
}
return sum;
}
void countLeftSmaller() {
Arrays.fill(treeArray, 0);
Arrays.fill(lSmaller, 0);
Arrays.fill(lFlags, 0);
for (int i = 0; i < size; i++) {
int val = dscArray[i];
lSmaller[i] = read(val - 1);
if (lFlags[val] == 0) {
update(val);
lFlags[val] = i + 1;
} else {
lSmaller[i] -= lSmaller[lFlags[val] - 1];
}
}
}
void countRightLarger() {
Arrays.fill(treeArray, 0);
Arrays.fill(rLarger, 0);
Arrays.fill(rFlags, 0);
for (int i = size - 1; i >= 0; i--) {
int val = dscArray[i];
rLarger[i] = read(count - 1) - read(val);
if (rFlags[val] == 0) {
update(val);
rFlags[val] = i + 1;
}
}
}
long countTriplets() {
long sum = 0;
for (int i = 0; i < size; i++) {
sum += lSmaller[i] * rLarger[i];
}
return sum;
}
public static void main(String[] args) throws Exception {
BufferedReader br = new BufferedReader(new InputStreamReader(System.in));
int N = Integer.parseInt(br.readLine());
int[] a = new int[N];
String[] strs = br.readLine().split(" ");
for (int i = 0; i < N; i++)
a[i] = Integer.parseInt(strs[i]);
Triplets sol = new Triplets(N, a);
sol.countLeftSmaller();
sol.countRightLarger();
System.out.println(sol.countTriplets());
}
}
For tentative algorithm that I came up with, it should be:
(K-1)!^2
where K is number of unique elements.
EDIT
After more thinking about this:
SUM[i=1,K-2] SUM[j=i+1,K-1] SUM[m=j+1,K] 1
=> SUM[i=1,K-2] (SUM[j=i+1,K-1] (K-j))
if the input is not sorted (the question is not clear about this): sort it
remove the duplicated items (this step could be conbined with the first step)
now pick 3 items. Since the items are already sorted, the three chosen items are ordered as well
IIRC there are (n!) / ((n-3)! * 3!) ways to pick the three items; with n := the number of unique items
#hadron: exactly, I couldn get my head around on why it should be 28 and not 35 for a set of 7 distinct numbers *
[Since the ques is about ascending triplets, repeated numbers can be discarded].
btw, here's a very bad Java solution(N^3):
I have also printed out the possible triplets:
I'm also thinking about some function that dictates the no: of triplets possible for input 'N'
4 4
5 10
6 20
7 35
8 56
9 84
package org.HackerRank.AlgoChallenges;
import java.util.Iterator;
import java.util.Scanner;
import java.util.TreeSet;
public class Triplets {
public static void main(String[] args) {
Scanner scanner = new Scanner(System.in);
int result = 0;
int n = scanner.nextInt();
Object[] array = new Object[n];
TreeSet<Integer> treeSet = new TreeSet<Integer>();
/*
* for (int i = 0; i < n; i++) { array[i] = scanner.nextInt(); }
*/
while (n>0) {
treeSet.add(scanner.nextInt());
n--;
}
scanner.close();
Iterator<Integer> iterator = treeSet.iterator();
int i =0;
while (iterator.hasNext()) {
//System.out.println("TreeSet["+i+"] : "+iterator.next());
array[i] = iterator.next();
//System.out.println("Array["+i+"] : "+array[i]);
i++;
}
for (int j = 0; j < (array.length-2); j++) {
for (int j2 = (j+1); j2 < array.length-1; j2++) {
for (int k = (j2+1); k < array.length; k++) {
if(array[j]!=null && array[j2]!=null && array[k]!=null){
System.out.println("{ "+array[j]+", "+array[j2]+", "+array[k]+" }");
result++;
}
}
}
}
System.out.println(result);
}
One of solution in python:
from itertools import combinations as comb
def triplet(lis):
done = dict()
result = set()
for ind, num in enumerate(lis):
if num not in done:
index = ind+1
for elm in comb(lis[index:], 2):
s,t = elm[0], elm[1]
if (num < s < t):
done.setdefault(num, None)
fin = (num,s,t)
if fin not in result:
result.add(fin)
return len(result)
test = int(raw_input())
lis = [int(_) for _ in raw_input().split()]
print triplet(lis)
Do you care about complexity?
Is the input array sorted?
if you don't mind about complexity you can solve it in complexity of N^3.
The solution with complexity N^3:
If it not sorted, then sorted the array.
Use 3 for loops one inside the other and go threw the array 3 times for each number.
Use hash map to count all the triples. The key will be the triple it self and the value will be the number of occurences.
It should be something like this:
for (i1=0; i1<N; i1++) {
for (i2=i1; i2<N; i2++) {
for (i3=i2; i3<N; i3++) {
if (N[i1] < N[i2] < N[i3]) {
/* if the triple exists in the hash then
add 1 to its value
else
put new triple to the hash with
value 1
*/
}
}
}
}
Result = number of triples in the hash;
I didn't try it but I think it should work.
I have to perform a run time analysis on quicksort by incrementing the size of the array by 100 each time. However when I measure the runtime using System.nanoTime the results aren't as I expect (my graphs look more like O(2^n)). The time shoots up whenever the array reaches around 800. Could someone please tell me where I'm going wrong with my code.
Also the count part is sort of irrelevant at the moment, since i only want to run the quicksort once at each array size.
import java.util.Random;
public class Quickworking {
public static void main(String[] args) {
Random rand = new Random();
int count2;
long total = 0;
count2 = 1;
int [] myArray = new int[1400];
//generates random array
for (int i = 0; i < myArray.length; i++){
myArray[i] = rand.nextInt(100) + 1;
//System.out.print(myArray[i] + ", ");
}
//loop sort n amount of times
for (int count = 0; count < count2; count++){
//calls the sort method on myArray giving the arguments
long start = System.nanoTime();
sort( myArray, 0, myArray.length - 1 );
long end = System.nanoTime();
System.out.println(end - start );
total += (end - start);
}
//long average = (long) total / (long) count2;
//System.out.println(average);
//prints the sorted array
//for (int i = 0; i <myArray.length; i++){
// System.out.print(myArray[i] + ", ");
//}
}
public static int sort(int myArray[], int left, int right){
int i = left, j = right;
int temp;
int pivot = myArray[(left + right) / 2];
//System.out.println("here are the pivot numbers " + pivot + ", ");
if (i <= j){
while (myArray[i] < pivot) //&& (i<right))
i++;
while (myArray[j] > pivot) //&& (j>left))
j--;
if (i <= j){
temp = myArray[i];
myArray[i] = myArray[j];
myArray[j] = temp;
i++;
j--;
}
}
if (left < j) sort(myArray, left, j);
if (i < right) sort(myArray, i, right);
return i;
}
}
Quicksort's behaviour when sorting already sorted arrays is O(n^2). After the array is sorted the first time in your code you are then sorting the sorted array. That will give you the worst case for quick sort. You need to generate a completely new random array each time to really test this.
QuickSort does not perform well on already sorted data.
See here:
http://en.wikipedia.org/wiki/Quicksort
You should randomize your array within the for loop in order to get more accurate results.
Given an n by n matrix with zeros and ones, find the largest sub-
matrix full of ones in linear time. I was told that a solution with
O(n) time complexity exists. If there are n^2 elements in a n X n
matrix how does a linear solution exist?
Unless you have a non-standard definition of submatrix this problem is NP-hard by reduction from maximum clique.
You can't search a n x n matrix in n time. Counterexample: a matrix of zeros with a single element set to one. You have to check every element to find where that one is, so time must be at least O(n^2).
Now if you say that the matrix has N = n^2 entries, and you only consider submatrices that form a contiguous block, then you should be able to find the largest submatrix by walking diagonally across the matrix, keeping track of every rectangle of ones as you go. You could in general have up to O(sqrt(N)) rectangles active simultaneously, and you would need to search in them to figure out which rectangle was the largest, so you ought to be able to do this in O(N^(3/2) * log(N)) time.
If you can pick arbitrary rows and columns to form your submatrix, then I don't see any obvious polynomial time algorithm.
The solution is linear in the number of entries, not in the number of rows or columns.
public static int biggestSubMatrix(int[][] matrix) {
int[][] newMatrix = new int[matrix.length][matrix[0].length];
for (int i = 0; i < matrix.length; i++) {
int sum = 0;
for (int j = 0; j < matrix[0].length; j++) {
if (matrix[i][j] == 1) {
sum++;
newMatrix[i][j] = sum;
} else {
sum = 0;
newMatrix[i][j] = 0;
}
}
}
int maxDimention = 0;
int maxSubMatrix = 0;
for (int i = 0; i < newMatrix[0].length; i++) {
//find dimention for each column
maxDimention = calcHighestDimentionBySmallestItem(newMatrix, i);
if(maxSubMatrix < maxDimention ){
maxSubMatrix = maxDimention ;
}
}
return maxSubMatrix;
}
private static int calcHighestDimentionBySmallestItem(int[][] matrix, int col) {
int totalMaxDimention =0;
for (int j = 0; j < matrix.length; j++) {
int maxDimention = matrix[j][col];
int numItems = 0;
int min = matrix[j][col];
int dimention = 0;
for (int i = j; i < matrix.length; i++) {
int val = matrix[i][col];
if (val != 0) {
if (val < min) {
min = val;
}
numItems++;
dimention = numItems*min;
if(dimention>maxDimention){
maxDimention = dimention;
}
} else { //case val == 0
numItems = 0;
min = 0;
}
}
if(totalMaxDimention < maxDimention){
totalMaxDimention = maxDimention;
}
}
return totalMaxDimention;
}
Given a string s, what is the fastest method to generate a set of all its unique substrings?
Example: for str = "aba" we would get substrs={"a", "b", "ab", "ba", "aba"}.
The naive algorithm would be to traverse the entire string generating substrings in length 1..n in each iteration, yielding an O(n^2) upper bound.
Is a better bound possible?
(this is technically homework, so pointers-only are welcome as well)
As other posters have said, there are potentially O(n^2) substrings for a given string, so printing them out cannot be done faster than that. However there exists an efficient representation of the set that can be constructed in linear time: the suffix tree.
There is no way to do this faster than O(n2) because there are a total of O(n2) substrings in a string, so if you have to generate them all, their number will be n(n + 1) / 2 in the worst case, hence the upper lower bound of O(n2) Ω(n2).
First one is brute force which has complexity O(N^3) which could be brought down to O(N^2 log(N))
Second One using HashSet which has Complexity O(N^2)
Third One using LCP by initially finding all the suffix of a given string which has the worst case O(N^2) and best case O(N Log(N)).
First Solution:-
import java.util.Scanner;
public class DistinctSubString {
public static void main(String[] args) {
Scanner in = new Scanner(System.in);
System.out.print("Enter The string");
String s = in.nextLine();
long startTime = System.currentTimeMillis();
int L = s.length();
int N = L * (L + 1) / 2;
String[] Comb = new String[N];
for (int i = 0, p = 0; i < L; ++i) {
for (int j = 0; j < (L - i); ++j) {
Comb[p++] = s.substring(j, i + j + 1);
}
}
/*
* for(int j=0;j<N;++j) { System.out.println(Comb[j]); }
*/
boolean[] val = new boolean[N];
for (int i = 0; i < N; ++i)
val[i] = true;
int counter = N;
int p = 0, start = 0;
for (int i = 0, j; i < L; ++i) {
p = L - i;
for (j = start; j < (start + p); ++j) {
if (val[j]) {
//System.out.println(Comb[j]);
for (int k = j + 1; k < start + p; ++k) {
if (Comb[j].equals(Comb[k])) {
counter--;
val[k] = false;
}
}
}
}
start = j;
}
System.out.println("Substrings are " + N
+ " of which unique substrings are " + counter);
long endTime = System.currentTimeMillis();
System.out.println("It took " + (endTime - startTime) + " milliseconds");
}
}
Second Solution:-
import java.util.*;
public class DistictSubstrings_usingHashTable {
public static void main(String args[]) {
// create a hash set
Scanner in = new Scanner(System.in);
System.out.print("Enter The string");
String s = in.nextLine();
int L = s.length();
long startTime = System.currentTimeMillis();
Set<String> hs = new HashSet<String>();
// add elements to the hash set
for (int i = 0; i < L; ++i) {
for (int j = 0; j < (L - i); ++j) {
hs.add(s.substring(j, i + j + 1));
}
}
System.out.println(hs.size());
long endTime = System.currentTimeMillis();
System.out.println("It took " + (endTime - startTime) + " milliseconds");
}
}
Third Solution:-
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.Arrays;
public class LCPsolnFroDistinctSubString {
public static void main(String[] args) throws IOException {
BufferedReader br = new BufferedReader(new InputStreamReader(System.in));
System.out.println("Enter Desired String ");
String string = br.readLine();
int length = string.length();
String[] arrayString = new String[length];
for (int i = 0; i < length; ++i) {
arrayString[i] = string.substring(length - 1 - i, length);
}
Arrays.sort(arrayString);
for (int i = 0; i < length; ++i)
System.out.println(arrayString[i]);
long num_substring = arrayString[0].length();
for (int i = 0; i < length - 1; ++i) {
int j = 0;
for (; j < arrayString[i].length(); ++j) {
if (!((arrayString[i].substring(0, j + 1)).equals((arrayString)[i + 1]
.substring(0, j + 1)))) {
break;
}
}
num_substring += arrayString[i + 1].length() - j;
}
System.out.println("unique substrings = " + num_substring);
}
}
Fourth Solution:-
public static void printAllCombinations(String soFar, String rest) {
if(rest.isEmpty()) {
System.out.println(soFar);
} else {
printAllCombinations(soFar + rest.substring(0,1), rest.substring(1));
printAllCombinations(soFar , rest.substring(1));
}
}
Test case:- printAllCombinations("", "abcd");
For big oh ... Best you could do would be O(n^2)
No need to reinvent the wheel, its not based on a strings, but on a sets, so you will have to take the concepts and apply them to your own situation.
Algorithms
Really Good White Paper from MS
In depth PowerPoint
Blog on string perms
well, since there is potentially n*(n+1)/2 different substrings (+1 for the empty substring), I doubt you can be better than O(n*2) (worst case). the easiest thing is to generate them and use some nice O(1) lookup table (such as a hashmap) for excluding duplicates right when you find them.
class SubstringsOfAString {
public static void main(String args[]) {
String string = "Hello", sub = null;
System.out.println("Substrings of \"" + string + "\" are :-");
for (int i = 0; i < string.length(); i++) {
for (int j = 1; j <= string.length() - i; j++) {
sub = string.substring(i, j + i);
System.out.println(sub);
}
}
}
}
class program
{
List<String> lst = new List<String>();
String str = "abc";
public void func()
{
subset(0, "");
lst.Sort();
lst = lst.Distinct().ToList();
foreach (String item in lst)
{
Console.WriteLine(item);
}
}
void subset(int n, String s)
{
for (int i = n; i < str.Length; i++)
{
lst.Add(s + str[i].ToString());
subset(i + 1, s + str[i].ToString());
}
}
}
This prints unique substrings.
https://ideone.com/QVWOh0
def uniq_substring(test):
lista=[]
[lista.append(test[i:i+k+1]) for i in range(len(test)) for k in
range(len(test)-i) if test[i:i+k+1] not in lista and
test[i:i+k+1][::-1] not in lista]
print lista
uniq_substring('rohit')
uniq_substring('abab')
['r', 'ro', 'roh', 'rohi', 'rohit', 'o', 'oh', 'ohi', 'ohit', 'h',
'hi', 'hit', 'i', 'it', 't']
['a', 'ab', 'aba', 'abab', 'b', 'bab']
Many answers that include 2 for loops and a .substring() call claim O(N^2) time complexity. However, it is important to note that the worst case for a .substring() call in Java (post update 6 in Java 7) is O(N). So by adding a .substring() call in your code, the order of N has increased by one.
Therefore, 2 for loops and a .substring() call within those loops equals an O(N^3) time complexity.
It can only be done in o(n^2) time as total number of unique substrings of a string would be n(n+1)/2.
Example:
string s = "abcd"
pass 0: (all the strings are of length 1)
a, b, c, d = 4 strings
pass 1: (all the strings are of length 2)
ab, bc, cd = 3 strings
pass 2: (all the strings are of length 3)
abc, bcd = 2 strings
pass 3: (all the strings are of length 4)
abcd = 1 strings
Using this analogy, we can write solution with o(n^2) time complexity and constant space complexity.
The source code is as below:
#include<stdio.h>
void print(char arr[], int start, int end)
{
int i;
for(i=start;i<=end;i++)
{
printf("%c",arr[i]);
}
printf("\n");
}
void substrings(char arr[], int n)
{
int pass,j,start,end;
int no_of_strings = n-1;
for(pass=0;pass<n;pass++)
{
start = 0;
end = start+pass;
for(j=no_of_strings;j>=0;j--)
{
print(arr,start, end);
start++;
end = start+pass;
}
no_of_strings--;
}
}
int main()
{
char str[] = "abcd";
substrings(str,4);
return 0;
}
Naive algorithm takes O(n^3) time instead of O(n^2) time.
There are O(n^2) number of substrings.
And if you put O(n^2) number of substrings, for example, set,
then set compares O(lgn) comparisons for each string to check if it alrady exists in the set or not.
Besides it takes O(n) time for string comparison.
Therefore, it takes O(n^3 lgn) time if you use set. and you can reduce it O(n^3) time if you use hashtable instead of set.
The point is it is string comparisons not number comparisons.
So one of the best algorithm let's say if you use suffix array and longest common prefix (LCP) algorithm, it reduces O(n^2) time for this problem.
Building a suffix array using O(n) time algorithm.
Time for LCP = O(n) time.
Since for each pair of strings in suffix array, do LCP so total time is O(n^2) time to find the length of distinct subtrings.
Besides if you want to print all distinct substrings, it takes O(n^2) time.
Try this code using a suffix array and longest common prefix. It can also give you the total number of unique substrings. The code might give a stack overflow in visual studio but runs fine in Eclipse C++. That's because it returns vectors for functions. Haven't tested it against extremely long strings. Will do so and report back.
// C++ program for building LCP array for given text
#include <bits/stdc++.h>
#include <vector>
#include <string>
using namespace std;
#define MAX 100000
int cum[MAX];
// Structure to store information of a suffix
struct suffix
{
int index; // To store original index
int rank[2]; // To store ranks and next rank pair
};
// A comparison function used by sort() to compare two suffixes
// Compares two pairs, returns 1 if first pair is smaller
int cmp(struct suffix a, struct suffix b)
{
return (a.rank[0] == b.rank[0])? (a.rank[1] < b.rank[1] ?1: 0):
(a.rank[0] < b.rank[0] ?1: 0);
}
// This is the main function that takes a string 'txt' of size n as an
// argument, builds and return the suffix array for the given string
vector<int> buildSuffixArray(string txt, int n)
{
// A structure to store suffixes and their indexes
struct suffix suffixes[n];
// Store suffixes and their indexes in an array of structures.
// The structure is needed to sort the suffixes alphabatically
// and maintain their old indexes while sorting
for (int i = 0; i < n; i++)
{
suffixes[i].index = i;
suffixes[i].rank[0] = txt[i] - 'a';
suffixes[i].rank[1] = ((i+1) < n)? (txt[i + 1] - 'a'): -1;
}
// Sort the suffixes using the comparison function
// defined above.
sort(suffixes, suffixes+n, cmp);
// At his point, all suffixes are sorted according to first
// 2 characters. Let us sort suffixes according to first 4
// characters, then first 8 and so on
int ind[n]; // This array is needed to get the index in suffixes[]
// from original index. This mapping is needed to get
// next suffix.
for (int k = 4; k < 2*n; k = k*2)
{
// Assigning rank and index values to first suffix
int rank = 0;
int prev_rank = suffixes[0].rank[0];
suffixes[0].rank[0] = rank;
ind[suffixes[0].index] = 0;
// Assigning rank to suffixes
for (int i = 1; i < n; i++)
{
// If first rank and next ranks are same as that of previous
// suffix in array, assign the same new rank to this suffix
if (suffixes[i].rank[0] == prev_rank &&
suffixes[i].rank[1] == suffixes[i-1].rank[1])
{
prev_rank = suffixes[i].rank[0];
suffixes[i].rank[0] = rank;
}
else // Otherwise increment rank and assign
{
prev_rank = suffixes[i].rank[0];
suffixes[i].rank[0] = ++rank;
}
ind[suffixes[i].index] = i;
}
// Assign next rank to every suffix
for (int i = 0; i < n; i++)
{
int nextindex = suffixes[i].index + k/2;
suffixes[i].rank[1] = (nextindex < n)?
suffixes[ind[nextindex]].rank[0]: -1;
}
// Sort the suffixes according to first k characters
sort(suffixes, suffixes+n, cmp);
}
// Store indexes of all sorted suffixes in the suffix array
vector<int>suffixArr;
for (int i = 0; i < n; i++)
suffixArr.push_back(suffixes[i].index);
// Return the suffix array
return suffixArr;
}
/* To construct and return LCP */
vector<int> kasai(string txt, vector<int> suffixArr)
{
int n = suffixArr.size();
// To store LCP array
vector<int> lcp(n, 0);
// An auxiliary array to store inverse of suffix array
// elements. For example if suffixArr[0] is 5, the
// invSuff[5] would store 0. This is used to get next
// suffix string from suffix array.
vector<int> invSuff(n, 0);
// Fill values in invSuff[]
for (int i=0; i < n; i++)
invSuff[suffixArr[i]] = i;
// Initialize length of previous LCP
int k = 0;
// Process all suffixes one by one starting from
// first suffix in txt[]
for (int i=0; i<n; i++)
{
/* If the current suffix is at n-1, then we don’t
have next substring to consider. So lcp is not
defined for this substring, we put zero. */
if (invSuff[i] == n-1)
{
k = 0;
continue;
}
/* j contains index of the next substring to
be considered to compare with the present
substring, i.e., next string in suffix array */
int j = suffixArr[invSuff[i]+1];
// Directly start matching from k'th index as
// at-least k-1 characters will match
while (i+k<n && j+k<n && txt[i+k]==txt[j+k])
k++;
lcp[invSuff[i]] = k; // lcp for the present suffix.
// Deleting the starting character from the string.
if (k>0)
k--;
}
// return the constructed lcp array
return lcp;
}
// Utility function to print an array
void printArr(vector<int>arr, int n)
{
for (int i = 0; i < n; i++)
cout << arr[i] << " ";
cout << endl;
}
// Driver program
int main()
{
int t;
cin >> t;
//t = 1;
while (t > 0) {
//string str = "banana";
string str;
cin >> str; // >> k;
vector<int>suffixArr = buildSuffixArray(str, str.length());
int n = suffixArr.size();
cout << "Suffix Array : \n";
printArr(suffixArr, n);
vector<int>lcp = kasai(str, suffixArr);
cout << "\nLCP Array : \n";
printArr(lcp, n);
// cum will hold number of substrings if that'a what you want (total = cum[n-1]
cum[0] = n - suffixArr[0];
// vector <pair<int,int>> substrs[n];
int count = 1;
for (int i = 1; i <= n-suffixArr[0]; i++) {
//substrs[0].push_back({suffixArr[0],i});
string sub_str = str.substr(suffixArr[0],i);
cout << count << " " << sub_str << endl;
count++;
}
for(int i = 1;i < n;i++) {
cum[i] = cum[i-1] + (n - suffixArr[i] - lcp[i - 1]);
int end = n - suffixArr[i];
int begin = lcp[i-1] + 1;
int begin_suffix = suffixArr[i];
for (int j = begin, k = 1; j <= end; j++, k++) {
//substrs[i].push_back({begin_suffix, lcp[i-1] + k});
// cout << "i push " << i << " " << begin_suffix << " " << k << endl;
string sub_str = str.substr(begin_suffix, lcp[i-1] +k);
cout << count << " " << sub_str << endl;
count++;
}
}
/*int count = 1;
cout << endl;
for(int i = 0; i < n; i++){
for (auto it = substrs[i].begin(); it != substrs[i].end(); ++it ) {
string sub_str = str.substr(it->first, it->second);
cout << count << " " << sub_str << endl;
count++;
}
}*/
t--;
}
return 0;
}
And here's a simpler algorithm:
#include <iostream>
#include <string.h>
#include <vector>
#include <string>
#include <algorithm>
#include <time.h>
using namespace std;
char txt[100000], *p[100000];
int m, n;
int cmp(const void *p, const void *q) {
int rc = memcmp(*(char **)p, *(char **)q, m);
return rc;
}
int main() {
std::cin >> txt;
int start_s = clock();
n = strlen(txt);
int k; int i;
int count = 1;
for (m = 1; m <= n; m++) {
for (k = 0; k+m <= n; k++)
p[k] = txt+k;
qsort(p, k, sizeof(p[0]), &cmp);
for (i = 0; i < k; i++) {
if (i != 0 && cmp(&p[i-1], &p[i]) == 0){
continue;
}
char cur_txt[100000];
memcpy(cur_txt, p[i],m);
cur_txt[m] = '\0';
std::cout << count << " " << cur_txt << std::endl;
count++;
}
}
cout << --count << endl;
int stop_s = clock();
float run_time = (stop_s - start_s) / double(CLOCKS_PER_SEC);
cout << endl << "distinct substrings \t\tExecution time = " << run_time << " seconds" << endl;
return 0;
}
Both algorithms listed a simply too slow for extremely long strings though. I tested the algorithms against a string of length over 47,000 and the algorithms took over 20 minutes to complete, with the first one taking 1200 seconds, and the second one taking 1360 seconds, and that's just counting the unique substrings without outputting to the terminal. So for probably strings of length up to 1000 you might get a working solution. Both solutions did compute the same total number of unique substrings though. I did test both algorithms against string lengths of 2000 and 10,000. The times were for the first algorithm: 0.33 s and 12 s; for the second algorithm it was 0.535 s and 20 s. So it looks like in general the first algorithm is faster.
Here is my code in Python. It generates all possible substrings of any given string.
def find_substring(str_in):
substrs = []
if len(str_in) <= 1:
return [str_in]
s1 = find_substring(str_in[:1])
s2 = find_substring(str_in[1:])
substrs.append(s1)
substrs.append(s2)
for s11 in s1:
substrs.append(s11)
for s21 in s2:
substrs.append("%s%s" %(s11, s21))
for s21 in s2:
substrs.append(s21)
return set(substrs)
If you pass str_ = "abcdef" to the function, it generates the following results:
a, ab, abc, abcd, abcde, abcdef, abcdf, abce, abcef, abcf, abd, abde, abdef, abdf, abe, abef, abf, ac, acd, acde, acdef, acdf, ace, acef, acf, ad, ade, adef, adf, ae, aef, af, b, bc, bcd, bcde, bcdef, bcdf, bce, bcef, bcf, bd, bde, bdef, bdf, be, bef, bf, c, cd, cde, cdef, cdf, ce, cef, cf, d, de, def, df, e, ef, f