Number of dict words containing each substring

Number of dict words containing each substring - algorithm

I chanced upon a problem which requires user to count the number of matching words for N substrings in a list of K strings (each string has length M) for the following constraints:
0 < length of substring <= 100
0 < M <= 100
0 < N <= 10,000
0 < K <= 10,000
For example:
substring se for { serpent, lose, last } will yield 2.
Given the enormous bounds of this input, checking all K strings for each substring will be too expensive. KMP would not work for this reason. Preprocessing the strings with suffix tree would be the next better option. But I can't possibly create suffix tree for each word because it would again lead to the above problem. Even if I try to join all the words together, the problem is that I would not be able to detect substrings in the same word (e.g. substring s for {stools, sat} will yield 3).
Is there a more efficient way of solving this problem?

One approach is to index every character of the string you want to search in, mapping each character to the list of positions at which it occurs.
"serpent" will give :
s > {0}
e > {1, 4}
r > {2}
p > {3}
n > {5}
t > {6}
From that, for each word you search in this string, look in the dictionary for its first and last char.
If they are found, you'll need to look into the corresponding indexes to find a pair that can match the string length.
Once found, you can do a full string comparison but only in this case.
The code can be like this (c#):
/// <summary>
/// Demo driver: for every search word, counts how many of the sample strings
/// contain it, then prints one "word: count" line per search word.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
static void Main( string[] args )
{
    List<string> words = new List<string>() { "se", "s", "pen", "oo", "st" };
    List<string> strings = new List<string>() { "serpent", "lose", "last", "stools", "sat" };

    // scores[i] is the number of strings that contain words[i].
    int[] scores = new int[words.Count];

    foreach ( var s in strings )
    {
        // Build the character-position index once per string and reuse it for every word.
        var indexes = MakeIndexes( s );
        for ( int i = 0 ; i < words.Count ; i++ )
        {
            scores[i] += Score( words[i], s, indexes );
        }
    }

    // The original computed the scores and then dropped them; report the result.
    for ( int i = 0 ; i < words.Count ; i++ )
    {
        System.Console.WriteLine( words[i] + ": " + scores[i] );
    }
}
/// <summary>
/// Returns 1 when <paramref name="word"/> occurs somewhere in <paramref name="s"/>, 0 otherwise.
/// </summary>
/// <param name="word">The (non-empty) substring to look for.</param>
/// <param name="s">The string being searched.</param>
/// <param name="indexes">For each character of <paramref name="s"/>, the ascending list of positions where it occurs.</param>
/// <returns>1 on a match, 0 when there is none.</returns>
static int Score( string word, string s, Dictionary<char, List<int>> indexes )
{
    // Distance between the first and the last character of the word.
    int span = word.Length - 1;
    char head = word[0];
    char tail = word[span];

    List<int> starts;
    if ( !indexes.TryGetValue( head, out starts ) )
    {
        return 0;
    }

    // A one-character word matches as soon as that character is present.
    if ( span == 0 )
    {
        return 1;
    }

    List<int> ends;
    if ( !indexes.TryGetValue( tail, out ends ) )
    {
        return 0;
    }

    // Walk both ascending position lists in lock-step, looking for a
    // (start, end) pair that is exactly `span` apart.
    int a = 0, b = 0;
    while ( a < starts.Count && b < ends.Count )
    {
        int start = starts[a];
        int gap = ends[b] - start;

        if ( gap > span )
        {
            // The end candidate is too far away: try a later start.
            a++;
        }
        else if ( gap < span )
        {
            // The end candidate is too close (or before the start): try a later end.
            b++;
        }
        else
        {
            // First and last characters line up; only now pay for a full comparison.
            if ( FullCompare( word, s, start ) )
            {
                return 1;
            }
            a++;
            b++;
        }
    }

    return 0;
}
/// <summary>
/// Checks whether <paramref name="word"/> appears in <paramref name="s"/> starting exactly at
/// position <paramref name="from"/>.
/// </summary>
/// <param name="word">The text expected at the given position.</param>
/// <param name="s">The string being inspected.</param>
/// <param name="from">Zero-based start position inside <paramref name="s"/>.</param>
/// <returns>True when every character of <paramref name="word"/> matches.</returns>
static bool FullCompare( string word, string s, int from )
{
    int offset = 0;
    while ( offset < word.Length )
    {
        if ( word[offset] != s[from + offset] )
        {
            return false;
        }
        offset++;
    }
    return true;
}
/// <summary>
/// Builds a lookup from each distinct character of <paramref name="s"/> to the ascending
/// list of positions at which that character occurs.
/// </summary>
/// <param name="s">The string to index.</param>
/// <returns>A dictionary keyed by character; an empty dictionary for an empty string.</returns>
static Dictionary<char, List<int>> MakeIndexes( string s )
{
    var positionsByChar = new Dictionary<char, List<int>>();
    int position = 0;
    foreach ( char current in s )
    {
        List<int> bucket;
        if ( !positionsByChar.TryGetValue( current, out bucket ) )
        {
            // First time this character is seen: start its position list.
            bucket = new List<int>();
            positionsByChar.Add( current, bucket );
        }
        bucket.Add( position );
        position++;
    }
    return positionsByChar;
}

Related

You are given a binary string S consisting of 0's and 1's

You are given a binary string S consisting of 0's and 1's. You have to tell whether it is possible to split the string into exactly K substrings such that each character belongs to exactly one substring and the decimal value of each substring is the same.
Determine whether it is possible to split the string S into exactly K substrings such that each character belongs to exactly one substring and the decimal value of each substring is the same.
Example:
S: 10100101
K: 2
Result: true
S can be split into K parts, 101 and 00101, which have same decimal value

We can ignore leading '0's, but we cannot ignore '1's. So first we need to check whether the ones can be divided evenly among the k substrings. Let's say they can, and each substring contains p ones.
Similarly, we cannot ignore the '0's that come after the last '1'. So each segment must end with the same number of trailing '0's as the last segment.
We build the substrings according to these two conditions and then check that they are all equal. The equality check is needed because, with two '1's and two trailing '0's in each substring, you can still get 10100 and 1100, which have different decimal values.
Here is my C++ code
// Decides whether `str` (a string of '0' and '1' characters) can be cut into
// exactly k non-empty contiguous pieces that all have the same value when read
// as binary numbers.
//
// Approach: leading zeroes of a piece do not change its value, but every '1'
// and every '0' that follows the first '1' does. So each piece must hold
// ones/k ones, must end with as many zeroes as the whole string ends with,
// and the pieces must be identical once their leading zeroes are stripped.
//
// Returns false for k <= 0 or k > str.length().
bool isSplitPossible(string str, int k) {
    const int n = static_cast<int>(str.length());
    // Every piece needs at least one character; this also avoids ones % 0.
    if (k <= 0 || k > n) {
        return false;
    }
    int ones = 0;
    for (int i = 0; i < n; i++) {
        if (str[i] == '1') {
            ones++;
        }
    }
    if (ones == 0) {
        // Only zeroes: every piece has value 0, so any cut into k pieces works.
        return true;
    }
    if (ones % k != 0) {
        // The ones cannot be shared equally between k pieces.
        return false;
    }
    const int segOnes = ones / k;
    // Zeroes after the last '1' belong to the last piece and cannot be
    // ignored, so every piece must end with the same number of zeroes.
    int trailingZero = 0;
    for (int i = n - 1; i >= 0 && str[i] != '1'; i--) {
        trailingZero++;
    }
    string prevSegment = "";
    string sub = "";
    int oneCnt = 0, zeroCnt = 0, segCount = 0;
    for (int i = 0; i < n; i++) {
        if (str[i] == '1') {
            sub.push_back('1');
            oneCnt++;
        }
        else if (oneCnt == segOnes) {
            // Zero after the piece's last '1': a trailing zero.
            sub.push_back('0');
            zeroCnt++;
        }
        else if (oneCnt != 0) {
            // Zero between two ones of the same piece. It changes the value
            // (1001 != 11), so it must be kept; the original dropped it.
            sub.push_back('0');
        }
        // Otherwise it is a leading zero of the piece and is ignored.

        if (oneCnt == segOnes && zeroCnt == trailingZero) {
            // A piece is complete; it must equal the previous one.
            if (segCount == 0) {
                prevSegment = sub;
            }
            else if (prevSegment != sub) {
                return false;
            }
            segCount++;
            oneCnt = zeroCnt = 0;
            sub = "";
        }
    }
    return segCount == k;
}

// Returns true when `str` (made of '0' and '1') can be split into exactly k
// non-empty contiguous substrings whose binary values are all equal.
//
// Each substring must contain allone/k ones and must end with the same number
// of zeroes as the whole string; leading zeroes of a substring are ignored,
// zeroes between ones are kept because they change the value.
//
// Returns false for k <= 0 or k > str.length().
bool isSplitPossible(string str, int k) {
    int n = str.length();
    // Guard against division by zero and against asking for more pieces than characters.
    if(k <= 0 || k > n) {
        return false;
    }
    int allone = 0;
    for(int i = 0 ; i < n ; i++) {
        if(str[i] == '1') {
            allone++;
        }
    }
    if(allone == 0) {
        // All zeroes: every substring is worth 0, any split into k parts is valid.
        return true;
    }
    if(allone%k != 0) {
        return false;
    }
    int segOnes = allone/k;
    // Count the zeroes that follow the last '1'.
    int trailingZero = 0;
    for(int i = n-1 ; i >= 0 ; i--) {
        if(str[i] == '1') {
            break;
        }
        trailingZero++;
    }
    string prevSegment = "";
    int oneCnt,zeroCnt,segCount ;
    oneCnt = zeroCnt = segCount = 0;
    string sub = "";
    for(int i = 0 ; i < n ; i++) {
        if(str[i] == '1') {
            sub.push_back('1');
            oneCnt++;
        }
        else {
            if(oneCnt == segOnes) {
                // Trailing zero of the current substring.
                sub.push_back('0');
                zeroCnt++;
            }
            else if(oneCnt != 0) {
                // Zero between two ones; the original `else(oneCnt !=0){`
                // was missing the `if` and did not compile.
                sub.push_back('0');
            }
        }
        if(oneCnt == segOnes && trailingZero == zeroCnt) {
            // Substring finished: compare it with the first one found.
            if(segCount == 0) {
                prevSegment = sub;
            }
            else {
                if(prevSegment != sub) {
                    return false;
                }
            }
            segCount++;
            oneCnt = zeroCnt = 0;
            sub = "";
        }
    }
    if(segCount != k) {
        return false;
    }
    return true;
}

Calculate and list how many different 4 digit numbers can be done with the numbers 0 to 9?

There is something I want to learn.
Let's say we have some single digit numbers.
Example: 1-2-3-4-5-6-7-8-9-0
Example II: 1-2-4-6-0
And with these numbers, we want to get 4-digit numbers that are different from each other.
And we want to print them as lists.
Result:
4676
4236
1247
1236
....
Is it possible to do this?

You can write and run a macro like this:
// EmEditor macro: reads the selected text (digits separated by '-', for
// example "1-2-4-6-0") and writes every 4-character sequence that can be built
// from those digits, with repetition, into a new document, one per line.

// Fetch the current selection.
var input = document.selection.Text;
if( input.length == 0 ) {
    alert( "Select the input string" );
    Quit();
}

// Validate the format: even positions hold a digit, odd positions hold '-'.
for( var pos = 0; pos < input.length; ++pos ) {
    var ch = input.charAt( pos );
    var digitExpected = ( pos % 2 == 0 );
    if( digitExpected && !( ch >= '0' && ch <= '9' ) ) {
        alert( "not digit" );
        Quit();
    }
    if( !digitExpected && ch != '-' ) {
        alert( "not separated by '-'" );
        Quit();
    }
}

// Collect the digits, which sit at every second position.
var digits = new Array();
for( var at = 0; at < input.length; at += 2 ) {
    digits.push( input.charAt( at ) );
}
if( digits.length < 4 ) {
    alert( "Input string should contain at least 4 digits" );
    Quit();
}

// Enumerate all 4-digit sequences; each line is terminated with CR LF.
var total = digits.length;
var lines = new Array();
for( var a = 0; a < total; ++a ) {
    for( var b = 0; b < total; ++b ) {
        for( var c = 0; c < total; ++c ) {
            for( var d = 0; d < total; ++d ) {
                lines.push( digits[a] + digits[b] + digits[c] + digits[d] + "\r\n" );
            }
        }
    }
}
var listing = lines.join( "" );

// Emit the list into a fresh document.
editor.EnableTab = true;
editor.NewFile();
document.write( listing );
To run this, save this code as, for instance, GenCombinations.jsee, and then select this file from Select... in the Macros menu. Finally, select Run GenCombinations.jsee in the Macros menu after selecting an input string.

minimum reduced string made up of a,b,c [duplicate]

I have a question which asks us to reduce the string as follows.
The input is a string having only A, B or C. Output must be length of
the reduced string
The string can be reduced by the following rules
If any 2 different letters are adjacent, these two letters can be
replaced by the third letter.
Eg ABA -> CA -> B . So final answer is 1 (length of reduced string)
Eg ABCCCCCCC
This doesn't become CCCCCCCC, as it can be reduced alternatively by
ABCCCCCCC->AACCCCCC->ABCCCCC->AACCCC->ABCCC->AACC->ABC->AA
as here length is 2 < (length of CCCCCCCC)
How do you go about this problem?
Thanks a lot!
To make things clear: the question states it wants the minimum length of the reduced string. So in the second example above there are 2 solutions possible, one CCCCCCCC and the other AA. So 2 is the answer as length of AA is 2 which is smaller than the length of CCCCCCCC = 8.

The way this question is phrased, there are only three distinct possibilities:
If the string has only one unique character, the length is the same as the length of the string.
2/3. If the string contains more than one unique character, the length is either 1 or 2, always (based on the layout of the characters).
Edit:
As a way of proof of concept here is some grammar and its extensions:
I should note that although this seems to me a reasonable proof for the fact that the length will reduce to either 1 or 2, I am reasonably sure that determining which of these lengths will result is not as trivial as I originally thought ( you would still have to recurse through all options to find it out)
S : A|B|C|()
S : S^
where () denotes the empty string, and s^ means any combination of the previous [A,B,C,()] characters.
Extended Grammar:
S_1 : AS^|others
S_2 : AAS^|ABS^|ACS^|others
S_3 : AAAS^|
AABS^ => ACS^ => BS^|
AACS^ => ABS^ => CS^|
ABAS^ => ACS^ => BS^|
ABBS^ => CBS^ => AS^|
ABCS^ => CCS^ | AAS^|
ACAS^ => ABS^ => CS^|
ACBS^ => AAS^ | BBS^|
ACCS^ => BCS^ => AS^|
The same thing will happen with extended grammars starting with B, and C (others). The interesting cases are where we have ACB and ABC (three distinct characters in sequence), these cases result in grammars that appear to lead to longer lengths however:
CCS^: CCAS^|CCBS^|CCCS^|
CBS^ => AS^|
CAS^ => BS^|
CCCS^|
AAS^: AAAS^|AABS^|AACS^|
ACS^ => BS^|
ABS^ => CS^|
AAAS^|
BBS^: BBAS^|BBBS^|BBCS^|
BCS^ => AS^|
BAS^ => CS^|
BBBS^|
Recursively they only lead to longer lengths when the remaining string contains their value only. However we have to remember that this case also can be simplified, since if we got to this area with say CCCS^, then we at one point previous had ABC ( or consequently CBA ). If we look back we could have made better decisions:
ABCCS^ => AACS^ => ABS^ => CS^
CBACS^ => CBBS^ => ABS^ => CS^
So in the best case at the end of the string when we make all the correct decisions we end with a remaining string of 1 character followed by 1 more character(since we are at the end). At this time if the character is the same, then we have a length of 2, if it is different, then we can reduce one last time and we end up with a length of 1.

You can derive the result directly from the count of each character in the string. The algorithm is as follows:
traverse through the string and get individual char count.
Lets say if
a = number of a's in the given string
b = number of b's in the given string
c = number of c's in the given string
then you can say that, the result will be,
// Inputs: a, b, c are the counts of the three letters; the answer is stored in result.
// Case 1: at most one of the three counts is non-zero. The answer is then the
// total number of characters.
if((a == 0 && b == 0 && c == 0) ||
(a == 0 && b == 0 && c != 0) ||
(a == 0 && b != 0 && c == 0) ||
(a != 0 && b == 0 && c == 0))
{
result = a+b+c;
}
// Case 2: all three letters are present. The answer is 2 when the three counts
// share the same parity (all even or all odd), otherwise 1.
else if(a != 0 && b != 0 && c != 0)
{
if((a%2 == 0 && b%2 == 0 && c%2 == 0) ||
(a%2 == 1 && b%2 == 1 && c%2 == 1))
result = 2;
else
result = 1;
}
// Case 3: exactly two letters are present. The missing letter has count 0,
// which is even, so only the "all even" test is needed here.
else if((a == 0 && b != 0 && c != 0) ||
(a != 0 && b == 0 && c != 0) ||
(a != 0 && b != 0 && c == 0))
{
if(a%2 == 0 && b%2 == 0 && c%2 == 0)
result = 2;
else
result = 1;
}

I'm assuming that you are looking for the length of the shortest possible string that can be obtained after reduction.
A simple solution would be to explore all possibilities in a greedy manner and hope that it does not explode exponentially. I'm gonna write Python pseudocode here because that's easier to comprehend (at least for me ;)):
from collections import deque
def try_reduce(string):
    """Return the length of the shortest string reachable from ``string``.

    A reduction replaces two adjacent, different letters of {A, B, C} by the
    third letter. The function runs a breadth-first search over every string
    reachable through such reductions and returns the smallest length seen.

    Each distinct string is expanded only once (``seen``); without that, the
    same intermediate strings are enqueued again and again and the search
    blows up exponentially.
    """
    # Maps a pair of different adjacent letters to its replacement.
    replacements = {
        "AB": "C", "BA": "C",
        "BC": "A", "CB": "A",
        "AC": "B", "CA": "B",
    }
    queue = deque([string])
    seen = {string}
    min_length = len(string)
    while queue:
        current = queue.popleft()
        if len(current) < min_length:
            min_length = len(current)
        if min_length == 1:
            # A single letter cannot be reduced further; nothing can beat it.
            return 1
        for i in range(len(current) - 1):
            replacement = replacements.get(current[i:i + 2])
            if replacement is None:
                continue
            reduced = current[:i] + replacement + current[i + 2:]
            if reduced not in seen:
                seen.add(reduced)
                queue.append(reduced)
    return min_length
I think the basic idea is clear: you take a queue (std::deque should be just fine), add your string into it, and then implement a simple breadth first search in the space of all possible reductions. During the search, you take the first element from the queue, take all possible substrings of it, execute all possible reductions, and push the reduced strings back to the queue. The entire space is explored when the queue becomes empty.

Let's define an automaton with the following rules (K>=0):
Incoming: A B C
Current: --------------------------
<empty> A B C
A(2K+1) A(2K+2) AB AC
A(2K+2) A(2K+3) AAB AAC
AB CA CB ABC
AAB BA ACB BC
ABC CCA AAB AAC
and all rules obtained by permutations of ABC to get the complete definition.
All input strings using a single letter are irreducible. If the input string contains at least two different letters, the final states like AB or AAB can be reduced to a single letter, and the final states like ABC can be reduced to two letters.
In the ABC case, we still have to prove that the input string can't be reduced to a single letter by another reduction sequence.

Compare two characters at a time and replace if both adjacent characters are not same. To get optimal solution, run once from start of the string and once from end of the string. Return the minimum value.
/* Returns 1 when every character of s is the same (the empty string and
 * one-character strings count as homogeneous), 0 otherwise. */
int same(char* s){
    int i;
    /* The original loop bound strlen(s)-1 is unsigned: for "" it wraps around
     * to a huge value and the loop reads past the buffer. Walking up to the
     * terminator avoids that and avoids calling strlen on every iteration. */
    if(s[0] == '\0')
        return 1;
    for(i = 1; s[i] != '\0'; i++){
        if(s[i] != s[i-1])
            return 0;
    }
    return 1;
}
/* Greedy reduction scanning from the end of the string towards the start.
 * Whenever two adjacent characters differ they are replaced, in place, by
 * SUM minus their sum. Passes are repeated until the string is homogeneous.
 * SUM is defined elsewhere; presumably 'a'+'b'+'c', so the result is the
 * third letter (confirm against its definition).
 * Modifies s and returns the length of the reduced string. */
int reduceb(char* s){
    int i;
    int len = (int)strlen(s);
    /* Strings shorter than two characters cannot be reduced. */
    if(len < 2)
        return len;
    while(1){
        i = len-1;
        while(i > 0){
            if(s[i] != s[i-1]){
                s[i-1] = (char)(SUM - (s[i] + s[i-1]));
                /* Remove s[i] by shifting the tail, terminator included, one
                 * place left. The original wrote '\0' at s[i], which threw
                 * away every character after position i whenever the pair
                 * was not at the very end (e.g. "aabb") and left len out of
                 * step with the real length. */
                memmove(s+i, s+i+1, (size_t)(len - i));
                len--;
            }
            i--;
        }
        /* Each pass over a non-homogeneous string merges at least one pair,
         * so len strictly decreases and the loop terminates. */
        if(len < 2 || same(s) == 1){
            return len;
        }
    }
}
/* Greedy reduction scanning from the start of the string towards the end.
 * A pair of different adjacent characters is replaced, in place, by
 * SUM minus their sum (SUM is defined elsewhere), and the rest of the string
 * is shifted left by one. Passes repeat until same(s) reports a homogeneous
 * string. Modifies s and returns the length of the reduced string. */
int reducef(char* s){
    int len = strlen(s);
    for(;;){
        int pos;
        for(pos = 0; pos < len-1; pos++){
            int k;
            if(s[pos] == s[pos+1])
                continue;
            /* Merge the pair into s[pos]. */
            s[pos] = SUM - (s[pos] + s[pos+1]);
            /* Close the gap; the last copy brings the terminator along. */
            for(k = pos+1; k < len; k++)
                s[k] = s[k+1];
            len--;
        }
        if(same(s) == 1){
            return strlen(s);
        }
    }
}
/* Reads the number of test cases n, then n strings (at most 100 characters
 * each). For every string, runs the forward and the backward greedy
 * reduction on separate copies and prints the smaller resulting length,
 * one per line. Returns 1 when the test-case count is missing or not positive. */
int main(){
    int n,i=0,j,f=0,b=0;
    /* Reject a missing or non-positive count before sizing the array with it. */
    if(scanf("%d",&n) != 1 || n <= 0)
        return 1;
    int a[n];
    while(i<n){
        char* str = (char*)malloc(101);
        char* strd;
        if(str == NULL)
            break;
        /* %100s keeps the input inside the 101-byte buffer; plain %s could overflow it. */
        if(scanf("%100s",str) != 1){
            free(str);
            break;
        }
        /* Both reductions work in place, so each needs its own copy. */
        strd = strdup(str);
        if(strd == NULL){
            free(str);
            break;
        }
        f = reducef(str);
        b = reduceb(strd);
        if( f > b)
            a[i] = b;
        else
            a[i] = f;
        free(str);
        free(strd);
        i++;
    }
    /* Print only the results that were actually computed. */
    for(j=0;j<i;j++)
        printf("%d\n",a[j]);
    return 0;
}

import java.io.*;
import java.util.*;
/**
 * Exhaustive solver for the "reduce a string of a/b/c" problem: two adjacent
 * different letters may be replaced by the third letter, and the goal is the
 * minimum reachable length.
 */
class StringSim{
    /** Results already computed, keyed by string, so no string is solved twice. */
    private static final Map<String, Integer> MEMO = new HashMap<String, Integer>();

    /**
     * Reads the number of test cases from the first line, then one string per
     * line, and prints the minimum reduced length of each string.
     */
    public static void main(String args[]){
        Scanner sc = new Scanner(System.in);
        StringTokenizer st = new StringTokenizer(sc.nextLine(), " ");
        int N = Integer.parseInt(st.nextToken());
        // StringBuilder avoids re-copying the whole output on every iteration.
        StringBuilder op = new StringBuilder();
        for(int i=0;i<N;i++){
            String str = sc.nextLine();
            op.append(Count(str)).append("\n");
        }
        System.out.println(op);
    }

    /**
     * Returns the smallest length reachable from {@code str} by repeatedly
     * replacing two adjacent different letters with the third letter.
     * Tries every possible first move recursively and caches the results.
     *
     * @param str string made of the letters a, b and c
     * @return the minimum reachable length; 0 for the empty string
     */
    public static int Count( String str){
        // The original read charAt(0) before any length check and crashed on "".
        if(str.isEmpty()){
            return 0;
        }
        Integer cached = MEMO.get(str);
        if(cached != null){
            return cached;
        }
        boolean allSame = true;
        int min = Integer.MAX_VALUE;
        for(int i=1;i<str.length();i++){
            char pre = str.charAt(i-1);
            char cur = str.charAt(i);
            if(pre == cur){
                continue;
            }
            allSame = false;
            // The three character codes add up to 'a'+'b'+'c', so subtracting
            // the two present letters leaves the missing one.
            char rep = (char)(('a'+'b'+'c')-(pre+cur));
            // Replace positions i-1 and i by rep. This one expression also
            // covers the pair at the very start, at the very end, and the
            // two-character string, which the original special-cased.
            int count = Count(str.substring(0,i-1)+rep+str.substring(i+1));
            if(count < min){
                min = count;
            }
            if(min == 1){
                break; // a non-empty string cannot get shorter than 1
            }
        }
        // A homogeneous string cannot be reduced at all.
        int result = allSame ? str.length() : min;
        MEMO.put(str, result);
        return result;
    }
}

Wouldn't a good start be to count which letter you have the most of and look for ways to remove it? Keep doing this until we only have one letter. We might have it many times but as long as it is the same we do not care, we are finished.
To avoid getting something like ABCCCCCCC becoming CCCCCCCC.
We remove the most popular letter:
-ABCCCCCCC
-AACCCCCC
-ABCCCCC
-AACCCC
-ABCCC
-AACC
-ABC
-AA
I disagree with the previous poster who states we must have a length of 1 or 2 - what happens if I enter the start string AAA?

import java.util.LinkedList;
import java.util.List;
import java.util.Scanner;
/**
 * Heuristic solver for the "reduce a string of a/b/c" problem. It combines a
 * left-to-right reduction with an "outside-in" reduction in both orders,
 * recurses on whatever is not yet homogeneous, and keeps the shorter result.
 * NOTE(review): this is a greedy heuristic; nothing here proves that the
 * returned length is the true minimum.
 */
public class Sample {
    private static char[] res = {'a', 'b', 'c'};

    /** Returns the first letter of {a, b, c} that differs from both arguments. */
    private char replacementChar(char a, char b) {
        for(char c : res) {
            if(c != a && c != b) {
                return c;
            }
        }
        throw new IllegalStateException("cannot happen. you must've mucked up the resource");
    }

    /**
     * Computes the reduced length for one word.
     *
     * @param wordString word made of the letters a, b and c
     * @return the shortest length found by the two reduction orders
     */
    public int processWord(String wordString) {
        if(wordString.length() < 2) {
            return wordString.length();
        }
        String wordStringES = reduceFromEnd(reduceFromStart(wordString));
        if(wordStringES.length() == 1) {
            return 1;
        }
        String wordStringSE = reduceFromStart(reduceFromEnd(wordString));
        // The original tested wordString here, whose length is always >= 2 at
        // this point, so this early return could never fire.
        if(wordStringSE.length() == 1) {
            return 1;
        }
        int aLen;
        if(isReduced(wordStringSE)) {
            aLen = wordStringSE.length();
        } else {
            aLen = processWord(wordStringSE);
        }
        int bLen;
        if(isReduced(wordStringES)) {
            bLen = wordStringES.length();
        } else {
            bLen = processWord(wordStringES);
        }
        return Math.min(aLen, bLen);
    }

    /** Returns true when the word consists of one repeated letter (or has fewer than two letters). */
    private boolean isReduced(String wordString) {
        int length = wordString.length();
        for(int i = 1; i < length; ++i) {
            if(wordString.charAt(i) != wordString.charAt(i - 1)) {
                return false;
            }
        }
        // Every adjacent pair matched, so first and last are equal as well;
        // the original's final first-vs-last comparison was always true.
        return true;
    }

    /**
     * Single left-to-right pass. Keeps a "current" letter; a different next
     * letter is merged into it, an equal next letter flushes the current
     * letter to the output.
     */
    private String reduceFromStart(String theWord) {
        if(theWord.length() < 2) {
            return theWord;
        }
        StringBuilder buffer = new StringBuilder();
        char[] word = theWord.toCharArray();
        char curChar = word[0];
        for(int i = 1; i < word.length; ++i) {
            boolean last = (i + 1 == word.length);
            if(word[i] != curChar) {
                // Merge the pending letter with the next one.
                curChar = replacementChar(curChar, word[i]);
                if(last) {
                    buffer.append(curChar);
                    break;
                }
            } else {
                // Same letter: emit the pending one and keep the new one pending.
                buffer.append(curChar);
                if(last) {
                    buffer.append(curChar);
                }
            }
        }
        return buffer.toString();
    }

    /**
     * Repeatedly removes the first and the last character while they differ
     * and appends their replacement letter at the end.
     * NOTE(review): the first and last characters are not adjacent for words
     * longer than two letters, so this step is not one of the moves described
     * in the problem statement; confirm that this is intended.
     */
    private String reduceFromEnd(String theString) {
        if(theString.length() < 2) {
            return theString;
        }
        StringBuilder buffer = new StringBuilder(theString);
        int length = buffer.length();
        while(length > 1) {
            char a = buffer.charAt(0);
            char b = buffer.charAt(length - 1);
            if(a == b) {
                break;
            }
            buffer.deleteCharAt(length - 1);
            buffer.deleteCharAt(0);
            buffer.append(replacementChar(a, b));
            length -= 1;
        }
        return buffer.toString();
    }

    /** Reads the number of words, then the words, and prints one reduced length per line. */
    public void go() {
        Scanner scanner = new Scanner(System.in);
        int numEntries = Integer.parseInt(scanner.nextLine());
        List<Integer> counts = new LinkedList<Integer>();
        for(int i = 0; i < numEntries; ++i) {
            counts.add((processWord(scanner.nextLine())));
        }
        for(Integer count : counts) {
            System.out.println(count);
        }
    }

    public static void main(String[] args) {
        Sample solution = new Sample();
        solution.go();
    }
}

This is a brute-force approach: starting from every adjacent pair that can be replaced, it follows each possible path of reductions and keeps the minimum length found.
import java.io.*;
import java.util.*;
/**
 * Exhaustive solver for the "reduce a string of a/b/c" problem: two adjacent
 * different letters may be replaced by the third letter, and the goal is the
 * minimum reachable length.
 */
class StringSim{
    /** Results already computed, keyed by string, so no string is solved twice. */
    private static final Map<String, Integer> MEMO = new HashMap<String, Integer>();

    /**
     * Reads the number of test cases from the first line, then one string per
     * line, and prints the minimum reduced length of each string.
     */
    public static void main(String args[]){
        Scanner sc = new Scanner(System.in);
        StringTokenizer st = new StringTokenizer(sc.nextLine(), " ");
        int N = Integer.parseInt(st.nextToken());
        // StringBuilder avoids re-copying the whole output on every iteration.
        StringBuilder op = new StringBuilder();
        for(int i=0;i<N;i++){
            String str = sc.nextLine();
            op.append(Count(str)).append("\n");
        }
        System.out.println(op);
    }

    /**
     * Returns the smallest length reachable from {@code str} by repeatedly
     * replacing two adjacent different letters with the third letter.
     * Tries every possible first move recursively and caches the results.
     *
     * @param str string made of the letters a, b and c
     * @return the minimum reachable length; 0 for the empty string
     */
    public static int Count( String str){
        // The original read charAt(0) before any length check and crashed on "".
        if(str.isEmpty()){
            return 0;
        }
        Integer cached = MEMO.get(str);
        if(cached != null){
            return cached;
        }
        boolean allSame = true;
        int min = Integer.MAX_VALUE;
        for(int i=1;i<str.length();i++){
            char pre = str.charAt(i-1);
            char cur = str.charAt(i);
            if(pre == cur){
                continue;
            }
            allSame = false;
            // The three character codes add up to 'a'+'b'+'c', so subtracting
            // the two present letters leaves the missing one.
            char rep = (char)(('a'+'b'+'c')-(pre+cur));
            // Replace positions i-1 and i by rep. This one expression also
            // covers the pair at the very start, at the very end, and the
            // two-character string, which the original special-cased.
            int count = Count(str.substring(0,i-1)+rep+str.substring(i+1));
            if(count < min){
                min = count;
            }
            if(min == 1){
                break; // a non-empty string cannot get shorter than 1
            }
        }
        // A homogeneous string cannot be reduced at all.
        int result = allSame ? str.length() : min;
        MEMO.put(str, result);
        return result;
    }
}

Following NominSim's observations, here is probably an optimal solution that runs in linear time with O(1) space usage. Note that it is only capable of finding the length of the smallest reduction, not the reduced string itself:
def reduce(string):
    """Return the length of the shortest reduction of ``string``.

    Only the number of 'a', 'b' and 'c' characters matters:
    * when at least two of the three counts are zero, the string cannot be
      reduced and the answer is the total of the counts;
    * otherwise the answer is 2 when the three counts share the same parity
      and 1 when they do not.
    """
    counts = [string.count(letter) for letter in 'abc']
    if counts.count(0) >= 2:
        return sum(counts)
    # One distinct remainder means "all even" or "all odd".
    parities = {count % 2 for count in counts}
    if len(parities) == 1:
        return 2
    return 1

There is some underlying structure that can be used to solve this problem in O(n) time.
The rules given are (most of) the rules defining a mathematical group, in particular the group D_2 also sometimes known as K (for Klein's four group) or V (German for Viergruppe, four group). D_2 is a group with four elements, A, B, C, and 1 (the identity element). One of the realizations of D_2 is the set of symmetries of a rectangular box with three different sides. A, B, and C are 180 degree rotations about each of the axes, and 1 is the identity rotation (no rotation). The group table for D_2 is
|1 A B C
-+-------
1|1 A B C
A|A 1 C B
B|B C 1 A
C|C B A 1
As you can see, the rules correspond to the rules given in the problem, except that the rules involving 1 aren't present in the problem.
Since D_2 is a group, it satisfies a number of rules: closure (the product of any two elements of the group is another element), associativity (meaning (x*y)*z = x*(y*z) for any elements x, y, z; i.e., the order in which strings are reduced doesn't matter), existence of identity (there is an element 1 such that 1*x=x*1=x for any x), and existence of inverse (for any element x, there is an element x^{-1} such that x*x^{-1}=1 and x^{-1}*x=1; in our case, every element is its own inverse).
It's also worth noting that D_2 is commutative, i.e., x*y=y*x for any x,y.
Given any string of elements in D_2, we can reduce to a single element in the group in a greedy fashion. For example, ABCCCCCCC=CCCCCCCC=CCCCCC=CCCC=CC=1. Note that we don't write the element 1 unless it's the only element in the string. Associativity tells us that the order of the operations doesn't matter, e.g., we could have worked from right to left or started in the middle and gotten the same result. Let's try from the right: ABCCCCCCC=ABCCCCC=ABCCC=ABC=AA=1.
The situation of the problem is different because operations involving 1 are not allowed, so we can't just eliminate pairs AA, BB, or CC. However, the situation is not that different. Consider the string ABB. We can't write ABB=A in this case. However, we can eliminate BB in two steps using A: ABB=CB=A. Since order of operation doesn't matter by associativity, we're guaranteed to get the same result. So we can't go straight from ABB to A but we can get the same result by another route.
Such alternate routes are available whenever there are at least two different elements in a string. In particular, in each of ABB, ACC, BAA, BCC, CAA, CBB, AAB, AAC, BBA, BBC, CCA, CCB, we can act as if we have the reduction xx=1 and then drop the 1.
It follows that any string that is not homogeneous (not all the same letter) and has a double-letter substring (AA, BB, or CC) can be reduced by removing the double letter. Strings that contain just two identical letters can't be further reduced (because there is no 1 allowed in the problem), so it seems safe to hypothesize that any non-homogeneous string can be reduced to A, B, C, AA, BB, CC.
We still have to be careful, however, because CCAACC could be turned into CCCC by removing the middle pair AA, but that is not the best we can do: CCAACC=AACC=CC or AA takes us down to a string of length 2.
Another situation we have to be careful of is AABBBB. Here we could eliminate AA to end with BBBB, but it's better to eliminate the middle B's first, then whatever: AABBBB=AABB=AA or BB (both of which are equivalent to 1 in the group, but can't be further reduced in the problem).
There's another interesting situation we could have: AAAABBBB. Blindly eliminating pairs takes us to either AAAA or BBBB, but we could do better: AAAABBBB=AAACBBB=AABBBB=AABB=AA or BB.
The above indicate that eliminating doubles blindly is not necessarily the way to proceed, but nevertheless it was illuminating.
Instead, it seems as if the most important property of a string is non-homogeneity. If the string is homogeneous, stop, there's nothing we can do. Otherwise, identify an operation that preserves the non-homogeneity property if possible. I assert that it is always possible to identify an operation that preserves non-homogeneity if the string is non-homogeneous and of length four or greater.
Proof: if a 4-substring contains two different letters, a third letter can be introduced at a boundary between two different letters, e.g., AABA goes to ACA. Since one or the other of the original letters must be unchanged somewhere within the string, it follows that the result is still non-homogeneous.
Suppose instead we have a 4-substring that has three different elements, say AABC, with the outer two elements different. Then if the middle two elements are different, perform the operation on them; the result is non-homogeneous because the two outermost elements are still different. On the other hand, if the two inner elements are the same, e.g., ABBC, then they have to be different from both outermost elements (otherwise we'd only have two elements in the set of four, not three). In that case, perform either the first or third operation; that leaves either the last two elements different (e.g., ABBC=CBC) or the first two elements different (e.g., ABBC=ABA) so non-homogeneity is preserved.
Finally, consider the case where the first and last elements are the same. Then we have a situation like ABCA. The middle two elements both have to be different from the outer elements, otherwise we'd have only two elements in this case, not three. We can take the first available operation, ABCA=CCA, and non-homogeneity is preserved again.
End of proof.
We have a greedy algorithm to reduce any non-homogeneous string of length 4 or greater: pick the first operation that preserves non-homogeneity; such an operation must exist by the above argument.
We have now reduced to the case where we have a non-homogeneous string of 3 elements. If two are the same, we either have doubles like AAB etc., which we know can be reduced to a single element, or we have two elements with no double like ABA=AC=B which can also be reduced to a single element, or we have three different elements like ABC. There are six permutations, all of which =1 in the group by associativity and commutativity; all of them can be reduced to two elements by any operation; however, they can't possibly be reduced below a homogeneous pair (AA, BB, or CC) since 1 is not allowed in the problem, so we know that's the best we can do in this case.
In summary, if a string is homogeneous, there's nothing we can do; if a string is non-homogeneous and =A in the group, it can be reduced to A in the problem by a greedy algorithm which maintains non-homogeneity at each step; the same if the string =B or =C in the group; finally if a string is non-homogeneous and =1 in the group, it can be reduced by a greedy algorithm which maintains non-homogeneity as long as possible to one of AA, BB or CC. Those are the best we can do by the group properties of the operation.
Program solving the problem:
Now, since we know the possible outcomes, our program can run in O(n) time as follows: if all the letters in the given string are the same, no reduction is possible so just output the length of the string. If the string is non-homogeneous, and is equal to the identity in the group, output the number 2; otherwise output the number 1.
To quickly decide whether an element equals the identity in the group, we use commutativity and associativity as follows: just count the number of A's, B's and C's into the variables a, b, c. Replace a = a mod 2, b = b mod 2, c = c mod 2 because we can eliminate pairs AA, BB, and CC in the group. If none of the resulting a, b, c is equal to 0, we have ABC=1 in the group, so the program should output 2 because a reduction to the identity 1 is not possible. If all three of the resulting a, b, c are equal to 0, we again have the identity (A, B, and C all cancelled themselves out) so we should output 2. Otherwise the string is non-identity and we should output 1.

//C# Coding
using System;
using System.Collections.Generic;
namespace ConsoleApplication1
{
    class Program
    {
        /// <summary>
        /// Applies a fixed set of find/replace rules to a sample string until
        /// no rule matches any more, prints the result and waits for a key.
        /// </summary>
        static void Main(string[] args)
        {
            // key = text to find, value = text to put in its place,
            // e.g. find "AB" and replace it with "AA".
            Dictionary<string, string> rules = new Dictionary<string, string>
            {
                { "AB", "AA" },
                { "BA", "AA" },
                { "CB", "CC" },
                { "BC", "CC" },
                { "AA", "A" },
                { "CC", "C" },
            };

            // example string
            string str = "AABBCCCA";

            // output
            Console.WriteLine(fnRecurence(rules, str));
            Console.Read();
        }

        /// <summary>
        /// Repeatedly applies every rule, in enumeration order, to
        /// <paramref name="str"/> until the string has at most one character
        /// or no rule key occurs in it any more.
        /// </summary>
        /// <param name="rules">Find/replace pairs (key is replaced by value).</param>
        /// <param name="str">The input text.</param>
        /// <returns>The text after all applicable replacements.</returns>
        static string fnRecurence(Dictionary<string, string> rules, string str)
        {
            // A loop replaces the original tail recursion. The self-assignment
            // `str = str;` and the dead `find = 0;` store are gone.
            while (true)
            {
                foreach (var rule in rules)
                {
                    // Replace leaves the string untouched when the key is
                    // absent, so no separate "does it occur" check is needed.
                    str = str.Replace(rule.Key, rule.Value);
                }

                if (str.Length <= 1 || !AnyRuleMatches(rules, str))
                {
                    return str;
                }
            }
        }

        /// <summary>Returns true when at least one rule key occurs in <paramref name="str"/>.</summary>
        static bool AnyRuleMatches(Dictionary<string, string> rules, string str)
        {
            foreach (var rule in rules)
            {
                // Ordinal search, so the test agrees with string.Replace, which
                // is ordinal too; the original LastIndexOf(string) was culture-sensitive.
                if (str.IndexOf(rule.Key, StringComparison.Ordinal) >= 0)
                {
                    return true;
                }
            }
            return false;
        }
    }
}

Here is my C# solution.
/// <summary>
/// Greedy reduction: sweeps the string left to right, replacing each pair of
/// different neighbouring letters by the third letter of {a, b, c}, and
/// repeats whole sweeps until a sweep no longer changes the length.
/// </summary>
/// <param name="str">The string to reduce.</param>
/// <returns>The length left after the last sweep (1 for a one-character string, 0 for an empty one).</returns>
public static int StringReduction(string str)
{
    if (str.Length == 1)
    {
        return 1;
    }

    int lengthBeforePass = str.Length;
    int lengthAfterPass = 0;
    while (lengthBeforePass != lengthAfterPass)
    {
        lengthBeforePass = lengthAfterPass;

        // A sweep never produces more characters than it consumes.
        char[] merged = new char[str.Length];
        int used = 0;
        int left = 0;
        while (left + 1 < str.Length)
        {
            char x = str[left];
            char y = str[left + 1];
            if (x == y)
            {
                // Equal neighbours: keep the left one and slide by one.
                merged[used++] = x;
                left++;
                continue;
            }

            // Different neighbours: emit the letter that is neither of them
            // and skip past both.
            if (x != 'a' && y != 'a')
            {
                merged[used++] = 'a';
            }
            else if (x != 'b' && y != 'b')
            {
                merged[used++] = 'b';
            }
            else if (x != 'c' && y != 'c')
            {
                merged[used++] = 'c';
            }
            left += 2;
        }

        // A trailing character without a partner is carried over unchanged.
        if (left < str.Length)
        {
            merged[used++] = str[left];
        }

        str = new string(merged, 0, used);
        lengthAfterPass = used;
    }

    return lengthAfterPass;
}

Compare two characters at a time and replace if both adjacent characters are not same. To get optimal solution, run once from start of the string and once from end of the string. Return the minimum value.
Rav solution is :-
/* Returns 1 when every character of s is the same (the empty string and
 * one-character strings count as homogeneous), 0 otherwise. */
int same(char* s){
    int i;
    /* The original loop bound strlen(s)-1 is unsigned: for "" it wraps around
     * to a huge value and the loop reads past the buffer. Walking up to the
     * terminator avoids that and avoids calling strlen on every iteration. */
    if(s[0] == '\0')
        return 1;
    for(i = 1; s[i] != '\0'; i++){
        if(s[i] != s[i-1])
            return 0;
    }
    return 1;
}
/* Greedy reduction scanning from the end of the string towards the start.
 * Whenever two adjacent characters differ they are replaced, in place, by
 * SUM minus their sum. Passes are repeated until the string is homogeneous.
 * SUM is defined elsewhere; presumably 'a'+'b'+'c', so the result is the
 * third letter (confirm against its definition).
 * Modifies s and returns the length of the reduced string. */
int reduceb(char* s){
    int i;
    int len = (int)strlen(s);
    /* Strings shorter than two characters cannot be reduced. */
    if(len < 2)
        return len;
    while(1){
        i = len-1;
        while(i > 0){
            if(s[i] != s[i-1]){
                s[i-1] = (char)(SUM - (s[i] + s[i-1]));
                /* Remove s[i] by shifting the tail, terminator included, one
                 * place left. The original wrote '\0' at s[i], which threw
                 * away every character after position i whenever the pair
                 * was not at the very end (e.g. "aabb") and left len out of
                 * step with the real length. */
                memmove(s+i, s+i+1, (size_t)(len - i));
                len--;
            }
            i--;
        }
        /* Each pass over a non-homogeneous string merges at least one pair,
         * so len strictly decreases and the loop terminates. */
        if(len < 2 || same(s) == 1){
            return len;
        }
    }
}
/* Greedy reduction scanning from the start of the string towards the end.
 * A pair of different adjacent characters is replaced, in place, by
 * SUM minus their sum (SUM is defined elsewhere), and the rest of the string
 * is shifted left by one. Passes repeat until same(s) reports a homogeneous
 * string. Modifies s and returns the length of the reduced string. */
int reducef(char* s){
    int len = strlen(s);
    for(;;){
        int pos;
        for(pos = 0; pos < len-1; pos++){
            int k;
            if(s[pos] == s[pos+1])
                continue;
            /* Merge the pair into s[pos]. */
            s[pos] = SUM - (s[pos] + s[pos+1]);
            /* Close the gap; the last copy brings the terminator along. */
            for(k = pos+1; k < len; k++)
                s[k] = s[k+1];
            len--;
        }
        if(same(s) == 1){
            return strlen(s);
        }
    }
}
/*
 * Reads a count n followed by up to n words, reduces each word with both the
 * forward (reducef) and backward (reduceb) strategy, and prints the smaller
 * of the two resulting lengths, one per line.
 *
 * Returns 0 on success, 1 when the count is missing/negative or memory runs out.
 * Input is bounded to 100 characters per word (the buffer holds 101 bytes); the
 * previous unbounded "%s" could overflow it. Results live on the heap instead of
 * a variable-length array sized by unvalidated input.
 */
int main(){
    int n = 0, i = 0, f = 0, b = 0;
    int *a = NULL;

    if (scanf("%d", &n) != 1 || n < 0)
        return 1;
    if (n == 0)
        return 0;

    a = malloc((size_t)n * sizeof *a);
    if (a == NULL)
        return 1;

    while (i < n) {
        char str[101];
        char *strd;

        /* Stop early when the input ends before n words were supplied. */
        if (scanf("%100s", str) != 1)
            break;

        /* reduceb and reducef both modify their argument, so each needs its own copy. */
        strd = strdup(str);
        if (strd == NULL) {
            free(a);
            return 1;
        }

        f = reducef(str);
        b = reduceb(strd);
        a[i] = (f > b) ? b : f;

        free(strd);
        i++;
    }

    /* Print only the results that were actually computed. */
    n = i;
    for (i = 0; i < n; i++)
        printf("%d\n", a[i]);

    free(a);
    return 0;
}
#Rav
This code fails for the input "abccaccba".
The result should be just "b" (length 1),
but this code does not produce that. I am posting this as a separate answer because I cannot comment on the original post (not enough reputation).

This problem can be solved by greedy approach. Try to find the best position to apply transformation until no transformation exists. The best position is the position with max number of distinct neighbors of the transformed character.

You can solve this using 2 pass.
In the first pass you apply
/*
 * Fragment: one backward pass over str that writes its result into output1.
 * str, output1, len, index, flag, i and getChar are declared elsewhere.
 * getChar presumably returns the letter that replaces two different
 * neighbours -- confirm at its definition.
 */
len = strlen (str) ;
index = 0 ;
flag = 0 ;
/* 1st pass: walk from the last character down to index 1 */
for ( i = len-1 ; i > 0 ; i -- ) {
if ( str[i] != str[i-1] ) {
/* Different neighbours: fold the pair into str[i-1]; nothing is emitted
   for position i. */
str[i-1] = getChar (str[i], str[i-1]) ;
if (i == 1) {
/* The fold reached the front: emit the merged first character and
   remember that str[0] has already been written. */
output1[index++] = str[i-1] ;
flag = 1 ;
break ;
}
}
/* Equal neighbours: copy the current character to the output unchanged. */
else output1[index++] = str[i] ;
}
/* If the loop ended without the i == 1 fold, the character at the final i
   (index 0 for a non-empty string) is still pending.
   NOTE(review): for an empty string i would be -1 here -- confirm callers
   never pass one. */
if ( flag == 0 )
output1[index++] = str[i] ;
output1[index] = '\0';
And in the 2nd pass you will apply the same on 'output1' to get the result.
So, One is forward pass another one is backward pass.

// Fragment of a method body; `a` is the input string declared elsewhere.
// NOTE(review): a.charAt(0) throws for an empty string -- confirm callers never pass one.
int previous = a.charAt(0);
// Stays true only while every character equals the first one.
boolean same = true;
// XOR accumulator: each letter contributes (letter - 'a' + 1), i.e. a=1, b=2, c=3.
// Bit 0 ends up as the parity of (#a + #c) and bit 1 as the parity of (#b + #c),
// so c == 0 exactly when the counts of a, b and c all share the same parity.
int c = 0;
for(int i = 0; i < a.length();++i){
c ^= a.charAt(i)-'a'+1;
if(a.charAt(i) != previous) same = false;
}
// All characters equal: the length is returned unchanged.
if(same) return a.length();
// Same parity for all three counts -> 2, otherwise -> 1.
if(c==0) return 2;
else return 1;

import java.util.Scanner;
public class StringReduction {
public static void main(String[] args) {
Scanner sc = new Scanner(System.in);
String str = sc.nextLine();
int length = str.length();
String result = stringReduction(str);
System.out.println(result);
}
private static String stringReduction(String str) {
String result = str.substring(0);
if(str.length()<2){
return str;
}
if(str.length() == 2){
return combine(str.charAt(0),str.charAt(1));
}
for(int i =1;i<str.length();i++){
if(str.charAt(i-1) != str.charAt(i)){
String temp = str.substring(0, i-1) + combine(str.charAt(i-1),str.charAt(i)) + str.substring(i+1, str.length());
String sub = stringReduction(temp);
if(sub.length() < result.length()){
result = sub;
}
}
}
return result;
}
private static String combine(char c1, char c2) {
if(c1 == c2){
return "" + c1 + c2;
}
else{
if(c1 == 'a'){
if(c2 == 'b'){
return "" + 'c';
}
if(c2 == 'c') {
return "" + 'b';
}
}
if(c1 == 'b'){
if(c2 == 'a'){
return "" + 'c';
}
if(c2 == 'c') {
return "" + 'a';
}
}
if(c1 == 'c'){
if(c2 == 'a'){
return "" + 'b';
}
if(c2 == 'b') {
return "" + 'a';
}
}
return null;
}
}
}

JAVASCRIPT SOLUTION:
/**
 * Returns the minimum length reachable by repeatedly replacing two adjacent,
 * different letters of an 'a'/'b'/'c' string with the third letter.
 *
 * The previous left-to-right greedy pass was not optimal: "abcc" collapsed to
 * "ccc" (3) although "abcc" -> "aac" -> "ab" -> "c" reaches length 1. Every
 * reduction flips the parity of all three letter counts, which gives the
 * closed-form rule below.
 *
 * @param {string} str - input made of the letters a, b and c (other characters are not counted)
 * @returns {number} the string length when all characters are equal, 2 when the
 *   three letter counts share the same parity, otherwise 1
 */
function StringChallenge(str) {
  let countA = 0;
  let countB = 0;
  let countC = 0;
  let allSame = true;

  for (let i = 0; i < str.length; i++) {
    const ch = str[i];
    if (ch !== str[0]) {
      allSame = false;
    }
    if (ch === 'a') {
      countA++;
    } else if (ch === 'b') {
      countB++;
    } else if (ch === 'c') {
      countC++;
    }
  }

  // Also covers the empty string (0) and single-character strings (1).
  if (allSame) {
    return str.length;
  }

  const sameParity = countA % 2 === countB % 2 && countB % 2 === countC % 2;
  return sameParity ? 2 : 1;
}

Iterate all possible variants of a sequence

I have a sequence of n letters: e.g. A B C A D , or A B F A
What I want is to get every possible variant with a comma between the letters.
i.e. for A B F A
A B F A
A,B F A
A B,F A
A B F,A
A,B,F A
A B,F,A
A,B F,A
A,B,F,A
Can anyone recommend a good algorithm for doing this? Language not important.

Simple solution to use binary array to represent if there is a comma or not.
A B F A contains three positions where comma may be (AB, BF, FA)
That means if you create 3-element array and try every possible combination of 0 and 1 you'll get the desired result. 000, 001, 010, 011, 100, 101, 110, 111
Simple program in java prints all binary permutation for n bits:
// Print how many comma patterns exist for the word, then every pattern as a
// zero-padded binary string (one bit per gap between neighbouring letters).
String s = "ABFA";
int bits = s.length() - 1;
int lastNumber = 1 << bits;
System.out.println(lastNumber);
int pattern = 0;
while (pattern < lastNumber) {
    String binary = Integer.toBinaryString(pattern);
    System.out.println(completeZeros(binary, bits));
    pattern++;
}
/**
 * Left-pads {@code s} with '0' characters until it is {@code bits} characters long.
 * A string that is already at least that long is returned unchanged.
 */
static String completeZeros(String s, int bits) {
    StringBuilder padded = new StringBuilder();
    int missing = bits - s.length();
    while (missing > 0) {
        padded.append('0');
        missing--;
    }
    return padded.append(s).toString();
}
To apply binary permutation "010" to string "ABFA" use next function:
/**
 * Rebuilds {@code s} with ", " inserted in every gap whose bit in {@code binary}
 * is '1'; bit k controls the gap between character k and character k + 1.
 */
static String applyBinary(String s, String binary) {
    StringBuilder out = new StringBuilder();
    out.append(s.charAt(0));
    int gap = 0;
    while (gap < binary.length()) {
        boolean separated = binary.charAt(gap) == '1';
        if (separated) {
            out.append(", ");
        }
        out.append(s.charAt(gap + 1));
        gap++;
    }
    return out.toString();
}
The output is:
ABFA
ABF, A
AB, FA
AB, F, A
A, BFA
A, BF, A
A, B, FA
A, B, F, A

Use binary system for this task.
1 means comma is present, 0 means comma is not present. Each position in number informs about presence of another comma. For example for AFA:
00 : A F A
01 : A F,A
10 : A,F A
11 : A,F,A
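Numbers must be taken from the range [0 .. 2^(n-1) - 1], where n is the number of letters: there are n-1 gaps, and each gap either has a comma or not.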

Here's a simple JavaScript demonstration.
var str = "ABFA";

/**
 * Returns every variant of `str` in which each gap between neighbouring
 * characters holds either a space or a comma.
 *
 * A string of n characters has n - 1 gaps and therefore 2^(n-1) variants.
 * The previous bound, (n-1)^2 - 1, matched that count only for n = 4 and
 * dropped variants for other lengths (for "ABC" the "A,B,C" variant was missing).
 *
 * @param {string} str - the characters to separate
 * @returns {string[]} all variants; bit k of the variant's index decides whether
 *   a comma follows character k. Empty input yields an empty array.
 */
function algo(str) {
  var result = [];
  var n = str.length;
  if (n === 0) {
    return result;
  }
  var total = Math.pow(2, n - 1);
  for (var mask = 0; mask < total; mask++) {
    var bits = mask;
    var newstr = "";
    for (var i = 0; i < n - 1; i++, bits >>>= 1) {
      var hasComma = (bits & 1) == 1;
      newstr += str.charAt(i);
      newstr += (hasComma ? "," : " ");
    }
    newstr += str.charAt(n - 1);
    result.push(newstr);
  }
  return result;
}
algo(str);
You calculate the total number of combinations "total"
You count up to that number "mask"
You use the binary bits of your counter "bits" to add commas

There are two approaches to this problem.
1.Recursive ( Start = "printAllComb ( s , "" , 0 );" )
// Prints every variant of s with or without a comma between neighbouring letters.
// s     : the letters to separate
// const : the variant built so far (pass "" initially)
// i     : index of the next letter to append (pass 0 initially)
printAllComb(string s, string const, int i)
{
// Nothing to print for an empty string.
if ( s.length() == 0 )
return
// Last letter: no separator can follow it, so print the finished variant and
// stop. Without this return the recursion would read past the end of s.
if ( i == s.length() - 1 )
{
print const + s.at(i)
return
}
// Branch 1: a comma follows letter i. Branch 2: no comma follows letter i.
printAllComb(s,const+s.at(i)+',',i+1);
printAllComb(s,const+s.at(i),i+1);
}
2.Dynamic Programming
// One character per letter of s; '1' at position i means "print a comma after letter i".
char binaryS[s.length]="0000";
//Basically no. of zeros = no. of Alphabets in the string
//define a function AddOne() which adds 1 to the character representation
//AddOne() modifies the character array such that it stays in the bit representation
//Characters because to save space
// NOTE(review): the loop stops as soon as every bit is '1', so the all-ones
// pattern itself is never printed as written -- confirm whether that is intended.
while ( All the bits are not one )
{
// Emit each letter, followed by a comma when its bit is set.
// NOTE(review): the bit of the last letter puts a comma after the final letter.
for ( int i=0; i<s.length(); i++ )
{
print s.at(i)
if ( binaryS.at(i) == '1' )
print ","
}
print "\n"
// Advance to the next bit pattern.
AddOne();
}

given a number p , find two elements in array whose product = P

I am looking for solution for :
Given a array and a number P , find two numbers in array whose product equals P.
I am looking for a solution better than O(n^2). I am okay with using extra space or another data structure. Any help is appreciated.

Make a pass through the array, and add the elements to a Hashtable. For each element x added, check whether P/x already exists in the Hashtable - if it does then x and P/x is one of your solutions. This'd be about as optimal as you'll get.

You can try a sliding window approach. First sort all the numbers increasingly, and then use two integers begin and end to index the current pair of numbers. Initialize begin to 0 and end to the last position. Then compare the product of v[begin] and v[end] with P:
If it is equal, you found the answer.
If it is lower, you must find a bigger product, move begin forward.
If it is higher, you must find a smaller product, move end backward.
Here is a C++ code with this idea implemented. This solution is O(n*log(n)) because of the sorting, if you can assume the data is sorted then you can skip the sorting for an O(n) solution.
// Finds two distinct elements of v whose product equals P.
//
// v is sorted in place (ascending). Returns the positions of the two elements
// in the SORTED vector as (smaller index, larger index), or (-1, -1) when no
// such pair exists.
//
// For every element the required partner P / v[i] is looked up by binary
// search, so the whole call is O(n log n). The earlier two-pointer scan on the
// raw product is not monotonic once negative values are present (it missed
// -4 * -3 == 12 in {-4, -3, 1, 2}) and its int product could overflow.
pair<int, int> GetProductPair(vector<int>& v, int P) {
  sort(v.begin(), v.end());
  const int n = static_cast<int>(v.size());
  const long long target = P;  // widened so % and / cannot overflow

  for (int i = 0; i < n; ++i) {
    if (v[i] == 0) {
      // Zero pairs with any other element, but only when the target is zero.
      if (P == 0 && n > 1) {
        const int other = (i == 0) ? 1 : 0;
        return make_pair(min(i, other), max(i, other));
      }
      continue;
    }
    if (target % v[i] != 0) continue;

    const long long want = target / v[i];
    if (want < v.front() || want > v.back()) continue;  // cannot be in v

    // First position holding the partner value, if it is present at all.
    int j = static_cast<int>(
        lower_bound(v.begin(), v.end(), static_cast<int>(want)) - v.begin());
    // The partner must be a different element; try the next duplicate.
    if (j == i) ++j;
    if (j < n && v[j] == want) return make_pair(min(i, j), max(i, j));
  }
  return make_pair(-1, -1);
}

This one would work only for integers:
Decompose P as product of prime numbers. By dividing these in two groups you can obtain the pairs that gives P as product. Now you just have to check both of them are present in the array, this is where a hash table would be very useful. Also, while creating the hash table, you could also filter the array of repeating values, values that are greater than P, or even values that have prime factors not contained in P.

Create a hash that will be populated in the steps below.
Iterate over the elements of the array one by one. Say current element is n
If the number P is exactly divisible by n
check if n is one of the values of the hash. If yes then that key, value are the two numbers that we are looking for and we can break.
if n is not in the values of the hash then add n,x in a hash where n*x = P
If the number P is not exactly divisible by n then continue with next element of array
If we reach end of the array then there are no such two numbers in the array whose product is P
This algo is of O(n)

1.sort the numbers into an array A, removing any zeroes, in O(nlogn) time
2.create an array B such that B[i] = P/A[I] in O(n) time
3.for every B[k] in B, do a binary search in A for that element, takes O(nlogn) time in the worst case
if the element B[k] exists in the array A at position m, then A[k] * A[m] = P
otherwise no such pair exists
the total running time is O(nlogn)
Of course this may run into difficulties on a real machine due to floating point error

/**
 * Tells whether {@code factorsLimit} elements of {@code a} multiply to {@code product}.
 * On success in the general case the chosen factors are printed, one per line.
 *
 * <ul>
 *   <li>{@code factorsLimit == 0}: always false.</li>
 *   <li>{@code product == 0}: true when the array contains a 0.</li>
 *   <li>{@code product == 1}: true when at least {@code factorsLimit} elements equal 1.</li>
 *   <li>otherwise: true when some {@code factorsLimit} elements (each used at most
 *       once) multiply exactly to {@code product}.</li>
 * </ul>
 *
 * Fixes over the previous version: the product==1 branch counted zeros instead of
 * ones; a 0 element caused a division by zero; and the greedy scan reported success
 * without checking that the whole product was consumed (e.g. {2} for product 12) and
 * could miss valid selections (e.g. 3*4 in {2,3,4} for product 12).
 *
 * @param a            candidate factors
 * @param product      the product to reach
 * @param factorsLimit how many elements must be multiplied
 * @return whether such a selection exists
 */
public boolean factors_Of_Product_In_Array(int a[],int product,int factorsLimit)
{
    if (factorsLimit == 0)
    {
        return false;
    }

    // Product zero: a single 0 in the array is enough.
    if (product == 0)
    {
        for (int value : a)
        {
            if (value == 0)
            {
                return true;
            }
        }
        return false;
    }

    // A negative or oversized factor count can never be satisfied.
    if (factorsLimit < 0 || factorsLimit > a.length)
    {
        return false;
    }

    // Product one: enough 1's must be available.
    if (product == 1)
    {
        int ones = 0;
        for (int value : a)
        {
            if (value == 1)
            {
                ones++;
            }
        }
        return ones >= factorsLimit;
    }

    int[] chosen = new int[factorsLimit];
    if (!pickFactors(a, 0, product, factorsLimit, chosen))
    {
        return false;
    }
    for (int factor : chosen)
    {
        System.out.println(" "+factor+" ");
    }
    return true;
}

/**
 * Backtracking search: picks {@code needed} more elements from {@code a[start..]}
 * whose product is {@code remaining}, recording them at the tail of {@code chosen}.
 */
private boolean pickFactors(int[] a, int start, int remaining, int needed, int[] chosen)
{
    if (needed == 0)
    {
        return remaining == 1;
    }
    // Stop as soon as fewer than `needed` elements are left to choose from.
    for (int i = start; i <= a.length - needed; i++)
    {
        // Skip zeros (cannot divide) and values that do not divide what is left.
        if (a[i] == 0 || remaining % a[i] != 0)
        {
            continue;
        }
        chosen[chosen.length - needed] = a[i];
        if (pickFactors(a, i + 1, remaining / a[i], needed - 1, chosen))
        {
            return true;
        }
    }
    return false;
}

Updated to provide the implementation.
O(n+P) solution, ignoring the case of P equal to 0.
#include <stdio.h>
#include <iostream>
#include <unordered_set>
#include <vector>
using namespace std;

// Reads a list of integers and a target P, then reports the first pair of
// entered values whose product is P (if any).
//
// Single pass with a hash set of "partner" values still being waited for, so the
// expected cost is O(n) regardless of how large P is. The earlier bool[P] table
// was indexed out of bounds for the partner P (value 1), for negative values and
// for negative P, and was never freed.
// The case P == 0 is still ignored: zero elements are skipped.
int main(){
    vector<int> arr;
    int P, numOfEle, ele;
    cout << "The number of elements to be entered: " << endl;
    cin >> numOfEle;
    cout << "Please enter the elements: " << endl;
    for (int i = 0; i < numOfEle; i++){
        cin >> ele;
        arr.push_back(ele);
    }
    cout << "Please enter P: " << endl;
    cin >> P;

    // Partners (P / x) of the divisors x seen so far; widened to long long so
    // that INT_MIN / -1 cannot overflow.
    unordered_set<long long> factorsInNeed;
    const long long target = P;
    for (int value : arr){
        if (value == 0 || target % value != 0)
            continue;   // not a divisor of P
        if (factorsInNeed.count(value)){
            cout << "A solution: " << value << " & " << target / value << endl;
            break;
        }
        factorsInNeed.insert(target / value);
    }
    return 0;
}

Here's my shot, it only compares any factors with each other once
P <- The Number
theArray <- new array[theData]
factors <- new array[]
isFactor <- new map(init: false)
factorCount <- 0
i <- 0
while i is in theArray
num <- theArray[i]
if (isFactor[num])
skip
if P modulo num == 0
isFactor[num] <- true
j <- 0
while j is in factors
if factors[j] * num == P
return (num, factors[j])
j++
factors.push(num)
factorCount++
i++

Not sure if this is the best solution but it works. you can try and optimize it.
/// <summary>
/// One entry of the combination search: a text value tagged with the 1-based
/// position of the input element it starts with.
/// </summary>
public class CombInput
{
    /// <summary>Position identifier; compared with other entries' IDs when building combinations.</summary>
    public int ID { get; set; }

    /// <summary>The element text, or several element texts joined with '|'.</summary>
    public string Value { get; set; }
}
/// <summary>
/// Builds '|'-joined combinations of the given values by repeatedly prefixing
/// existing entries with earlier input elements, until the entry that joins
/// all values in input order has been produced.
/// </summary>
/// <param name="values">The elements to combine. Aggregate is called without a seed, which throws InvalidOperationException for an empty sequence.</param>
/// <returns>The Value of every entry generated, starting with the single elements.</returns>
public List<string> GetCombinations(List<string> values)
{
List<CombInput> input = new List<CombInput>();
List<string> outputvalues = new List<string>();
// Tag each value with its 1-based position.
int counter = 1;
foreach (String c in values)
{
input.Add(new CombInput { ID = counter, Value = c });
counter++;
}
// Start with the single elements. This is a deferred query, not a snapshot.
var Output = from i in input
select i;
// The longest combination (all values joined in order) is the stop marker.
string Final = Output.Select(query => query.Value).Aggregate((a, b) => a + "|" + b);
// Each round re-enumerates the whole (growing) query chain to look for Final.
while (!Output.ToList().Exists(s=>s.Value.ToString()==Final))
{
var store = Output;
// Prefix every existing entry o with every input element v that comes before
// it (v.ID < o.ID), skipping combinations already present in store. The new
// entry takes the ID of the prefix element.
var Output1=
(from o in Output
from v in input
where (v.ID < o.ID && !(store.Any(a=>a.Value==v.Value + "|" + o.Value)))
select new CombInput { ID = v.ID, Value = v.Value + "|" + o.Value });
// Keep everything found so far and append this round's new entries;
// ToList() materializes the new entries at this point.
var Outputx = (from s in store select s)
.Concat
(from s in Output1.ToList() select s);
Output = Outputx;
}
// Flatten the entries to their string values.
foreach (var v in Output)
outputvalues.Add(v.Value);
return outputvalues.ToList();
}
/// <summary>
/// Returns every combination produced by GetCombinations for the given numbers
/// whose elements multiply to <paramref name="productCriteria"/>.
/// </summary>
/// <param name="nums">The numbers to combine.</param>
/// <param name="productCriteria">The product a combination must have to be kept.</param>
/// <returns>The matching combinations in '|'-joined form, in generation order.</returns>
public List<string> GetProductCombinations(List<int> nums, int productCriteria)
{
    List<string> asText = nums.Select(n => n.ToString()).ToList();
    List<string> candidates = GetCombinations(asText);

    List<string> matches = new List<string>();
    foreach (string candidate in candidates)
    {
        // Multiply the '|'-separated members back together.
        int product = candidate.Split('|')
                               .Select(part => Convert.ToInt32(part))
                               .Aggregate((a, b) => a * b);
        if (product == productCriteria)
        {
            matches.Add(candidate);
        }
    }
    return matches;
}
/// <summary>
/// Click handler that runs the product-combination search on a fixed sample
/// (1, 2, 3, 4, 6, 7, 8, 12) with target product 12. The result is only held in a local.
/// </summary>
private void button1_Click(object sender, EventArgs e)
{
    int[] numsarr = { 1, 2, 3, 4, 6, 7, 8, 12 };
    List<int> nums = numsarr.ToList();
    List<string> output = GetProductCombinations(nums, 12);
}

/*
 * Prints every pair of elements of arr whose product equals k, using each
 * element in at most one pair.
 *
 * arr  : the values to search
 * size : number of elements in arr
 * k    : the product to look for
 *
 * seen[v] counts earlier, still unpaired occurrences of the value v. Only values
 * in the range 1 .. MAX-1 can be tracked, so zero, negative and too-large
 * elements are skipped; this also rules out the division by zero and the
 * out-of-range table accesses of the previous version. MAX is defined outside
 * this function.
 *
 * The previous version did not compile (stray backticks, missing closing brace)
 * and initialised the table with memset(temp,1,MAX), which sets bytes rather
 * than ints and covers only MAX bytes of the array.
 */
void PrintPairOfProduct(int arr[],int size,int k)
{
    int i, seen[MAX];
    memset(seen, 0, sizeof seen);
    for (i = 0; i < size; ++i)
    {
        int value = arr[i];
        int partner;

        if (value <= 0 || value >= MAX || k % value != 0)
            continue;

        partner = k / value;
        if (partner > 0 && partner < MAX && seen[partner] > 0)
        {
            printf("Product of %d * %d = %d", partner, value, k);
            /* Consume the partner so it is not reported in a second pair. */
            seen[partner]--;
            continue;
        }

        /* No partner yet: remember this value for later elements. */
        seen[value]++;
    }
}

#include<stdio.h>

/*
 * Prints every pair (arr[i], arr[j]) with i < j whose product equals 30,
 * scanning pairs in row order: (0,1), (0,2), ..., (4,5).
 */
int main()
{
    int arr[]={2,15,4,5,6,7};
    const int c = 30;
    int first, second;

    for (first = 0; first < 5; first++)
    {
        for (second = first + 1; second < 6; second++)
        {
            int num = arr[first] * arr[second];
            if (num == c)
            {
                printf("Pairs[%d,%d]\t",arr[first],arr[second]);
            }
        }
    }
    return 0;
}

Develop Reference

ruby bash windows laravel spring algorithm oracle macos go visual-studio