creating compare string function - string-comparison

I created a function that compares strings, and as I was frustrated about it always missing the last character in the second string and always returning "identical strings" as a result, I noticed that I was messing around and used gets() instead of fgets() for the second string. I changed that and the function works as expected.
My question is, why does the gets() function subtract that last character? Shouldn't it subtract the null and leave it at that?
Does that mean that as a newcomer to C, I should avoid using gets() and focus on fgets() instead? I'm starting to think of them in the same way I think of strcmp() vs strncmp()
Thanks for your time everyone!
Note: I'm aware that I don't really need the (i==j) at the end, I just left it there (extra security, maybe?).
bool compare_string(const char *string1, const char *string2) {
int i = 0, j = 0, result = 0;
while (string1[i] != '\0') {
i++;
}
while (string2[j] != '\0') {
j++;
}
i = 0;
j = 0;
while ((string1[i] != '\0') && (string2[j] != '\0')) {
if (string1[i] < string2[j]) {
result = -1;
break;
} else if (string1[i] > string2[j]) {
result = 1;
break;
} else if (string1[i] == string2[j]) {
result = 0;
}
i++;
j++;
}
if ((result == 0) && (i==j)) {
printf("identical strings \n");
} else if (result == -1) {
printf("not identical, -1 \n");
} else if (result == 1) {
printf("not identical, 1 \n");
}
}
//in main
char str_compare1[STRING_LIMIT];
char str_compare2[STRING_LIMIT];
printf("enter 1st string to compare, (100) characters or less: \n");
fgets(str_compare1, STRING_LIMIT, stdin);
printf("enter 2nd string to compare, (100) characters or less \n");
fgets(str_compare2, STRING_LIMIT, stdin);
result = compare_string(str_compare1, str_compare2);

Related

Any faster way to replace substring in AWK

I have a long string of about 50,000,000 long... , and I am substituting it part by part
cat FILE | tail -n+2 | awk -v k=100 '{
i = 1
while (i<length($0)-k+1) {
x = substr($0, i, k)
if (CONDITION) {
x changed sth
$0 = substr($0,1,i-1) x substr($0,i+k)
}
i += 1
}
gsub(sth,sth,$0)
printf("%s",$0) >> FILE
}'
Are there any ways to replace $0 at position i with x of length k without using this method?
The string is too long and the commands runs extremely slow
sample input:
NNNNNNNNNNggcaaacagaatccagcagcacatcaaaaagcttatccacAGTAATTCATTATATCAAAATGCTCCAggccaggcgtggtggcttatgcc
sample output:
NNNNNNNNNNggcnnncngnnnccngcngcncnncnnnnngcnnnnccncNGNNNNNCNNNNNNNCNNNNNGCNCCNggccnggcgnggnggcnnnngcc
If substring with length k=10 contains >50% of A || a || T || t
(so there are length($0)-k+1 substrings)
substitute A and T with N, a and t with n
The $0 string must maintain it size and sequence (Case sensitive)
EDIT:
I misunderstood the requirement of this problem, and repost the question at here.
Basically:
read a window of characters to two buffers - scratch buffer and output buffer
if in the scratch buffer there are more then some count of characters ATat
then replace all characters ATat in the output buffer buffer to Nn respectively
output one character from the output buffer
flush one character in both buffers
and go to step 1 to repeat reading the characters into buffers
when the end of line is encountered, just flush output buffer and reset it all
A small C program for sure is going to be the fastest:
// The window size
#define N 10
// The percent of the window that has to be equal to one of [AaTt]
#define PERCENT 50
#include <assert.h>
#include <stdio.h>
#include <string.h>
#include <stdbool.h>
// output a string
static void output(char *outme, size_t n) {
fwrite(outme, n, 1, stdout);
}
// is one of [AaTt]
static bool is_one_of_them(char c) {
switch(c) {
case 'A':
case 'a':
case 'T':
case 't':
return true;
}
return false;
}
// Convert one of characters to n/N depending on case
static char convert_them_to_n(char c) {
// switch(c){ case 'T': case 'A': return true; } return false;
// ASCII is assumed
const char m = ~0x1f;
const char w = 'n' & ~m;
return (c & m) | w;
}
static const unsigned threshold = N * PERCENT / 100;
// Store the input in buf
static char buf[N];
// Store the output to-be-outputted in out
static char out[N];
// The current position in buf and out
// The count of readed characters
static size_t pos;
// The count of one of searched characters in buf
static unsigned count_them;
static void buf_reset(void) {
pos = 0;
count_them = 0;
}
static void buf_flush(void) {
output(out, pos);
buf_reset();
}
static void buf_replace_them(void) {
// TODO: this could keep count of characters alrady replaced in out to save CPU
for (size_t i = 0; i < N; ++i) {
if (is_one_of_them(out[i])) {
out[i] = convert_them_to_n(out[i]);
}
}
}
static void buf_flush_one(void) {
assert(pos > 0);
assert(pos == N);
output(out, 1);
count_them -= is_one_of_them(buf[0]);
memmove(buf, buf + 1, pos - 1);
memmove(out, out + 1, pos - 1);
pos--;
}
static void buf_add(char c) {
buf[pos] = out[pos] = c;
pos++;
count_them += is_one_of_them(c);
// if we reached the substring length
if (pos == N) {
// if the count reached the threshold
if (count_them >= threshold) {
// convert the characters to n
buf_replace_them();
}
// flush one character only at a time
buf_flush_one();
}
}
int main() {
int c;
buf_reset();
while ((c = getchar()) != EOF) {
if (c == '\n') {
// If its a newline, just flush what we have buffered
buf_flush();
output("\n", 1);
continue;
}
buf_add(c);
}
buf_flush();
}
Such a C program is easily transferable to for example an awk script, just one need to read one character at a time. Below I split the characters with split, like:
awk -v N=10 -v percent=50 '
BEGIN{ threshold = N * percent / 100; pos=0 }
function is_one_of_them(c) {
return c ~ /^[aAtT]$/;
}
function buf_flush(i) {
for (i = 0; i < pos; ++i) {
printf "%s", out[i]
}
pos = 0
count_them = 0
}
function buf_replace_them(i) {
for (i = 0; i < pos; ++i) {
if (is_one_of_them(out[i])) {
out[i] = out[i] ~ /[AT]/ ? "N" : "n";
}
}
}
function buf_flush_one(i) {
printf "%s", out[0]
count_them -= is_one_of_them(buf[0])
if(0 && debug) {
printf(" count_them %s ", count_them)
for (i = 0; i < pos-1; ++i) {
printf("%s", buf[i+1])
} printf(" ");
for (i = 0; i < pos-1; ++i) {
printf("%s", out[i+1])
}
printf("\n");
}
for (i = 0; i < pos-1; ++i) {
buf[i] = buf[i+1]
out[i] = out[i+1]
}
pos--
}
function buf_add(c) {
buf[pos]=c; out[pos]=c; pos++
count_them += is_one_of_them(c)
if (pos == N) {
if (count_them >= threshold) {
buf_replace_them()
}
buf_flush_one()
}
}
{
split($0, chars, "")
for (idx = 0; idx <= length($0); idx++) {
buf_add(chars[idx])
}
buf_flush();
printf "\n";
}
'
Both programs when run with the input presented in the first line produce the output presented in the second line (note that lone a near the end is not replaced, because there are no 5 charactets ATat in a window of 10 characters from it):
NNNNNNNNNNggcaaacagaatccagcagcacatcaaaaagcttatccacAGTAATTCATTATATCAAAATGCTCCAggccaggcgtggtggcttatgcc
NNNNNNNNNNggcnnncngnnnccngcngcncnncnnnnngcnnnnccncNGNNNNNCNNNNNNNCNNNNNGCNCCNggccaggcgnggnggcnnnngcc
Both solutions were tested on repl.
You need to be careful with how you address this problem. You cannot work on the substituted string. You need to keep track of the original string. Here is a simple example. Assume we have a string consisting of x and y and we want to replace all y with z if there are 8 y in a substring of 10. Imagine your input looks like:
yyyyyyyyxxy
The first substring of 10 reads yyyyyyyyxx and would be translated into zzzzzzzzxx. If you perform the substitution directly into the original string, you get zzzzzzzzxxy. The second substring now reads zzzzzzzxxy, and does not contain 8 times y, while in the original string it does. So according to the solution of the OP, this would lead into inconsistent results, depending on if you start from the front or the back. So a quick solution would be:
awk -v N=10 -v p=50 '
BEGIN { n = N*p/100 }
{ s = $0 }
{ for(i=1;i<=length-N;++i) {
str=substr($0,i,N)
c=gsub(/[AT]/,"N",str) + gsub(/[at]/,"n",str)
if(c >= n) s = substr(s,1,i-1) str substr(s,i+N)
}
}
{ print s }' file
There is ofcourse quite some work you do double here. Imagine you have a string of the form xxyyyyyyyyxx, you would perform 4 concatinations while you only need to do one. So the best idea is to minimalise the work and only check the substrings which end with the respective character:
awk -v N=10 -v p=50 '
BEGIN { n = N*p/100 }
{ s = $0 }
{ i=N; while (match(substr($0,i),/[ATat]/)) {
str=substr($0,i+RSTART-N,N)
c=gsub(/[AT]/,"N",str) + gsub(/[at]/,"n",str)
if(c >= n) { s = substr(s,1,i+RSTART-N-1) str substr(s,i+RSTART)}
i=i+RSTART
}
}
{ print s }' file
To replace $0 at position i with x do:
awk 'BEGIN{i=12345;x="blubber"}
{
printf("%s",substr($0,1,i));
printf("%s",x);
printf("%s",substr($0,i+length(x)));
}'
I don't think there is any faster method.
To replace AGCT with N and agct with n use tr. To replace them only within a range and using awk you should do:
awk 'BEGIN{i=12345;n=123}
{
printf("%s",substr($0,1,i-1));
printf(gsub(/[atgc]/,"n",gsub(/[ATGC]/,"N",substr($0,i,i+n-1))));
printf("%s",substr($0,i+n));
}'
To do more advanced and faster processing you should consider c/c++.

parentheses balanced in C language [closed]

Closed. This question needs debugging details. It is not currently accepting answers.
Edit the question to include desired behavior, a specific problem or error, and the shortest code necessary to reproduce the problem. This will help others answer the question.
Closed 6 years ago.
Improve this question
This is a parentheses balanced code. I submitted and got comment from my professor.
"Your stack is supposed to grow and shrink dynamically to accommodate any number of characters. DO NOT USE scanf for %s, this is risky behavior and Dr. Adams does not approve. Write a helper function to read in characters until newline."
Can you help me to fix this problem?
Your professor is correct and he gave you the solution: do not read a line into a buffer with scanf("%s",...): an arbitrary long line will cause a buffer overflow. You do not need to read a full line, just make check_balanced read one character at a time:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "stack.h"
#define TRUE 1
#define FALSE 0
int check_matching(void);
int main(int argc, char *argv[]) {
int n, i, c;
/* get the number of cases */
if (scanf("%d", &n) != 1) {
printf("invalid number\n");
exit(1);
}
/* consume the rest of the line */
while ((c = getchar()) != EOF && c != '\n')
continue;
for (i = 0; i < n; i++) {
if (check_matching()) {
printf("yes\n");
} else {
printf("no\n");
}
}
return 0;
}
int check_matching(void) {
int ret = TRUE, symbol, checkSymbol;
LinkedStack *pStack;
StackNode *pNode;
StackNode node;
pStack = createLinkedStack();
if (pStack == NULL) {
printf("createLinkedStack failed\n");
exit(1);
}
/* read a full line, one byte at a time */
while ((symbol = getchar()) != EOF && symbol != '\n') {
if (ret == FALSE)
continue;
switch (symbol) {
case '(':
case '[':
case '{':
node.data = symbol;
pushLS(pStack, node);
break;
case ')':
case ']':
case '}':
pNode = popLS(pStack);
if (pNode == NULL) {
ret = FALSE;
break;
} else {
checkSymbol = pNode->data;
if ((symbol == ')' && checkSymbol == '(')
|| (symbol == ']' && checkSymbol == '[')
|| (symbol == '}' && checkSymbol == '{')) {
// Right case. do nothing.
} else {
ret = FALSE;
}
free(pNode);
}
break;
}
}
if (isLinkedStackEmpty(pStack) == FALSE) {
ret = FALSE;
}
deleteLinkedStack(pStack);
return ret;
}

Finding shortest repeating cycle in word?

I'm about to write a function which, would return me a shortest period of group of letters which would eventually create the given word.
For example word abkebabkebabkeb is created by repeated abkeb word. I would like to know, how efficiently analyze input word, to get the shortest period of characters creating input word.
Here is a correct O(n) algorithm. The first for loop is the table building portion of KMP. There are various proofs that it always runs in linear time.
Since this question has 4 previous answers, none of which are O(n) and correct, I heavily tested this solution for both correctness and runtime.
def pattern(inputv):
if not inputv:
return inputv
nxt = [0]*len(inputv)
for i in range(1, len(nxt)):
k = nxt[i - 1]
while True:
if inputv[i] == inputv[k]:
nxt[i] = k + 1
break
elif k == 0:
nxt[i] = 0
break
else:
k = nxt[k - 1]
smallPieceLen = len(inputv) - nxt[-1]
if len(inputv) % smallPieceLen != 0:
return inputv
return inputv[0:smallPieceLen]
O(n) solution. Assumes that the entire string must be covered. The key observation is that we generate the pattern and test it, but if we find something along the way that doesn't match, we must include the entire string that we already tested, so we don't have to reobserve those characters.
def pattern(inputv):
pattern_end =0
for j in range(pattern_end+1,len(inputv)):
pattern_dex = j%(pattern_end+1)
if(inputv[pattern_dex] != inputv[j]):
pattern_end = j;
continue
if(j == len(inputv)-1):
print pattern_end
return inputv[0:pattern_end+1];
return inputv;
This is an example for PHP:
<?php
function getrepeatedstring($string) {
if (strlen($string)<2) return $string;
for($i = 1; $i<strlen($string); $i++) {
if (substr(str_repeat(substr($string, 0, $i),strlen($string)/$i+1), 0, strlen($string))==$string)
return substr($string, 0, $i);
}
return $string;
}
?>
Most easiest one in python:
def pattern(self, s):
ans=(s+s).find(s,1,-1)
return len(pat) if ans == -1 else ans
I believe there is a very elegant recursive solution. Many of the proposed solutions solve the extra complexity where the string ends with part of the pattern, like abcabca. But I do not think that is asked for.
My solution for the simple version of the problem in clojure:
(defn find-shortest-repeating [pattern string]
(if (empty? (str/replace string pattern ""))
pattern
(find-shortest-repeating (str pattern (nth string (count pattern))) string)))
(find-shortest-repeating "" "abcabcabc") ;; "abc"
But be aware that this will not find patterns that are uncomplete at the end.
I found a solution based on your post, that could take an incomplete pattern:
(defn find-shortest-repeating [pattern string]
(if (or (empty? (clojure.string/split string (re-pattern pattern)))
(empty? (second (clojure.string/split string (re-pattern pattern)))))
pattern
(find-shortest-repeating (str pattern (nth string (count pattern))) string)))
My Solution:
The idea is to find a substring from the position zero such that it becomes equal to the adjacent substring of same length, when such a substring is found return the substring. Please note if no repeating substring is found I am printing the entire input String.
public static void repeatingSubstring(String input){
for(int i=0;i<input.length();i++){
if(i==input.length()-1){
System.out.println("There is no repetition "+input);
}
else if(input.length()%(i+1)==0){
int size = i+1;
if(input.substring(0, i+1).equals(input.substring(i+1, i+1+size))){
System.out.println("The subString which repeats itself is "+input.substring(0, i+1));
break;
}
}
}
}
This is a solution I came up with using the queue, it passed all the test cases of a similar problem in codeforces. Problem No is 745A.
#include<bits/stdc++.h>
using namespace std;
typedef long long ll;
int main()
{
ios_base::sync_with_stdio(false);
cin.tie(NULL);
string s, s1, s2; cin >> s; queue<char> qu; qu.push(s[0]); bool flag = true; int ind = -1;
s1 = s.substr(0, s.size() / 2);
s2 = s.substr(s.size() / 2);
if(s1 == s2)
{
for(int i=0; i<s1.size(); i++)
{
s += s1[i];
}
}
//cout << s1 << " " << s2 << " " << s << "\n";
for(int i=1; i<s.size(); i++)
{
if(qu.front() == s[i]) {qu.pop();}
qu.push(s[i]);
}
int cycle = qu.size();
/*queue<char> qu2 = qu; string str = "";
while(!qu2.empty())
{
cout << qu2.front() << " ";
str += qu2.front();
qu2.pop();
}*/
while(!qu.empty())
{
if(s[++ind] != qu.front()) {flag = false; break;}
qu.pop();
}
flag == true ? cout << cycle : cout << s.size();
return 0;
}
Simpler answer which I can come up in an interview is just a O(n^2) solution, which tries out all combinations of substring starting from 0.
int findSmallestUnit(string str){
for(int i=1;i<str.length();i++){
int j=0;
for(;j<str.length();j++){
if(str[j%i] != str[j]){
break;
}
}
if(j==str.length()) return str.substr(0,i);
}
return str;
}
Now if someone is interested in O(n) solution to this problem in c++:
int findSmallestUnit(string str){
vector<int> lps(str.length(),0);
int i=1;
int len=0;
while(i<str.length()){
if(str[i] == str[len]){
len++;
lps[i] = len;
i++;
}
else{
if(len == 0) i++;
else{
len = lps[len-1];
}
}
}
int n=str.length();
int x = lps[n-1];
if(n%(n-x) == 0){
return str.substr(0,n-x);
}
return str;
}
The above is just #Buge's answer in c++, since someone asked in comments.
Regex solution:
Use the following regex replacement to find the shortest repeating substring, and only keeping that substring:
^(.+?)\1*$
$1
Explanation:
^(.+?)\1*$
^ $ # Start and end, to match the entire input-string
( ) # Capture group 1:
.+ # One or more characters,
? # with a reluctant instead of greedy match†
\1* # Followed by the first capture group repeated zero or more times
$1 # Replace the entire input-string with the first capture group match,
# removing all other duplicated substrings
† Greedy vs reluctant would in this case mean: greedy = consumes as many characters as it can; reluctant = consumes as few characters as it can. Since we want the shortest repeating substring, we would want a reluctant match in our regex.
Example input: "abkebabkebabkeb"
Example output: "abkeb"
Try it online in Retina.
Here an example implementation in Java.
Super delayed answer, but I got the question in an interview, here was my answer (probably not the most optimal but it works for strange test cases as well).
private void run(String[] args) throws IOException {
File file = new File(args[0]);
BufferedReader buffer = new BufferedReader(new FileReader(file));
String line;
while ((line = buffer.readLine()) != null) {
ArrayList<String> subs = new ArrayList<>();
String t = line.trim();
String out = null;
for (int i = 0; i < t.length(); i++) {
if (t.substring(0, t.length() - (i + 1)).equals(t.substring(i + 1, t.length()))) {
subs.add(t.substring(0, t.length() - (i + 1)));
}
}
subs.add(0, t);
for (int j = subs.size() - 2; j >= 0; j--) {
String match = subs.get(j);
int mLength = match.length();
if (j != 0 && mLength <= t.length() / 2) {
if (t.substring(mLength, mLength * 2).equals(match)) {
out = match;
break;
}
} else {
out = match;
}
}
System.out.println(out);
}
}
Testcases:
abcabcabcabc
bcbcbcbcbcbcbcbcbcbcbcbcbcbc
dddddddddddddddddddd
adcdefg
bcbdbcbcbdbc
hellohell
Code returns:
abc
bc
d
adcdefg
bcbdbc
hellohell
Works in cases such as bcbdbcbcbdbc.
function smallestRepeatingString(sequence){
var currentRepeat = '';
var currentRepeatPos = 0;
for(var i=0, ii=sequence.length; i<ii; i++){
if(currentRepeat[currentRepeatPos] !== sequence[i]){
currentRepeatPos = 0;
// Add next character available to the repeat and reset i so we don't miss any matches inbetween
currentRepeat = currentRepeat + sequence.slice(currentRepeat.length, currentRepeat.length+1);
i = currentRepeat.length-1;
}else{
currentRepeatPos++;
}
if(currentRepeatPos === currentRepeat.length){
currentRepeatPos = 0;
}
}
// If repeat wasn't reset then we didn't find a full repeat at the end.
if(currentRepeatPos !== 0){ return sequence; }
return currentRepeat;
}
I came up with a simple solution that works flawlessly even with very large strings.
PHP Implementation:
function get_srs($s){
$hash = md5( $s );
$i = 0; $p = '';
do {
$p .= $s[$i++];
preg_match_all( "/{$p}/", $s, $m );
} while ( ! hash_equals( $hash, md5( implode( '', $m[0] ) ) ) );
return $p;
}

How to find validity of a string of parentheses, curly brackets and square brackets?

I recently came in contact with this interesting problem. You are given a string containing just the characters '(', ')', '{', '}', '[' and ']', for example, "[{()}]", you need to write a function which will check validity of such an input string, function may be like this:
bool isValid(char* s);
these brackets have to close in the correct order, for example "()" and "()[]{}" are all valid but "(]", "([)]" and "{{{{" are not!
I came out with following O(n) time and O(n) space complexity solution, which works fine:
Maintain a stack of characters.
Whenever you find opening braces '(', '{' OR '[' push it on the stack.
Whenever you find closing braces ')', '}' OR ']' , check if top of stack is corresponding opening bracket, if yes, then pop the stack, else break the loop and return false.
Repeat steps 2 - 3 until end of the string.
This works, but can we optimize it for space, may be constant extra space, I understand that time complexity cannot be less than O(n) as we have to look at every character.
So my question is can we solve this problem in O(1) space?
With reference to the excellent answer from Matthieu M., here is an implementation in C# that seems to work beautifully.
/// <summary>
/// Checks to see if brackets are well formed.
/// Passes "Valid parentheses" challenge on www.codeeval.com,
/// which is a programming challenge site much like www.projecteuler.net.
/// </summary>
/// <param name="input">Input string, consisting of nothing but various types of brackets.</param>
/// <returns>True if brackets are well formed, false if not.</returns>
static bool IsWellFormedBrackets(string input)
{
string previous = "";
while (input.Length != previous.Length)
{
previous = input;
input = input
.Replace("()", String.Empty)
.Replace("[]", String.Empty)
.Replace("{}", String.Empty);
}
return (input.Length == 0);
}
Essentially, all it does is remove pairs of brackets until there are none left to remove; if there is anything left the brackets are not well formed.
Examples of well formed brackets:
()[]
{()[]}
Example of malformed brackets:
([)]
{()[}]
Actually, there's a deterministic log-space algorithm due to Ritchie and Springsteel: http://dx.doi.org/10.1016/S0019-9958(72)90205-7 (paywalled, sorry not online). Since we need log bits to index the string, this is space-optimal.
If you're willing to accept one-sided error, then there's an algorithm that uses n polylog(n) time and polylog(n) space: http://www.eccc.uni-trier.de/report/2009/119/
If the input is read-only, I don't think we can do O(1) space. It is a well known fact that any O(1) space decidable language is regular (i.e writeable as a regular expression). The set of strings you have is not a regular language.
Of course, this is about a Turing Machine. I would expect it to be true for fixed word RAM machines too.
Edit: Although simple, this algorithm is actually O(n^2) in terms of character comparisons. To demonstrate it, one can simply generate a string as '(' * n + ')' * n.
I have a simple, though perhaps erroneous idea, that I will submit to your criticisms.
It's a destructive algorithm, which means that if you ever need the string it would not help (since you would need to copy it down).
Otherwise, the algorithm work with a simple index within the current string.
The idea is to remove pairs one after the others:
([{}()])
([()])
([])
()
empty -> OK
It is based on the simple fact that if we have matching pairs, then at least one is of the form () without any pair character in between.
Algorithm:
i := 0
Find a matching pair from i. If none is found, then the string is not valid. If one is found, let i be the index of the first character.
Remove [i:i+1] from the string
If i is at the end of the string, and the string is not empty, it's a failure.
If [i-1:i] is a matching pair, i := i-1 and back to 3.
Else, back to 1.
The algorithm is O(n) in complexity because:
each iteration of the loop removes 2 characters from the string
the step 2., which is linear, is naturally bound (i cannot grow indefinitely)
And it's O(1) in space because only the index is required.
Of course, if you can't afford to destroy the string, then you'll have to copy it, and that's O(n) in space so no real benefit there!
Unless, of course, I am deeply mistaken somewhere... and perhaps someone could use the original idea (there is a pair somewhere) to better effect.
I doubt you'll find a better solution, since even if you use internal functions to regexp or count occurrences, they still have a O(...) cost. I'd say your solution is the best :)
To optimize for space you could do some run-length encoding on your stack, but I doubt it would gain you very much, except in cases like {{{{{{{{{{}}}}}}}}}}.
http://www.sureinterview.com/shwqst/112007
It is natural to solve this problem with a stack.
If only '(' and ')' are used, the stack is not necessary. We just need to maintain a counter for the unmatched left '('. The expression is valid if the counter is always non-negative during the match and is zero at the end.
In general case, although the stack is still necessary, the depth of the stack can be reduced by using a counter for unmatched braces.
This is an working java code where I filter out the brackets from the string expression and then check the well formedness by replacing wellformed braces by nulls
Sample input = (a+{b+c}-[d-e])+[f]-[g] FilterBrackets will output = ({}[])[][] Then I check for wellformedness.
Comments welcome.
public class ParanString {
public static void main(String[] args) {
String s = FilterBrackets("(a+{b+c}-[d-e])[][]");
while ((s.length()!=0) && (s.contains("[]")||s.contains("()")||s.contains("{}")))
{
//System.out.println(s.length());
//System.out.println(s);
s = s.replace("[]", "");
s = s.replace("()", "");
s = s.replace("{}", "");
}
if(s.length()==0)
{
System.out.println("Well Formed");
}
else
{
System.out.println("Not Well Formed");
}
}
public static String FilterBrackets(String str)
{
int len=str.length();
char arr[] = str.toCharArray();
String filter = "";
for (int i = 0; i < len; i++)
{
if ((arr[i]=='(') || (arr[i]==')') || (arr[i]=='[') || (arr[i]==']') || (arr[i]=='{') || (arr[i]=='}'))
{
filter=filter+arr[i];
}
}
return filter;
}
}
The following modification of Sbusidan's answer is O(n2) time complex but O(log n) space simple.
#include <stdio.h>
#include <string.h>
#include <stdbool.h>
char opposite(char bracket) {
switch(bracket) {
case '[':
return ']';
case '(':
return ')';
}
}
bool is_balanced(int length, char *s) {
int depth, target_depth, index;
char target_bracket;
if(length % 2 != 0) {
return false;
}
for(target_depth = length/2; target_depth > 0; target_depth--) {
depth=0;
for(index = 0; index < length; index++) {
switch(s[index]) {
case '(':
case '[':
depth++;
if(depth == target_depth) target_bracket = opposite(s[index]);
break;
case ')':
case ']':
if(depth == 0) return false;
if(depth == target_depth && s[index] != target_bracket) return false;
depth--;
break;
}
}
}
}
void main(char* argv[]) {
char input[] = "([)[(])]";
char *balanced = is_balanced(strlen(input), input) ? "balanced" : "imbalanced";
printf("%s is %s.\n", input, balanced);
}
If you can overwrite the input string (not reasonable in the use cases I envision, but what the heck...) you can do it in constant space, though I believe the time requirement goes up to O(n2).
Like this:
string s = input
char c = null
int i=0
do
if s[i] isAOpenChar()
c = s[i]
else if
c = isACloseChar()
if closeMatchesOpen(s[i],c)
erase s[i]
while s[--i] != c ;
erase s[i]
c == null
i = 0; // Not optimal! It would be better to back up until you find an opening character
else
return fail
end if
while (s[++i] != EOS)
if c==null
return pass
else
return fail
The essence of this is to use the early part of the input as the stack.
I know I'm a little late to this party; it's also my very first post on StackOverflow.
But when I looked through the answers, I thought I might be able to come up with a better solution.
So my solution is to use a few pointers.
It doesn't even have to use any RAM storage, as registers can be used for this.
I have not tested the code; it's written it on the fly.
You'll need to fix my typos, and debug it, but I believe you'll get the idea.
Memory usage: Only the CPU registers in most cases.
CPU usage: It depends, but approximately twice the time it takes to read the string.
Modifies memory: No.
b: string beginning, e: string end.
l: left position, r: right position.
c: char, m: match char
if r reaches the end of the string, we have a success.
l goes backwards from r towards b.
Whenever r meets a new start kind, set l = r.
when l reaches b, we're done with the block; jump to beginning of next block.
const char *chk(const char *b, int len) /* option 2: remove int len */
{
char c, m;
const char *l, *r;
e = &b[len]; /* option 2: remove. */
l = b;
r = b;
while(r < e) /* option 2: change to while(1) */
{
c = *r++;
/* option 2: if(0 == c) break; */
if('(' == c || '{' == c || '[' == c)
{
l = r;
}
else if(')' == c || ']' == c || '}' == c)
{
/* find 'previous' starting brace */
m = 0;
while(l > b && '(' != m && '[' != m && '{' != m)
{
m = *--l;
}
/* now check if we have the correct one: */
if(((m & 1) + 1 + m) != c) /* cryptic: convert starting kind to ending kind and match with c */
{
return(r - 1); /* point to error */
}
if(l <= b) /* did we reach the beginning of this block ? */
{
b = r; /* set new beginning to 'head' */
l = b; /* obsolete: make left is in range. */
}
}
}
m = 0;
while(l > b && '(' != m && '[' != m && '{' != m)
{
m = *--l;
}
return(m ? l : NULL); /* NULL-pointer for OK */
}
After thinking about this approach for a while, I realized that it will not work as it is right now.
The problem will be that if you have "[()()]", it'll fail when reaching the ']'.
But instead of deleting the proposed solution, I'll leave it here, as it's actually not impossible to make it work, it does require some modification, though.
/**
*
* #author madhusudan
*/
public class Main {
/**
* #param args the command line arguments
*/
public static void main(String[] args) {
new Main().validateBraces("()()()()(((((())))))()()()()()()()()");
// TODO code application logic here
}
/**
* #Use this method to validate braces
*/
public void validateBraces(String teststr)
{
StringBuffer teststr1=new StringBuffer(teststr);
int ind=-1;
for(int i=0;i<teststr1.length();)
{
if(teststr1.length()<1)
break;
char ch=teststr1.charAt(0);
if(isClose(ch))
break;
else if(isOpen(ch))
{
ind=teststr1.indexOf(")", i);
if(ind==-1)
break;
teststr1=teststr1.deleteCharAt(ind).deleteCharAt(i);
}
else if(isClose(ch))
{
teststr1=deleteOpenBraces(teststr1,0,i);
}
}
if(teststr1.length()>0)
{
System.out.println("Invalid");
}else
{
System.out.println("Valid");
}
}
public boolean isOpen(char ch)
{
if("(".equals(Character.toString(ch)))
{
return true;
}else
return false;
}
public boolean isClose(char ch)
{
if(")".equals(Character.toString(ch)))
{
return true;
}else
return false;
}
public StringBuffer deleteOpenBraces(StringBuffer str,int start,int end)
{
char ar[]=str.toString().toCharArray();
for(int i=start;i<end;i++)
{
if("(".equals(ar[i]))
str=str.deleteCharAt(i).deleteCharAt(end);
break;
}
return str;
}
}
Instead of putting braces into the stack, you could use two pointers to check the characters of the string. one start from the beginning of the string and the other start from end of the string. something like
bool isValid(char* s) {
start = find_first_brace(s);
end = find_last_brace(s);
while (start <= end) {
if (!IsPair(start,end)) return false;
// move the pointer forward until reach a brace
start = find_next_brace(start);
// move the pointer backward until reach a brace
end = find_prev_brace(end);
}
return true;
}
Note that there are some corner case not handled.
I think that you can implement an O(n) algorithm. Simply you have to initialise an counter variable for each type: curly, square and normal brackets. After than you should iterate the string and should increase the coresponding counter if the bracket is opened, otherwise to decrease it. If the counter is negative return false. AfterI think that you can implement an O(n) algorithm. Simply you have to initialise an counter variable for each type: curly, square and normal brackets. After than you should iterate the string and should increase the coresponding counter if the bracket is opened, otherwise to decrease it. If the counter is negative return false. After you count all brackets, you should check if all counters are zero. In that case, the string is valid and you should return true.
You could provide the value and check if its a valid one, it would print YES otherwise it would print NO
static void Main(string[] args)
{
string value = "(((([{[(}]}]))))";
List<string> jj = new List<string>();
if (!(value.Length % 2 == 0))
{
Console.WriteLine("NO");
}
else
{
bool isValid = true;
List<string> items = new List<string>();
for (int i = 0; i < value.Length; i++)
{
string item = value.Substring(i, 1);
if (item == "(" || item == "{" || item == "[")
{
items.Add(item);
}
else
{
string openItem = items[items.Count - 1];
if (((item == ")" && openItem == "(")) || (item == "}" && openItem == "{") || (item == "]" && openItem == "["))
{
items.RemoveAt(items.Count - 1);
}
else
{
isValid = false;
break;
}
}
}
if (isValid)
{
Console.WriteLine("Yes");
}
else
{
Console.WriteLine("NO");
}
}
Console.ReadKey();
}
var verify = function(text)
{
var symbolsArray = ['[]', '()', '<>'];
var symbolReg = function(n)
{
var reg = [];
for (var i = 0; i < symbolsArray.length; i++) {
reg.push('\\' + symbolsArray[i][n]);
}
return new RegExp('(' + reg.join('|') + ')','g');
};
// openReg matches '(', '[' and '<' and return true or false
var openReg = symbolReg(0);
// closeReg matches ')', ']' and '>' and return true or false
var closeReg = symbolReg(1);
// nestTest matches openSymbol+anyChar+closeSymbol
// and returns an obj with the match str and it's start index
var nestTest = function(symbols, text)
{
var open = symbols[0]
, close = symbols[1]
, reg = new RegExp('(\\' + open + ')([\\s\\S])*(\\' + close + ')','g')
, test = reg.exec(text);
if (test) return {
start: test.index,
str: test[0]
};
else return false;
};
var recursiveCheck = function(text)
{
var i, nestTests = [], test, symbols;
// nestTest with each symbol
for (i = 0; i < symbolsArray.length; i++)
{
symbols = symbolsArray[i];
test = nestTest(symbols, text);
if (test) nestTests.push(test);
}
// sort tests by start index
nestTests.sort(function(a, b)
{
return a.start - b.start;
});
if (nestTests.length)
{
// build nest data: calculate match end index
for (i = 0; i < nestTests.length; i++)
{
test = nestTests[i];
var end = test.start + ( (test.str) ? test.str.length : 0 );
nestTests[i].end = end;
var last = (nestTests[i + 1]) ? nestTests[i + 1].index : text.length;
nestTests[i].pos = text.substring(end, last);
}
for (i = 0; i < nestTests.length; i++)
{
test = nestTests[i];
// recursive checks what's after the nest
if (test.pos.length && !recursiveCheck(test.pos)) return false;
// recursive checks what's in the nest
if (test.str.length) {
test.str = test.str.substring(1, test.str.length - 1);
return recursiveCheck(test.str);
} else return true;
}
} else {
// if no nests then check for orphan symbols
var closeTest = closeReg.test(text);
var openTest = openReg.test(text);
return !(closeTest || openTest);
}
};
return recursiveCheck(text);
};
Using c# OOPS programming... Small and simple solution
Console.WriteLine("Enter the string");
string str = Console.ReadLine();
int length = str.Length;
if (length % 2 == 0)
{
while (length > 0 && str.Length > 0)
{
for (int i = 0; i < str.Length; i++)
{
if (i + 1 < str.Length)
{
switch (str[i])
{
case '{':
if (str[i + 1] == '}')
str = str.Remove(i, 2);
break;
case '(':
if (str[i + 1] == ')')
str = str.Remove(i, 2);
break;
case '[':
if (str[i + 1] == ']')
str = str.Remove(i, 2);
break;
}
}
}
length--;
}
if(str.Length > 0)
Console.WriteLine("Invalid input");
else
Console.WriteLine("Valid input");
}
else
Console.WriteLine("Invalid input");
Console.ReadKey();
This is my solution to the problem.
O(n) is the complexity of time without complexity of space.
Code in C.
#include <stdio.h>
#include <string.h>
#include <stdbool.h>
bool checkBraket(char *s)
{
int curly = 0, rounded = 0, squre = 0;
int i = 0;
char ch = s[0];
while (ch != '\0')
{
if (ch == '{') curly++;
if (ch == '}') {
if (curly == 0) {
return false;
} else {
curly--; }
}
if (ch == '[') squre++;
if (ch == ']') {
if (squre == 0) {
return false;
} else {
squre--;
}
}
if (ch == '(') rounded++;
if (ch == ')') {
if (rounded == 0) {
return false;
} else {
rounded--;
}
}
i++;
ch = s[i];
}
if (curly == 0 && rounded == 0 && squre == 0){
return true;
}
else {
return false;
}
}
void main()
{
char mystring[] = "{{{{{[(())}}]}}}";
int answer = checkBraket(mystring);
printf("my answer is %d\n", answer);
return;
}

using sscanf(), read string to array of int?

i have this string:
12 4 the quick 99 -1 fox dog \
what i want in my program:
myArray[] = {12, 4, 99, -1};
how i do a multiple number scanning?
See my answer to your other question here. It's a relatively simple matter to replace the strtok section to recognize non-numeric words and neither increment the count (in the first pass) nor load them into the array (in the second pass).
The code has changed as follows:
Using an input file of:
12 3 45 6 7 8
3 5 6 7
7 0 -1 4 5
12 4 the quick 99 -1 fox dog \
it produces output along the lines of:
0x8e42170, size = 6:
12 3 45 6 7 8
0x8e421d0, size = 4:
3 5 6 7
0x8e421e0, size = 5:
7 0 -1 4 5
0x8e42278, size = 4:
12 4 99 -1
Here's the code that produced that output:
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <errno.h>
// This is the linked list of integer arrays.
typedef struct _tIntArray {
int size;
int *array;
struct _tIntArray *next;
} tIntArray;
static tIntArray *first = NULL;
static tIntArray *last = NULL;
// Check that argument is numeric, optional minus sign followed by
// zero or more digits (you may want one or more).
static int isAllNumeric (char *word) {
char *s = word;
if (*s == '-')
s++;
for (; *s != '\0'; s++)
if ((*s < '0') || (*s > '9'))
return 0;
return 1;
}
// Add a line of integers as a node.
static int addNode (char *str) {
tIntArray *curr; // pointers for new integer array.
char *word; // word within string.
char *tmpStr; // temp copy of buffer.
int fldCnt; // field count for line.
int i;
// Count number of fields.
if ((tmpStr = strdup (str)) == NULL) {
printf ("Cannot allocate duplicate string (%d).\n", errno);
return 1;
}
fldCnt = 0;
for (word = strtok (tmpStr, " "); word; word = strtok (NULL, " "))
if (isAllNumeric (word))
fldCnt++;
free (tmpStr);
// Create new linked list node.
if ((curr = malloc (sizeof (tIntArray))) == NULL) {
printf ("Cannot allocate integer array node (%d).\n", errno);
return 1;
}
curr->size = fldCnt;
if ((curr->array = malloc (fldCnt * sizeof (int))) == NULL) {
printf ("Cannot allocate integer array (%d).\n", errno);
free (curr);
return 1;
}
curr->next = NULL;
for (i = 0, word = strtok (str, " "); word; word = strtok (NULL, " "))
if (isAllNumeric (word))
curr->array[i++] = atoi (word);
if (last == NULL)
first = last = curr;
else {
last->next = curr;
last = curr;
}
return 0;
}
int main(void) {
int lineSz; // current line size.
char *buff; // buffer to hold line.
FILE *fin; // input file handle.
long offset; // offset for re-allocating line buffer.
tIntArray *curr; // pointers for new integer array.
int i;
// Open file.
if ((fin = fopen ("qq.in", "r")) == NULL) {
printf ("Cannot open qq.in, errno = %d\n", errno);
return 1;
}
// Allocate initial line.
lineSz = 2;
if ((buff = malloc (lineSz+1)) == NULL) {
printf ("Cannot allocate initial memory, errno = %d.\n", errno);
return 1;
}
// Loop forever.
while (1) {
// Save offset in case we need to re-read.
offset = ftell (fin);
// Get line, exit if end of file.
if (fgets (buff, lineSz, fin) == NULL)
break;
// If no newline, assume buffer wasn't big enough.
if (buff[strlen(buff)-1] != '\n') {
// Get bigger buffer and seek back to line start and retry.
free (buff);
lineSz += 3;
if ((buff = malloc (lineSz+1)) == NULL) {
printf ("Cannot allocate extra memory, errno = %d.\n", errno);
return 1;
}
if (fseek (fin, offset, SEEK_SET) != 0) {
printf ("Cannot seek, errno = %d.\n", errno);
return 1;
}
continue;
}
// Remove newline and process.
buff[strlen(buff)-1] = '\0';
if (addNode (buff) != 0)
return 1;
}
// Dump table for debugging.
for (curr = first; curr != NULL; curr = curr->next) {
printf ("%p, size = %d:\n ", curr, curr->size);
for (i = 0; i < curr->size; i++)
printf (" %d", curr->array[i]);
printf ("\n");
}
// Free resources and exit.
free (buff);
fclose (fin);
return 0;
}

Resources