I wanted to make a custom ITOA function to put large numbers into small strings, this is what I have coded :
main(){
printf("itoa(2000000000,36)= '%s'",itoa(2000000000,36));
printf("itoa(36,36)= '%s'",itoa(36,36));
printf("itoa(37,36)= '%s'",itoa(37,36));
return 1;
}
stock itoa(val, base)
{
new buf[1024] = {0,...};
new i = 1023;
new LETTERZ[37] = {'0','1','2','3','4','5','6','7','8','9','A','B','C','D','E','F','G','H','I','J','K','L','M','N','O','P','Q','R','S','T','U','V','W','X','Y','Z',0};
for(; val && i; --i, val /= base)
buf[i] = LETTERZ[val % base];
return buf[i+1];
}
It's based on 'C' code from this page: http://www.jb.man.ac.uk/~slowe/cpp/itoa.html
But somehow this is the output:
[20:34:35] itoa(2000000000,36)= 'X'
[20:34:35] itoa(36,36)= '1'
[20:34:35] itoa(37,36)= '1'
And this is totally wrong, I don't know which output to expect but 36 and 37 for sure can't be the same output and 2 000 000 000 can't be just 'X', as X is suposed to be 35, not 2 000 000 000,
ZZ should be 1295 I think... I want to base this on the hexadecimal system, but with all the alfabet letters.
Could anyone tell me what's wrong here?
I'm working with a typeless language called PAWN (also known as SMALL) and later i want to use this code in VB.NET
/* itoa example */
#include <stdio.h>
#include <stdlib.h>
int main ()
{
int i;
char buffer [33];
printf ("Enter a number: ");
scanf ("%d",&i);
itoa (i,buffer,10);
printf ("decimal: %s\n",buffer);
itoa (i,buffer,16);
printf ("hexadecimal: %s\n",buffer);
itoa (i,buffer,2);
printf ("binary: %s\n",buffer);
return 0;
}
You only give the number and the base, but parameter 2 needs a pointer to char already allocated. Use a buffer or try NULL, so the function will return the result.
THe solution seemed to be simple, the return buf[i+1] just returned one character so what I did is make it return an array:
new _s#T[4096];
#define sprintf(%1) (format(_s#T, SPRINTF_MAX_STRING, %1), _s#T)
main(){
new num = atoi("ABCDEFG",36);
printf("%d",num);
printf("%s",itoa(num,36));
return 1;
}
stock itoa(val, base)
{
new buf[1024] = {0,...};
new LETTERZ[37] = {'0','1','2','3','4','5','6','7','8','9','A','B','C','D','E','F','G','H','I','J','K','L','M','N','O','P','Q','R','S','T','U','V','W','X','Y','Z',0};
for(new pos = 0; val;++pos,val = floatround(val/base,floatround_floor))
strins(buf,sprintf("%c",LETTERZ[val % base]),0);
return buf;
}
stock atoi(val[], base)
{
new CURRNUM = 0;
new len = strlen(val);
new LETTERZ[37] = {'0','1','2','3','4','5','6','7','8','9','A','B','C','D','E','F','G','H','I','J','K','L','M','N','O','P','Q','R','S','T','U','V','W','X','Y','Z',0};
for(new i = 0; i < len; ++i)
{
for(new x = 0; x < base; ++x)
{
new y = (len-i)-1;
if(val[y] == LETTERZ[x])
{
CURRNUM += x*floatround(floatpower(base,i));
}
}
}
return CURRNUM;
}
Related
I have a long string of about 50,000,000 long... , and I am substituting it part by part
cat FILE | tail -n+2 | awk -v k=100 '{
i = 1
while (i<length($0)-k+1) {
x = substr($0, i, k)
if (CONDITION) {
x changed sth
$0 = substr($0,1,i-1) x substr($0,i+k)
}
i += 1
}
gsub(sth,sth,$0)
printf("%s",$0) >> FILE
}'
Are there any ways to replace $0 at position i with x of length k without using this method?
The string is too long and the commands runs extremely slow
sample input:
NNNNNNNNNNggcaaacagaatccagcagcacatcaaaaagcttatccacAGTAATTCATTATATCAAAATGCTCCAggccaggcgtggtggcttatgcc
sample output:
NNNNNNNNNNggcnnncngnnnccngcngcncnncnnnnngcnnnnccncNGNNNNNCNNNNNNNCNNNNNGCNCCNggccnggcgnggnggcnnnngcc
If substring with length k=10 contains >50% of A || a || T || t
(so there are length($0)-k+1 substrings)
substitute A and T with N, a and t with n
The $0 string must maintain it size and sequence (Case sensitive)
EDIT:
I misunderstood the requirement of this problem, and repost the question at here.
Basically:
read a window of characters to two buffers - scratch buffer and output buffer
if in the scratch buffer there are more then some count of characters ATat
then replace all characters ATat in the output buffer buffer to Nn respectively
output one character from the output buffer
flush one character in both buffers
and go to step 1 to repeat reading the characters into buffers
when the end of line is encountered, just flush output buffer and reset it all
A small C program for sure is going to be the fastest:
// The window size
#define N 10
// The percent of the window that has to be equal to one of [AaTt]
#define PERCENT 50
#include <assert.h>
#include <stdio.h>
#include <string.h>
#include <stdbool.h>
// output a string
static void output(char *outme, size_t n) {
fwrite(outme, n, 1, stdout);
}
// is one of [AaTt]
static bool is_one_of_them(char c) {
switch(c) {
case 'A':
case 'a':
case 'T':
case 't':
return true;
}
return false;
}
// Convert one of characters to n/N depending on case
static char convert_them_to_n(char c) {
// switch(c){ case 'T': case 'A': return true; } return false;
// ASCII is assumed
const char m = ~0x1f;
const char w = 'n' & ~m;
return (c & m) | w;
}
static const unsigned threshold = N * PERCENT / 100;
// Store the input in buf
static char buf[N];
// Store the output to-be-outputted in out
static char out[N];
// The current position in buf and out
// The count of readed characters
static size_t pos;
// The count of one of searched characters in buf
static unsigned count_them;
static void buf_reset(void) {
pos = 0;
count_them = 0;
}
static void buf_flush(void) {
output(out, pos);
buf_reset();
}
static void buf_replace_them(void) {
// TODO: this could keep count of characters alrady replaced in out to save CPU
for (size_t i = 0; i < N; ++i) {
if (is_one_of_them(out[i])) {
out[i] = convert_them_to_n(out[i]);
}
}
}
static void buf_flush_one(void) {
assert(pos > 0);
assert(pos == N);
output(out, 1);
count_them -= is_one_of_them(buf[0]);
memmove(buf, buf + 1, pos - 1);
memmove(out, out + 1, pos - 1);
pos--;
}
static void buf_add(char c) {
buf[pos] = out[pos] = c;
pos++;
count_them += is_one_of_them(c);
// if we reached the substring length
if (pos == N) {
// if the count reached the threshold
if (count_them >= threshold) {
// convert the characters to n
buf_replace_them();
}
// flush one character only at a time
buf_flush_one();
}
}
int main() {
int c;
buf_reset();
while ((c = getchar()) != EOF) {
if (c == '\n') {
// If its a newline, just flush what we have buffered
buf_flush();
output("\n", 1);
continue;
}
buf_add(c);
}
buf_flush();
}
Such a C program is easily transferable to for example an awk script, just one need to read one character at a time. Below I split the characters with split, like:
awk -v N=10 -v percent=50 '
BEGIN{ threshold = N * percent / 100; pos=0 }
function is_one_of_them(c) {
return c ~ /^[aAtT]$/;
}
function buf_flush(i) {
for (i = 0; i < pos; ++i) {
printf "%s", out[i]
}
pos = 0
count_them = 0
}
function buf_replace_them(i) {
for (i = 0; i < pos; ++i) {
if (is_one_of_them(out[i])) {
out[i] = out[i] ~ /[AT]/ ? "N" : "n";
}
}
}
function buf_flush_one(i) {
printf "%s", out[0]
count_them -= is_one_of_them(buf[0])
if(0 && debug) {
printf(" count_them %s ", count_them)
for (i = 0; i < pos-1; ++i) {
printf("%s", buf[i+1])
} printf(" ");
for (i = 0; i < pos-1; ++i) {
printf("%s", out[i+1])
}
printf("\n");
}
for (i = 0; i < pos-1; ++i) {
buf[i] = buf[i+1]
out[i] = out[i+1]
}
pos--
}
function buf_add(c) {
buf[pos]=c; out[pos]=c; pos++
count_them += is_one_of_them(c)
if (pos == N) {
if (count_them >= threshold) {
buf_replace_them()
}
buf_flush_one()
}
}
{
split($0, chars, "")
for (idx = 0; idx <= length($0); idx++) {
buf_add(chars[idx])
}
buf_flush();
printf "\n";
}
'
Both programs when run with the input presented in the first line produce the output presented in the second line (note that lone a near the end is not replaced, because there are no 5 charactets ATat in a window of 10 characters from it):
NNNNNNNNNNggcaaacagaatccagcagcacatcaaaaagcttatccacAGTAATTCATTATATCAAAATGCTCCAggccaggcgtggtggcttatgcc
NNNNNNNNNNggcnnncngnnnccngcngcncnncnnnnngcnnnnccncNGNNNNNCNNNNNNNCNNNNNGCNCCNggccaggcgnggnggcnnnngcc
Both solutions were tested on repl.
You need to be careful with how you address this problem. You cannot work on the substituted string. You need to keep track of the original string. Here is a simple example. Assume we have a string consisting of x and y and we want to replace all y with z if there are 8 y in a substring of 10. Imagine your input looks like:
yyyyyyyyxxy
The first substring of 10 reads yyyyyyyyxx and would be translated into zzzzzzzzxx. If you perform the substitution directly into the original string, you get zzzzzzzzxxy. The second substring now reads zzzzzzzxxy, and does not contain 8 times y, while in the original string it does. So according to the solution of the OP, this would lead into inconsistent results, depending on if you start from the front or the back. So a quick solution would be:
awk -v N=10 -v p=50 '
BEGIN { n = N*p/100 }
{ s = $0 }
{ for(i=1;i<=length-N;++i) {
str=substr($0,i,N)
c=gsub(/[AT]/,"N",str) + gsub(/[at]/,"n",str)
if(c >= n) s = substr(s,1,i-1) str substr(s,i+N)
}
}
{ print s }' file
There is ofcourse quite some work you do double here. Imagine you have a string of the form xxyyyyyyyyxx, you would perform 4 concatinations while you only need to do one. So the best idea is to minimalise the work and only check the substrings which end with the respective character:
awk -v N=10 -v p=50 '
BEGIN { n = N*p/100 }
{ s = $0 }
{ i=N; while (match(substr($0,i),/[ATat]/)) {
str=substr($0,i+RSTART-N,N)
c=gsub(/[AT]/,"N",str) + gsub(/[at]/,"n",str)
if(c >= n) { s = substr(s,1,i+RSTART-N-1) str substr(s,i+RSTART)}
i=i+RSTART
}
}
{ print s }' file
To replace $0 at position i with x do:
awk 'BEGIN{i=12345;x="blubber"}
{
printf("%s",substr($0,1,i));
printf("%s",x);
printf("%s",substr($0,i+length(x)));
}'
I don't think there is any faster method.
To replace AGCT with N and agct with n use tr. To replace them only within a range and using awk you should do:
awk 'BEGIN{i=12345;n=123}
{
printf("%s",substr($0,1,i-1));
printf(gsub(/[atgc]/,"n",gsub(/[ATGC]/,"N",substr($0,i,i+n-1))));
printf("%s",substr($0,i+n));
}'
To do more advanced and faster processing you should consider c/c++.
I am trying to write a simple nonogram solver, in a kind of bruteforce way, but I am stuck on a relatively easy task. Let's say I have a row with clues [2,3] that has a length of 10
so the solutions are:
$$-$$$----
$$--$$$---
$$---$$$--
$$----$$$-
$$-----$$$
-$$----$$$
--$$---$$$
---$$--$$$
----$$-$$$
-$$---$$$-
--$$-$$$--
I want to find all the possible solutions for a row
I know that I have to consider each block separately, and each block will have an availible space of n-(sum of remaining blocks length + number of remaining blocks) but I do not know how to progress from here
Well, this question already have a good answer, so think of this one more as an advertisement of python's prowess.
def place(blocks,total):
if not blocks: return ["-"*total]
if blocks[0]>total: return []
starts = total-blocks[0] #starts = 2 means possible starting indexes are [0,1,2]
if len(blocks)==1: #this is special case
return [("-"*i+"$"*blocks[0]+"-"*(starts-i)) for i in range(starts+1)]
ans = []
for i in range(total-blocks[0]): #append current solutions
for sol in place(blocks[1:],starts-i-1): #with all possible other solutiona
ans.append("-"*i+"$"*blocks[0]+"-"+sol)
return ans
To test it:
for i in place([2,3,2],12):
print(i)
Which produces output like:
$$-$$$-$$---
$$-$$$--$$--
$$-$$$---$$-
$$-$$$----$$
$$--$$$-$$--
$$--$$$--$$-
$$--$$$---$$
$$---$$$-$$-
$$---$$$--$$
$$----$$$-$$
-$$-$$$-$$--
-$$-$$$--$$-
-$$-$$$---$$
-$$--$$$-$$-
-$$--$$$--$$
-$$---$$$-$$
--$$-$$$-$$-
--$$-$$$--$$
--$$--$$$-$$
---$$-$$$-$$
This is what i got:
#include <iostream>
#include <vector>
#include <string>
using namespace std;
typedef std::vector<bool> tRow;
void printRow(tRow row){
for (bool i : row){
std::cout << ((i) ? '$' : '-');
}
std::cout << std::endl;
}
int requiredCells(const std::vector<int> nums){
int sum = 0;
for (int i : nums){
sum += (i + 1); // The number + the at-least-one-cell gap at is right
}
return (sum == 0) ? 0 : sum - 1; // The right-most number don't need any gap
}
bool appendRow(tRow init, const std::vector<int> pendingNums, unsigned int rowSize, std::vector<tRow> &comb){
if (pendingNums.size() <= 0){
comb.push_back(init);
return false;
}
int cellsRequired = requiredCells(pendingNums);
if (cellsRequired > rowSize){
return false; // There are no combinations
}
tRow prefix;
int gapSize = 0;
std::vector<int> pNumsAux = pendingNums;
pNumsAux.erase(pNumsAux.begin());
unsigned int space = rowSize;
while ((gapSize + cellsRequired) <= rowSize){
space = rowSize;
space -= gapSize;
prefix.clear();
prefix = init;
for (int i = 0; i < gapSize; ++i){
prefix.push_back(false);
}
for (int i = 0; i < pendingNums[0]; ++i){
prefix.push_back(true);
space--;
}
if (space > 0){
prefix.push_back(false);
space--;
}
appendRow(prefix, pNumsAux, space, comb);
++gapSize;
}
return true;
}
std::vector<tRow> getCombinations(const std::vector<int> row, unsigned int rowSize) {
std::vector<tRow> comb;
tRow init;
appendRow(init, row, rowSize, comb);
return comb;
}
int main(){
std::vector<int> row = { 2, 3 };
auto ret = getCombinations(row, 10);
for (tRow r : ret){
while (r.size() < 10)
r.push_back(false);
printRow(r);
}
return 0;
}
And my output is:
$$-$$$----
$$--$$$---
$$---$$$--
$$----$$$--
$$-----$$$
-$$-$$$----
-$$--$$$--
-$$---$$$-
-$$----$$$-
--$$-$$$--
--$$--$$$-
--$$---$$$
---$$-$$$-
---$$--$$$
----$$-$$$
For sure, this must be absolutely improvable.
Note: i did't test it more than already written case
Hope it works for you
Status bit_flags_set_flag(BIT_FLAGS hBit_flags, int flag_position) {
Bit_Flags* temp = (Bit_Flags*)hBit_flags;
int* nums;
int i;
int old_size;
if (temp->size < flag_position) {
nums = malloc(sizeof(int)*flag_position+1);
if (nums == NULL) {
return FAILURE;
}
for (i = 0; i < temp->size; i++) {
nums[i] = temp->data[i];
}
free(temp->data);
temp->data = nums;
old_size = temp->size;
temp->size = flag_position + 1;
for (i = old_size; i < temp->size; i++) {
temp->data[i] = 0;
}
}
temp->data[flag_position / 32] |= 1 << flag_position % 32;
return SUCCESS;
}
according to the debugger the error is from the free(temp->data) part. however. I only run into the error the second time I go through the function. any ideas what is happening here.
am getting a heap corruption error on visual studio.
I am writing on some assumptions like you are assuming int size is 32 bits and you are trying to set the bit at flag_position in the bitset and you are using 1 int for 1 bit for setting and unsetting bits
Few comments now
temp->data[flag_position / 32] |= 1 << flag_position % 32; now this doesn't make any sense, this line role is to set bit at flag_position, this should be temp->data[flag_position] = 1; instead because if you see your code your are using ints for each bit.
Also this line temp->size = flag_position + 1; is also incorrect , this should be temp->size = flag_position;
I have a large list of digit strings like this one. The individual strings are relatively short (say less than 50 digits).
data = [
'300303334',
'53210234',
'123456789',
'5374576807063874'
]
I need to find out a efficient data structure (speed first, memory second) and algorithm which returns only those strings that are composed of a given set of digits.
Example results:
filter(data, [0,3,4]) = ['300303334']
filter(data, [0,1,2,3,4,5]) = ['300303334', '53210234']
The data list will usually fit into memory.
For each digit, precompute a postings list that don't contain the digit.
postings = [[] for _ in xrange(10)]
for i, d in enumerate(data):
for j in xrange(10):
digit = str(j)
if digit not in d:
postings[j].append(i)
Now, to find all strings that contain, for example, just the digits [1, 3, 5] you can merge the postings lists for the other digits (ie: 0, 2, 4, 6, 7, 8, 9).
def intersect_postings(p0, p1):
i0, i1 = next(p0), next(p1)
while True:
if i0 == i1:
yield i0
i0, i1 = next(p0), next(p1)
elif i0 < i1: i0 = next(p0)
else: i1 = next(p1)
def find_all(digits):
p = None
for d in xrange(10):
if d not in digits:
if p is None: p = iter(postings[d])
else: p = intersect_postings(p, iter(postings[d]))
return (data[i] for i in p) if p else iter(data)
print list(find_all([0, 3, 4]))
print list(find_all([0, 1, 2, 3, 4, 5]))
A string can be encoded by a 10-bit number. There are 2^10, or 1,024 possible values.
So create a dictionary that uses an integer for a key and a list of strings for the value.
Calculate the value for each string and add that string to the list of strings for that value.
General idea:
Dictionary Lookup;
for each (string in list)
value = 0;
for each character in string
set bit N in value, where N is the character (0-9)
Lookup[value] += string // adds string to list for this value in dictionary
Then, to get a list of the strings that match your criteria, just compute the value and do a direct dictionary lookup.
So if the user asks for strings that contain only 3, 5, and 7:
value = (1 << 3) || (1 << 5) || (1 << 7);
list = Lookup[value];
Note that, as Matt pointed out in comment below, this will only return strings that contain all three digits. So, for example, it wouldn't return 37. That seems like a fatal flaw to me.
Edit
If the number of symbols you have to deal with is very large, then the number of possible combinations becomes too large for this solution to be practical.
With a large number of symbols, I'd recommend an inverted index as suggested in the comments, combined with a secondary filter that removes the strings that contain extraneous digits.
Consider a function f which constructs a bitmask for each string with bit i set if digit i is in the string.
For example,
f('0') = 0b0000000001
f('00') = 0b0000000001
f('1') = 0b0000000010
f('1100') = 0b0000000011
Then I suggest storing a list of strings for each bitmask.
For example,
Bitmask 0b0000000001 -> ['0','00']
Once you have prepared this data structure (which is the same size as your original list), you can then easily access all the strings for a particular filter by accessing all lists where the bitmask is a subset of the digits in your filter.
So for your example of filter [0,3,4] you would return the lists from:
Strings containing just 0
Strings containing just 3
Strings containing just 4
Strings containing 0 and 3
Strings containing 0 and 4
Strings containing 3 and 4
Strings containing 0 and 3 and 4
Example Python Code
from collections import defaultdict
import itertools
raw_data = [
'300303334',
'53210234',
'123456789',
'5374576807063874'
]
def preprocess(raw_data):
data = defaultdict(list)
for s in raw_data:
bitmask = 0
for digit in s:
bitmask |= 1<<int(digit)
data[bitmask].append(s)
return data
def filter(data,mask):
for r in range(len(mask)):
for m in itertools.combinations(mask,r+1):
bitmask = sum(1<<digit for digit in m)
for s in data[bitmask]:
yield s
data = preprocess(raw_data)
for a in filter(data, [0,1,2,3,4,5]):
print a
Just for kicks, I have coded up Jim's lovely algorithm and the Perl is here if anyone wants to play with it. Please do not accept this as an answer or anything, pass all credit to Jim:
#!/usr/bin/perl
use strict;
use warnings;
my $Debug=1;
my $Nwords=1000;
my ($word,$N,$value,$i,$j,$k);
my (#dictionary,%Lookup);
################################################################################
# Generate "words" with random number of characters 5-30
################################################################################
print "DEBUG: Generating $Nwords word dictionary\n" if $Debug;
for($i=0;$i<$Nwords;$i++){
$j = rand(25) + 5; # length of this word
$word="";
for($k=0;$k<$j;$k++){
$word = $word . int(rand(10));
}
$dictionary[$i]=$word;
print "$word\n" if $Debug;
}
# Add some obvious test cases
$dictionary[++$i]="0" x 50;
$dictionary[++$i]="1" x 50;
$dictionary[++$i]="2" x 50;
$dictionary[++$i]="3" x 50;
$dictionary[++$i]="4" x 50;
$dictionary[++$i]="5" x 50;
$dictionary[++$i]="6" x 50;
$dictionary[++$i]="7" x 50;
$dictionary[++$i]="8" x 50;
$dictionary[++$i]="9" x 50;
$dictionary[++$i]="0123456789";
################################################################################
# Encode words
################################################################################
for $word (#dictionary){
$value=0;
for($i=0;$i<length($word);$i++){
$N=substr($word,$i,1);
$value |= 1 << $N;
}
push(#{$Lookup{$value}},$word);
print "DEBUG: $word encoded as $value\n" if $Debug;
}
################################################################################
# Do lookups
################################################################################
while(1){
print "Enter permitted digits, separated with commas: ";
my $line=<STDIN>;
my #digits=split(",",$line);
$value=0;
for my $d (#digits){
$value |= 1<<$d;
}
print "Value: $value\n";
print join(", ",#{$Lookup{$value}}),"\n\n" if defined $Lookup{$value};
}
I like Jim Mischel's approach. It has pretty efficient look up and bounded memory usage. Code in C follows:
#include <stdlib.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <readline/readline.h>
#include <readline/history.h>
enum {
zero = '0',
nine = '9',
numbers = nine - zero + 1,
masks = 1 << numbers,
};
typedef uint16_t mask;
struct list {
char *s;
struct list *next;
};
typedef struct list list_cell;
typedef struct list *list;
static inline int is_digit(char c) { return c >= zero && c <= nine; }
static inline mask char2mask(char c) { return 1 << (c - zero); }
static inline mask add_char2mask(mask m, char c) {
return m | (is_digit(c) ? char2mask(c) : 0);
}
static inline int is_set(mask m, mask n) { return (m & n) != 0; }
static inline int is_set_char(mask m, char c) { return is_set(m, char2mask(c)); }
static inline int is_submask(mask sub, mask m) { return (sub & m) == sub; }
static inline char *sprint_mask(char buf[11], mask m) {
char *s = buf;
char i;
for(i = zero; i <= nine; i++)
if(is_set_char(m, i)) *s++ = i;
*s = 0;
return buf;
}
static inline mask get_mask(char *s) {
mask m=0;
for(; *s; s++)
m = add_char2mask(m, *s);
return m;
}
static inline int is_empty(list l) { return !l; }
static inline list insert(list *l, char *s) {
list cell = (list)malloc(sizeof(list_cell));
cell->s = s;
cell->next = *l;
return *l = cell;
}
static void *foreach(void *f(char *, void *), list l, void *init) {
for(; !is_empty(l); l = l->next)
init = f(l->s, init);
return init;
}
struct printer_state {
int first;
FILE *f;
};
static void *prin_list_member(char *s, void *data) {
struct printer_state *st = (struct printer_state *)data;
if(st->first) {
fputs(", ", st->f);
} else
st->first = 1;
fputs(s, st->f);
return data;
}
static void print_list(list l) {
struct printer_state st = {.first = 0, .f = stdout};
foreach(prin_list_member, l, (void *)&st);
putchar('\n');
}
static list *init_lu(void) { return (list *)calloc(sizeof(list), masks); }
static list *insert2lu(list lu[masks], char *s) {
mask i, m = get_mask(s);
if(m) // skip string without any number
for(i = m; i < masks; i++)
if(is_submask(m, i))
insert(lu+i, s);
return lu;
}
int usage(const char *name) {
fprintf(stderr, "Usage: %s filename\n", name);
return EXIT_FAILURE;
}
#define handle_error(msg) \
do { perror(msg); exit(EXIT_FAILURE); } while (0)
static inline void chomp(char *s) { if( (s = strchr(s, '\n')) ) *s = '\0'; }
list *load_file(FILE *f) {
char *line = NULL;
size_t len = 0;
ssize_t read;
list *lu = init_lu();
for(; (read = getline(&line, &len, f)) != -1; line = NULL) {
chomp(line);
insert2lu(lu, line);
}
return lu;
}
void read_reqs(list *lu) {
char *line;
char buf[11];
for(; (line = readline("> ")); free(line))
if(*line) {
add_history(line);
mask m = get_mask(line);
printf("mask: %s\nstrings: ", sprint_mask(buf, m));
print_list(lu[m]);
};
putchar('\n');
}
int main(int argc, const char* argv[] ) {
const char *name = argv[0];
FILE *f;
list *lu;
if(argc != 2) return usage(name);
f = fopen(argv[1], "r");
if(!f) handle_error("open");
lu = load_file(f);
fclose(f);
read_reqs(lu);
return EXIT_SUCCESS;
}
To compile use
gcc -lreadline -o digitfilter digitfilter.c
And test run:
$ cat data.txt
300303334
53210234
123456789
5374576807063874
$ ./digitfilter data.txt
> 034
mask: 034
strings: 300303334
> 0,1,2,3,4,5
mask: 012345
strings: 53210234, 300303334
> 0345678
mask: 0345678
strings: 5374576807063874, 300303334
Put each value into a set-- Eg.: '300303334'={3, 0, 4}.
Since the length of your data items are bound by a constant (50),
you can do these at O(1) time for each item using Java HashSet. The overall complexity of this phase adds up to O(n).
For each filter set, use containsAll() of HashSet to see whether
each of these data items is a subset of your filter. Takes O(n).
Takes O(m*n) in the overall where n is the number of data items and m the number of filters.
I just saw that this could technically work, the only mistake I couldn´t resolve was the last ASCII character that gets printed everytime I test it out, I also tested this out without using the name variable, I mean just making a substraction of 32 to any lower case letter in ASCII should give me their upper case one and it does, but I´m curious on why I´m getting an additional char, wich from what I see in screen is apparently Û.
#include <stdio.h>
main()
{
char name[22];
int i;
fputs("Type your name ",stdout);
fgets(name,22,stdin);
for (i = 0; name[i] != '\0'; i = i + 1)
printf("%c",(name[i])-32); /*This will convert lower case to upper */
/* using as reference the ASCII table*/
fflush(stdin);
getchar();
}
Perhaps there is a line break character at the end of the string.
You can check the chararacter code, so that you only convert characters that actually are lower case letters:
for (i = 0; name[i] != '\0'; i = i + 1) {
char c = name[i];
if (c => 97 && c <= 122) {
c -= 32;
}
printf("%c", c);
}
void read_chararray(char in_array[], int* Length)
{
int Indx = 0, Indx2 = 0, Indx3 = 0; // int declarations for indexs of some loops
char cinput = { 0 }, word[255] = { 0 }, word2[255] = { 0 }; // declaration of cinput and first char array before punctiation removed
for (Indx = 0; (cinput = getchar()) != '\n'; Indx++) { // Loop for getting characters from user stop at <enter>
word[Indx] = cinput; // Placing char into array while changing to lowercase
}
Indx2 = Indx; // Set Indx2 to Indx for loop operation
for (Indx = 0; Indx < Indx2; Indx++) { // Loop to check and replace upper characters with lower
cinput = word[Indx];
if (cinput >= 65 && cinput <= 90) { // If cinput is within the ASCII range 65 and 90, this indicates upper characters
cinput += 32; // Add 32 to cinput to shift to the lower character range within the ASCII table
in_array[Indx] = cinput; // Input new value into array pointer
}
else if (cinput >= 97 && cinput <= 122) // scans if character are lower ASCII, places them in array irraticating punctuation and whitespce
in_array[Indx] = cinput; // Input remaining lower case into array pointer
}
*Length = Indx; // final size of array set to Length variable for future use
}
#include<stdio.h>
void upper(char);
void main()
{
char ch;
printf("\nEnter the character in lower case");
scanf("%c", &ch);
upper(ch);
}
void upper( char c)
{
printf("\nUpper Case: %c", c-32);
}