getline() reads first line multiple times

getline() reads first line multiple times - c++11

Why does getline() reads only the first line multiple times ? I also tried using getline() within the while loop but it gives the same result.Any specific reason why this is happening ?
void closestPair(char inFile[50],char outFile[50])
{
int num1,num2;
int i =0;
string line ="";
stringstream ss;
ifstream fp(inFile);
while(!fp.eof())
{
getline(fp,line);
ss<<line;
ss>>num1>>num2;
A[i].x = num1;
A[i].y = num2;
i++;
printf(" %d %d \n", num1, num2);
}
fp.close();
}
my input file:
1 3
4 6
7 9
8 5
2 5
output:
1 3
1 3
1 3
1 3
1 3

I would suggest changing the scope of ss to see predictable results.
void closestPair(char inFile[50],char outFile[50])
{
int num1,num2;
int i =0;
string line ="";
ifstream fp(inFile);
while( getline(fp, line) )
{
stringstream ss; // Move it inside the loop.
ss<<line;
ss>>num1>>num2;
A[i].x = num1;
A[i].y = num2;
i++;
printf(" %d %d \n", num1, num2);
}
fp.close();
}
Better still, construct a std::ostringstream from line and use it to extract the numbers.
void closestPair(char inFile[50],char outFile[50])
{
int num1,num2;
int i =0;
string line ="";
ifstream fp(inFile);
while( getline(fp, line) )
{
std::ostringstream ss(line);
ss>>num1>>num2;
A[i].x = num1;
A[i].y = num2;
i++;
printf(" %d %d \n", num1, num2);
}
fp.close();
}
See Why is iostream::eof inside a loop condition (i.e. `while (!stream.eof())`) considered wrong? regarding while(!fp.eof()).

Related

Any faster way to replace substring in AWK

I have a long string of about 50,000,000 long... , and I am substituting it part by part
cat FILE | tail -n+2 | awk -v k=100 '{
i = 1
while (i<length($0)-k+1) {
x = substr($0, i, k)
if (CONDITION) {
x changed sth
$0 = substr($0,1,i-1) x substr($0,i+k)
}
i += 1
}
gsub(sth,sth,$0)
printf("%s",$0) >> FILE
}'
Are there any ways to replace $0 at position i with x of length k without using this method?
The string is too long and the commands runs extremely slow
sample input:
NNNNNNNNNNggcaaacagaatccagcagcacatcaaaaagcttatccacAGTAATTCATTATATCAAAATGCTCCAggccaggcgtggtggcttatgcc
sample output:
NNNNNNNNNNggcnnncngnnnccngcngcncnncnnnnngcnnnnccncNGNNNNNCNNNNNNNCNNNNNGCNCCNggccnggcgnggnggcnnnngcc
If substring with length k=10 contains >50% of A || a || T || t
(so there are length($0)-k+1 substrings)
substitute A and T with N, a and t with n
The $0 string must maintain it size and sequence (Case sensitive)
EDIT:
I misunderstood the requirement of this problem, and repost the question at here.

Basically:
read a window of characters to two buffers - scratch buffer and output buffer
if in the scratch buffer there are more then some count of characters ATat
then replace all characters ATat in the output buffer buffer to Nn respectively
output one character from the output buffer
flush one character in both buffers
and go to step 1 to repeat reading the characters into buffers
when the end of line is encountered, just flush output buffer and reset it all
A small C program for sure is going to be the fastest:
// The window size
#define N 10
// The percent of the window that has to be equal to one of [AaTt]
#define PERCENT 50
#include <assert.h>
#include <stdio.h>
#include <string.h>
#include <stdbool.h>
// output a string
static void output(char *outme, size_t n) {
fwrite(outme, n, 1, stdout);
}
// is one of [AaTt]
static bool is_one_of_them(char c) {
switch(c) {
case 'A':
case 'a':
case 'T':
case 't':
return true;
}
return false;
}
// Convert one of characters to n/N depending on case
static char convert_them_to_n(char c) {
// switch(c){ case 'T': case 'A': return true; } return false;
// ASCII is assumed
const char m = ~0x1f;
const char w = 'n' & ~m;
return (c & m) | w;
}
static const unsigned threshold = N * PERCENT / 100;
// Store the input in buf
static char buf[N];
// Store the output to-be-outputted in out
static char out[N];
// The current position in buf and out
// The count of readed characters
static size_t pos;
// The count of one of searched characters in buf
static unsigned count_them;
static void buf_reset(void) {
pos = 0;
count_them = 0;
}
static void buf_flush(void) {
output(out, pos);
buf_reset();
}
static void buf_replace_them(void) {
// TODO: this could keep count of characters alrady replaced in out to save CPU
for (size_t i = 0; i < N; ++i) {
if (is_one_of_them(out[i])) {
out[i] = convert_them_to_n(out[i]);
}
}
}
static void buf_flush_one(void) {
assert(pos > 0);
assert(pos == N);
output(out, 1);
count_them -= is_one_of_them(buf[0]);
memmove(buf, buf + 1, pos - 1);
memmove(out, out + 1, pos - 1);
pos--;
}
static void buf_add(char c) {
buf[pos] = out[pos] = c;
pos++;
count_them += is_one_of_them(c);
// if we reached the substring length
if (pos == N) {
// if the count reached the threshold
if (count_them >= threshold) {
// convert the characters to n
buf_replace_them();
}
// flush one character only at a time
buf_flush_one();
}
}
int main() {
int c;
buf_reset();
while ((c = getchar()) != EOF) {
if (c == '\n') {
// If its a newline, just flush what we have buffered
buf_flush();
output("\n", 1);
continue;
}
buf_add(c);
}
buf_flush();
}
Such a C program is easily transferable to for example an awk script, just one need to read one character at a time. Below I split the characters with split, like:
awk -v N=10 -v percent=50 '
BEGIN{ threshold = N * percent / 100; pos=0 }
function is_one_of_them(c) {
return c ~ /^[aAtT]$/;
}
function buf_flush(i) {
for (i = 0; i < pos; ++i) {
printf "%s", out[i]
}
pos = 0
count_them = 0
}
function buf_replace_them(i) {
for (i = 0; i < pos; ++i) {
if (is_one_of_them(out[i])) {
out[i] = out[i] ~ /[AT]/ ? "N" : "n";
}
}
}
function buf_flush_one(i) {
printf "%s", out[0]
count_them -= is_one_of_them(buf[0])
if(0 && debug) {
printf(" count_them %s ", count_them)
for (i = 0; i < pos-1; ++i) {
printf("%s", buf[i+1])
} printf(" ");
for (i = 0; i < pos-1; ++i) {
printf("%s", out[i+1])
}
printf("\n");
}
for (i = 0; i < pos-1; ++i) {
buf[i] = buf[i+1]
out[i] = out[i+1]
}
pos--
}
function buf_add(c) {
buf[pos]=c; out[pos]=c; pos++
count_them += is_one_of_them(c)
if (pos == N) {
if (count_them >= threshold) {
buf_replace_them()
}
buf_flush_one()
}
}
{
split($0, chars, "")
for (idx = 0; idx <= length($0); idx++) {
buf_add(chars[idx])
}
buf_flush();
printf "\n";
}
'
Both programs when run with the input presented in the first line produce the output presented in the second line (note that lone a near the end is not replaced, because there are no 5 charactets ATat in a window of 10 characters from it):
NNNNNNNNNNggcaaacagaatccagcagcacatcaaaaagcttatccacAGTAATTCATTATATCAAAATGCTCCAggccaggcgtggtggcttatgcc
NNNNNNNNNNggcnnncngnnnccngcngcncnncnnnnngcnnnnccncNGNNNNNCNNNNNNNCNNNNNGCNCCNggccaggcgnggnggcnnnngcc
Both solutions were tested on repl.

You need to be careful with how you address this problem. You cannot work on the substituted string. You need to keep track of the original string. Here is a simple example. Assume we have a string consisting of x and y and we want to replace all y with z if there are 8 y in a substring of 10. Imagine your input looks like:
yyyyyyyyxxy
The first substring of 10 reads yyyyyyyyxx and would be translated into zzzzzzzzxx. If you perform the substitution directly into the original string, you get zzzzzzzzxxy. The second substring now reads zzzzzzzxxy, and does not contain 8 times y, while in the original string it does. So according to the solution of the OP, this would lead into inconsistent results, depending on if you start from the front or the back. So a quick solution would be:
awk -v N=10 -v p=50 '
BEGIN { n = N*p/100 }
{ s = $0 }
{ for(i=1;i<=length-N;++i) {
str=substr($0,i,N)
c=gsub(/[AT]/,"N",str) + gsub(/[at]/,"n",str)
if(c >= n) s = substr(s,1,i-1) str substr(s,i+N)
}
}
{ print s }' file
There is ofcourse quite some work you do double here. Imagine you have a string of the form xxyyyyyyyyxx, you would perform 4 concatinations while you only need to do one. So the best idea is to minimalise the work and only check the substrings which end with the respective character:
awk -v N=10 -v p=50 '
BEGIN { n = N*p/100 }
{ s = $0 }
{ i=N; while (match(substr($0,i),/[ATat]/)) {
str=substr($0,i+RSTART-N,N)
c=gsub(/[AT]/,"N",str) + gsub(/[at]/,"n",str)
if(c >= n) { s = substr(s,1,i+RSTART-N-1) str substr(s,i+RSTART)}
i=i+RSTART
}
}
{ print s }' file

To replace $0 at position i with x do:
awk 'BEGIN{i=12345;x="blubber"}
{
printf("%s",substr($0,1,i));
printf("%s",x);
printf("%s",substr($0,i+length(x)));
}'
I don't think there is any faster method.
To replace AGCT with N and agct with n use tr. To replace them only within a range and using awk you should do:
awk 'BEGIN{i=12345;n=123}
{
printf("%s",substr($0,1,i-1));
printf(gsub(/[atgc]/,"n",gsub(/[ATGC]/,"N",substr($0,i,i+n-1))));
printf("%s",substr($0,i+n));
}'
To do more advanced and faster processing you should consider c/c++.

CUDA string search in large file, wrong result

I am working on simple naive string search in CUDA.
I am new in CUDA. It works fine fol smaller files ( aprox. ~1MB ). After I make these files bigger ( ctrl+a ctrl+c several times in notepad++ ), my program's results are higher ( about +1% ) than a
grep -o text file_name | wc -l
It is very simple function, so I don't know what could cause this. I need it to work with larger files ( ~500MB ).
Kernel code ( gpuCount is a __device__ int global variable ):
__global__ void stringSearchGpu(char *data, int dataLength, char *input, int inputLength){
int id = blockDim.x*blockIdx.x + threadIdx.x;
if (id < dataLength)
{
int fMatch = 1;
for (int j = 0; j < inputLength; j++)
{
if (data[id + j] != input[j]) fMatch = 0;
}
if (fMatch)
{
atomicAdd(&gpuCount, 1);
}
}
}
This is calling the kernel in main function:
int blocks = 1, threads = fileSize;
if (fileSize > 1024)
{
blocks = (fileSize / 1024) + 1;
threads = 1024;
}
clock_t cpu_start = clock();
// kernel call
stringSearchGpu<<<blocks, threads>>>(cudaBuffer, strlen(buffer), cudaInput, strlen(input));
cudaDeviceSynchronize();
After this I just copy the result to Host and print it.
Can anyone please help me with this?

First of all, you should always check return values of CUDA functions to check for errors. Best way to do so would be the following:
#define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); }
inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=true)
{
if (code != cudaSuccess)
{
fprintf(stderr,"GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
if (abort) exit(code);
}
}
Wrap your CUDA calls, such as:
gpuErrchk(cudaDeviceSynchronize());
Second, your kernel accesses out of bounds memory. Suppose, dataLength=100, inputLength=7 and id=98. In your kernel code:
if (id < dataLength) // 98 is less than 100, so condition true
{
int fMatch = 1;
for (int j = 0; j < inputLength; j++) // j runs from [0 - 6]
{
// if j>1 then id+j>=100, which is out of bounds, illegal operation
if (data[id + j] != input[j]) fMatch = 0;
}
Change the condition to something like:
if (id < dataLength - inputLength)

finding shortest path of all nodes from a given node using BFS

when i increase or decrease INF value the Output Behaves Unexpectedly..
I think INF should not have any effect on the output..
length of each edge is 6
for input
1
4 2
1 2
1 3
1
the output is 6 6 -1
when I change INF to 1e8 the output is 0 0 0
#include<iostream>
#include<vector>
#include<queue>
#include<cstring>
using namespace std;
#define MAX 2000
#define INF 1000000
vector<int> adj[MAX];
int d[MAX];
bool visited[MAX];
void initialise(){
for(int i=0;i<=MAX;i++){
visited[i]=false;
}
}
void shortestPathBfs(int start){
queue<int> q;
q.push(start);
visited[start]=true;
d[start]=0;
while(!q.empty()){
int p=q.front();
q.pop();
for(int i=0;i<adj[p].size();i++){
int v=adj[p][i];
if(!visited[v] && d[v]>d[p]+6){
d[v]=d[p]+6;
q.push(v);
visited[v]=true;
}
}
}
}
int main(){
int T,N,M,S,x,y;
cin>>T;
while(T--){
cin>>N>>M;
for(int i=0;i<M;i++){
cin>>x>>y;
adj[x].push_back(y);
adj[y].push_back(x);
}
cin>>S;
initialise();
memset(d,INF,sizeof(d));
shortestPathBfs(S);
for(int i = 1; i <=N; i++) {
if(i == S)
continue;
if(d[i] >= INF)
cout<<"-1"<<" ";
else
cout<<d[i]<<" ";
}
}
}

The problem is with
memset(d,INF,sizeof(d));
memset() only fills memory with a byte value. Here it will wind up filling the array with the least significant byte of the int value INF. To fix it, create an explicit for loop or use std::fill() instead.

How to print numbers in a spiral order?

Thank you ,
i am trying to solve a project euler problem it wants me to print the sum of
21 22 23 24 25
20 7 8 9 10
19 6 1 2 11
18 5 4 3 12
17 16 15 14 13
this is formed by starting with the number 1 and moving to the right in a clockwise direction for a 5 by 5 matrix but i am in trouble writing a code for the spiral matrix !!

It is Highly recommended to do project Euler problems on your own and ask for help if you are really stuck
here is how i will write a code in c to print a spiral as suggested in the question
#include<stdio.h>
main()
{
int i,j,nq=9;//nq is a odd number which represents the order of the matrix
int lim=(int)nq/2,cnt=2;
int a[nq][nq];
for(i=0;i<nq;i++){
for(j=0;j<nq;j++)
a[i][j]=0;
}
a[lim][lim]=1;
a[lim][lim+1]=2;
int i1=lim,j1=lim+1;i=lim,j=lim;
while(1){
if(cnt>(nq*nq))
break;
cnt++;
if(i==i1)
{ j=j1;
if(i<=lim)
{
i=i1;
if(a[i1+1][j1]==0)
a[++i1][j]=cnt;
else
a[i1][++j1]=cnt;
}
else
{ i=i1;
if(a[i1-1][j1]==0)
a[--i1][j1]=cnt;
else
a[i1][--j1]=cnt;
}
}
else
{ i=i1;
if(j<lim)
{
j=j1;
if(a[i1][j+1]==0)
a[i1][++j1]=cnt;
else
a[--i1][j1]=cnt;
}
else
{ j=j1;
if(a[i1][j1-1]==0)
a[i1][--j1]=cnt;
else
a[++i1][j1]=cnt;
}
}
}
for(i=0;i<nq;i++){
for(j=0;j<nq;j++)
printf(" %d ",a[i][j]);
printf("\n");
}
}
I Googled your question http://projecteuler.net/problem=28 this can also be solved by taking advantage of its mathematical nature note that
Top right corner is n^2
and other corners can be shown to be n^2-2n+2 ,n^2-n+1, and n^2-3n+3. you just need to sum those corners which comes to be
= 4*n^2 - 6*n + 6
hence the final answer can be calculated by iterating over every second number from 1001 to 3
long int sum(int n){
long int sum=1;
while(n>1){
sum=sum+4*n*n-6*n+6;
n=n-2;
}
return sum;
}

I dont know whether you actually want to print the spiral but see below for my solution for #28 written in Python 2.7.
l = [1]
def corners(step,l):
counter = 0
while counter < 4:
l.append(max(l)+step)
counter +=1
return l
step = 2
while step < 1001:
l = corners(step, l)
step += 2
print sum(l)

void printSpiral(int A[3][5],int m, int n)
{
int T=0; int B=m-1; int L=0; int R=n-1;
int dir=0;
int i =0; int j=0; int k=0; int l=0;
while(T<=B && L<=R)
{
//printf("dir %d ",dir);
if(dir == 0)
{
for( i=L;i<=R;i++)
{
printf("%d ",A[T][i]);
//printf("\n");
}
T++;
dir=1;
}
else if(dir == 1)
{
// printf("%d R ",R);
for( j=T;j<= B;j++)
{
printf("%d ",A[j][R]);
//printf("\n");
//printf("dir1");
}
dir=2;
R--;
}
else if(dir == 2)
{
for(k=R;k>= L;k--)
{
printf("%d ",A[B][k]);
}
dir=3;
B--;
}
else if(dir == 3)
{
for( l=B;l>= T;l--)
{
printf("%d ",A[l][L]);
}
L++;
dir=0;
}
}
}

using sscanf(), read string to array of int?

i have this string:
12 4 the quick 99 -1 fox dog \
what i want in my program:
myArray[] = {12, 4, 99, -1};
how i do a multiple number scanning?

See my answer to your other question here. It's a relatively simple matter to replace the strtok section to recognize non-numeric words and neither increment the count (in the first pass) nor load them into the array (in the second pass).
The code has changed as follows:
Using an input file of:
12 3 45 6 7 8
3 5 6 7
7 0 -1 4 5
12 4 the quick 99 -1 fox dog \
it produces output along the lines of:
0x8e42170, size = 6:
12 3 45 6 7 8
0x8e421d0, size = 4:
3 5 6 7
0x8e421e0, size = 5:
7 0 -1 4 5
0x8e42278, size = 4:
12 4 99 -1
Here's the code that produced that output:
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <errno.h>
// This is the linked list of integer arrays.
typedef struct _tIntArray {
int size;
int *array;
struct _tIntArray *next;
} tIntArray;
static tIntArray *first = NULL;
static tIntArray *last = NULL;
// Check that argument is numeric, optional minus sign followed by
// zero or more digits (you may want one or more).
static int isAllNumeric (char *word) {
char *s = word;
if (*s == '-')
s++;
for (; *s != '\0'; s++)
if ((*s < '0') || (*s > '9'))
return 0;
return 1;
}
// Add a line of integers as a node.
static int addNode (char *str) {
tIntArray *curr; // pointers for new integer array.
char *word; // word within string.
char *tmpStr; // temp copy of buffer.
int fldCnt; // field count for line.
int i;
// Count number of fields.
if ((tmpStr = strdup (str)) == NULL) {
printf ("Cannot allocate duplicate string (%d).\n", errno);
return 1;
}
fldCnt = 0;
for (word = strtok (tmpStr, " "); word; word = strtok (NULL, " "))
if (isAllNumeric (word))
fldCnt++;
free (tmpStr);
// Create new linked list node.
if ((curr = malloc (sizeof (tIntArray))) == NULL) {
printf ("Cannot allocate integer array node (%d).\n", errno);
return 1;
}
curr->size = fldCnt;
if ((curr->array = malloc (fldCnt * sizeof (int))) == NULL) {
printf ("Cannot allocate integer array (%d).\n", errno);
free (curr);
return 1;
}
curr->next = NULL;
for (i = 0, word = strtok (str, " "); word; word = strtok (NULL, " "))
if (isAllNumeric (word))
curr->array[i++] = atoi (word);
if (last == NULL)
first = last = curr;
else {
last->next = curr;
last = curr;
}
return 0;
}
int main(void) {
int lineSz; // current line size.
char *buff; // buffer to hold line.
FILE *fin; // input file handle.
long offset; // offset for re-allocating line buffer.
tIntArray *curr; // pointers for new integer array.
int i;
// Open file.
if ((fin = fopen ("qq.in", "r")) == NULL) {
printf ("Cannot open qq.in, errno = %d\n", errno);
return 1;
}
// Allocate initial line.
lineSz = 2;
if ((buff = malloc (lineSz+1)) == NULL) {
printf ("Cannot allocate initial memory, errno = %d.\n", errno);
return 1;
}
// Loop forever.
while (1) {
// Save offset in case we need to re-read.
offset = ftell (fin);
// Get line, exit if end of file.
if (fgets (buff, lineSz, fin) == NULL)
break;
// If no newline, assume buffer wasn't big enough.
if (buff[strlen(buff)-1] != '\n') {
// Get bigger buffer and seek back to line start and retry.
free (buff);
lineSz += 3;
if ((buff = malloc (lineSz+1)) == NULL) {
printf ("Cannot allocate extra memory, errno = %d.\n", errno);
return 1;
}
if (fseek (fin, offset, SEEK_SET) != 0) {
printf ("Cannot seek, errno = %d.\n", errno);
return 1;
}
continue;
}
// Remove newline and process.
buff[strlen(buff)-1] = '\0';
if (addNode (buff) != 0)
return 1;
}
// Dump table for debugging.
for (curr = first; curr != NULL; curr = curr->next) {
printf ("%p, size = %d:\n ", curr, curr->size);
for (i = 0; i < curr->size; i++)
printf (" %d", curr->array[i]);
printf ("\n");
}
// Free resources and exit.
free (buff);
fclose (fin);
return 0;
}

Develop Reference

ruby bash windows laravel spring algorithm oracle macos go visual-studio

getline() reads first line multiple times - c++11

Related

Any faster way to replace substring in AWK

CUDA string search in large file, wrong result

finding shortest path of all nodes from a given node using BFS

How to print numbers in a spiral order?

using sscanf(), read string to array of int?

Categories

Resources