I am converting a CSV file into a table format, and I wrote an AWK script and saved it as my.awk. Here is my script:
#AWK for test
awk -F , '
BEGIN {
    aa = 0;
}
{
    hdng = "fname,lname,salary,city";
    l1 = length($1);
    l13 = length($13);
    if ((l1 > 2) && (l13 == 0)) {
        fname = substr($1, 2, 1);
        l1 = length($3) - 4;
        lname = substr($3, l1, 4);
        processor = substr($1, 2);
        #printf("%s,%s,%s,%s\n", fname, lname, salary, $0);
    }
    if ($0 ~ ",,,,")
        aa++
    else if ($0 ~ ",fname")
        printf("%s\n", hdng);
    else if ((l1 > 2) && (l13 == 0)) {
        a++;
    }
    else {
        perf = $11;
        if (perf ~ /^[0-9\.\" ]+$/)
            type = "num"
        else
            type = "char";
        if (type == "num")
            printf("Mr%s,%s,%s,%s,,N,N,,\n", $0, fname, lname, city);
    }
}
END {
} ' < life.csv > life_out.csv
How can I run this script on a Unix server? I tried to run my.awk using this command:
awk -f my.awk life.csv
The file you have is a shell script, not an AWK program, so try sh my.awk.
If you want to use awk -f my.awk life.csv > life_out.csv instead, then remove the awk -F , ' line and the closing ' < life.csv > life_out.csv line from the file, and set FS = "," in the BEGIN block.
It is easier if you put #!/bin/awk -f on the first line of your AWK script. Editors like Vim will also recognize the file as an AWK script and can colorize it. :)
#!/bin/awk -f
BEGIN {} # Begin section
{} # Loop section
END{} # End section
Change the file to be executable by running:
chmod ugo+x ./awk-script
and you can then call your AWK script like this:
$ echo "something" | ./awk-script
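For instance, if awk-script contains a trivially filled-in version of the skeleton:
#!/bin/awk -f
BEGIN { print "start" }   # Begin section: runs once before any input
{ print "line: " $0 }     # Loop section: runs for every input line
END { print "done" }      # End section: runs once after all input
then echo "something" | ./awk-script prints start, line: something, and done.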
Put the part from BEGIN through END{} inside a file and name it something like my.awk.
Then execute it like this:
awk -f my.awk life.csv >output.txt
I also see you use , as the field separator. You can set that in the BEGIN block of the .awk file with FS=","
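Putting that together, the converted my.awk would be framed like this (a sketch; the main { ... } block from the question goes in the middle, unchanged):
#!/bin/awk -f
# my.awk -- the surrounding  awk -F , '  and the closing  ' < life.csv > life_out.csv  are removed
BEGIN {
    FS = ","    # replaces the -F , command-line option
    aa = 0
}
{
    # ... main block from the original script, unchanged ...
}
END {
}
Then run awk -f my.awk life.csv > life_out.csv, or chmod +x my.awk and ./my.awk life.csv > life_out.csv.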
I have the following in a text file called data.txt
&st=1000&type=rec&uniId=5800000000&acceptCode=1000&drainNel=supp&
&st=1100&type=rec&uniId=5800000000&acceptCode=1000&drainNel=supp&
&st=4100&type=rec&uniId=6500000000&acceptCode=4100&drainNel=ured&
&st=4200&type=rec&uniId=6500000000&acceptCode=4100&drainNel=iris&
&st=4300&type=rec&uniId=6500000000&acceptCode=4100&drainNel=iris&
&st=8300&type=rec&uniId=7700000000&acceptCode=8300&drainNel=teef&
1) The script will take an input argument in the form of a number, e.g. 979035210000000098.
2) I want to replace every uniId=xxxxxxxxxx value with the long number passed as the argument to the script. IMPORTANT: if the uniId is the same, the same replacement value is used for all of those lines (in this case, the first two lines share one value, the next three share another, and the last line has its own). For each new batch, the replacement value is incremented by 5,000,000,000 from the last one.
All other fields should be ignored and must not be modified.
So essentially doing this:
./script.sh 979035210000000098
Still confused? Well, the final result would be this:
&st=1000&type=rec&uniId=979035210000000098&acceptCode=1000&drainNel=supp&
&st=1100&type=rec&uniId=979035210000000098&acceptCode=1000&drainNel=supp&
&st=4100&type=rec&uniId=979035215000000098&acceptCode=4100&drainNel=ured&
&st=4200&type=rec&uniId=979035215000000098&acceptCode=4100&drainNel=iris&
&st=4300&type=rec&uniId=979035215000000098&acceptCode=4100&drainNel=iris&
&st=8300&type=rec&uniId=979035220000000098&acceptCode=8300&drainNel=teef&
This result should be applied to a temp file datanew.txt, not just printed on screen.
An AWK script already exists that does this kind of replacement for &st=xxx and &acceptCode=xxx. Perhaps I can reuse it, but I have not been able to get it working the way I expect:
# $./script.sh [STARTCOUNT] < data.txt > datanew.txt
# $ mv -f datanew.txt data.txt
awk -F '&' -v "cnt=${1:-10000}" -v 'OFS=&' \
'NR == 1 { ac = cnt; uni = $4; }                      # first line: seed the counters
NR > 1 && $4 == uni { cnt += 100 }                    # same uniId: small increment
$4 != uni { cnt += 5000000000; ac = cnt; uni = $4 }   # new uniId: big increment
{ $2 = "st=" cnt; $5 = "acceptCode=" ac; print }'
Using GNU awk, you can do this:
awk -M -i inplace -v num=979035210000000098 'BEGIN{FS=OFS="&"}
!seen[$4]++{p = (NR>1 ? p+5000000000 : num)} {$4="uniId=" p} 1' file
&st=1000&type=rec&uniId=979035210000000098&acceptCode=1000&drainNel=supp&
&st=1100&type=rec&uniId=979035210000000098&acceptCode=1000&drainNel=supp&
&st=4100&type=rec&uniId=979035215000000098&acceptCode=4100&drainNel=ured&
&st=4200&type=rec&uniId=979035215000000098&acceptCode=4100&drainNel=iris&
&st=4300&type=rec&uniId=979035215000000098&acceptCode=4100&drainNel=iris&
&st=8300&type=rec&uniId=979035220000000098&acceptCode=8300&drainNel=teef&
The -M (or --bignum) option forces arbitrary-precision arithmetic on numbers in GNU awk; it is needed here because 979035210000000098 is too large to represent exactly in a double.
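If you want the ./script.sh 979035210000000098 interface from the question, a minimal wrapper sketch (assuming gawk 4.1+ built with MPFR, which -M and -i inplace require) could look like this:
#!/bin/sh
# script.sh -- usage: ./script.sh 979035210000000098
# assumes GNU awk 4.1+ with MPFR support (for -M and -i inplace)
gawk -M -i inplace -v num="$1" 'BEGIN{FS=OFS="&"}
!seen[$4]++{p = (NR>1 ? p+5000000000 : num)}   # first time a uniId is seen: pick its value
{$4="uniId=" p}                                # rewrite the uniId field
1' data.txt
Because of -i inplace, data.txt is modified directly, so no separate datanew.txt and mv step is needed.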
Suppose I have a file which I created from two files, one old and the other updated, by using cat and sort on the primary key.
File1
102310863||7097881||6845193||271640||06007709532577||||
102310863||7097881||6845123||271640||06007709532577||||
102310875||7092992||6840808||023740||10034500635650||||
102310875||7092992||6840818||023740||10034500635650||||
So the pattern of this file is: line 1 = old value, line 2 = updated value, and so on.
Now I want awk to process the file so that it first processes the first two lines, finds the differences, and then moves on to the next two lines.
The logic, in pseudocode, is:
if ([old field] != [new field])
    [output field] = [new field]#[old field];
Desired output
102310863||7097881||6845123#6845193||271640||06007709532577||||
102310875||7092992||6840818#6840808||023740||10034500635650||||
$ cat tst.awk
BEGIN { FS="[|][|]"; OFS="||" }
NR%2 { split($0,old); next }       # odd lines: remember the old record's fields
{
    for (i=1;i<=NF;i++) {          # even lines: compare field by field
        if (old[i] != $i) {
            $i = $i "#" old[i]     # differing field becomes new#old
        }
    }
    print
}
$
$ awk -f tst.awk file
102310863||7097881||6845123#6845193||271640||06007709532577||||
102310875||7092992||6840818#6840808||023740||10034500635650||||
This awk could help:
$ awk -F '\\|\\|' '{
getline new;
split(new, new_array, "\\|\\|");
for(i=1;i<=NF;i++) {
if($i != new_array[i]) {
$i = new_array[i]"#"$i;
}
}
} 1' OFS="||" < input_file
102310863||7097881||6845123#6845193||271640||06007709532577||||
102310875||7092992||6840818#6840808||023740||10034500635650||||
I think you know enough awk to understand the code above, so I'll skip the explanation.
Updated version, and thanks @martin for the double-| trick:
$ cat join.awk
BEGIN {new=0; FS="[|]{2}"; OFS="||"}
new==0 {
split($0, old_data, "[|]{2}")
new=1
next
}
new==1 {
split($0, new_data, "[|]{2}")
for (i = 1; i <= 7; i++) {
if (new_data[i] != old_data[i]) new_data[i] = new_data[i] "#" old_data[i]
}
print new_data[1], new_data[2], new_data[3], new_data[4], new_data[5], new_data[6], new_data[7]
new = 0
}
$ awk -f join.awk data.txt
102310863||7097881||6845123#6845193||271640||06007709532577||||
102310875||7092992||6840818#6840808||023740||10034500635650||||
I have the following text line :
"Field1":"Data1","Field2":"Data2","Field3":"Data3","Field4":"Data4" ...
And I need to generate the following INSERT statement :
INSERT INTO data (Field1,Field2,Field3,Field4 ... ) VALUES(Data1,Data2,Data3,Data4 ... );
Any ideas on how to do it in BASH?
Thanks in advance!
$ cat file
"Field1":"Data1","Field2":"Data2","Field3":"Data3","Field4":"Data4"
$
$ cat tst.awk
BEGIN { FS="^\"|\"[:,]\"|\"$" }
{
fields = values = ""
for (i=2; i<NF; i+=2) {
fields = fields (i>2 ? "," : "") $i
values = values (i>2 ? "," : "") $(i+1)
}
printf "INSERT INTO data (%s) VALUES(%s);\n", fields, values
}
$
$ awk -f tst.awk file
INSERT INTO data (Field1,Field2,Field3,Field4) VALUES(Data1,Data2,Data3,Data4);
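The FS regex splits on the opening quote, on each ":" or "," between quoted items, and on the closing quote, so $1 and $NF are empty while the field names land in $2, $4, ... and their values in $3, $5, ...; the loop then walks those pairs.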
You could try this awk command:
$ cat file
"Field1":"Data1","Field2":"Data2","Field3":"Data3","Field4":"Data4"
$ awk -F'[:"]+' '{s=(NR>1?",":""); fields=fields s $2;data=data s $3}END{printf "INSTERT INTO data(%s) VALUES(%s)\n", fields,data}' RS="," file
INSTERT INTO data(Field1,Field2,Field3,Field4) VALUES(Data1,Data2,Data3,Data4)
Or, a bit more readably:
#!/usr/bin/awk -f
BEGIN {
FS ="[:\"]+";
RS=",";
}
{
s=(NR>1?",":"")
fields=fields s $2
data=data s $3
}
END{
printf "INSTERT INTO data(%s) VALUES(%s)\n", fields,data
}
Save it in a file named script.awk, make it executable with chmod +x script.awk, and run it like:
./script.awk file
Since you specifically asked for a BASH solution (rather than awk, perl, or python):
data='"Field1":"Data1","Field2":"Data2","Field3":"Data3","Field4":"Data4"'
data=${data//,/$'\n'} # replace comma with new-lines
data=${data//\"/} # remove the quotes
while IFS=':' read -r field item
do
if [[ -n $fields ]]
then
fields="$fields,$field"
items="$items,$item"
else
fields=$field
items=$item
fi
done < <(echo "$data")
stmt="INSERT INTO data ($fields) VALUES($items);"
echo "$stmt"
sed -n '
# append ") VALUES(" to the end of the line
s/$/) VALUES(/
: next
# peel off one "name":"value" pair per pass: the name goes left, the value into VALUES(...)
s/"\([^"]*\)":"\([^"]*\)"\(.*\)) VALUES(\(.*\)/\1\3) VALUES(\4,\2/
t next
# drop the leading comma in the value list
s/VALUES(,/VALUES(/
# add the INSERT prefix and the closing paren
s/.*/INSERT INTO data (&)/
p
' YourFile
This assumes there is no literal " inside a data value and no ) VALUES( substring (both could also be handled if needed).
I have a csv file which I'll be using as input with a format looking like this:
xValue,value1-avg,value1-median,value2-avg,value3-avg,value3-median
1,3,4,20,14,20
The key attributes of the input file are that each "value" will have a variable number of statistics, but the statistic type and "value" will always be separated by a "-". I then want to output the statistics of all the "values" to separate csv files.
The output would then look something like this:
value1.csv
xvalue,value1-avg,value1-median
1,3,4
value2.csv
xvalue,value2-avg
1,20
I've tried finding solutions to this, but all I can find are ways to copy by the column number, not the header name. I need to be able to use the header names to append the associated statistics to each of the output csv files.
Any help is greatly appreciated!
P.S. the output file may have already been written to during previous runs of this script, meaning the code should append to the output file
Untested but should be close:
awk -F, '
NR==1 {
for (i=2;i<=NF;i++) {
outfile = $i
sub(/-.*/,".csv",outfile)
outfiles[i] = outfile
}
}
{
delete(outstr)
for (i=2;i<=NF;i++) {
outfile = outfiles[i]
outstr[outfile] = outstr[outfile] FS $i
}
for (outfile in outstr)
print $1 outstr[outfile] >> outfile
}
' inFile.csv
Note that deleting a whole array with delete(outstr) is gawk-specific. With other awks you can use split("",outstr) to get the same effect.
Note that this appends the output you wanted to existing files, BUT that means you'll get the header line repeated on every execution. If that's an issue, tell us how to know when to generate the header line or not; the solution I THINK you'll want would look something like this:
awk -F, '
NR==1 {
for (i=2;i<=NF;i++) {
outfile = $i
sub(/-.*/,".csv",outfile)
outfiles[i] = outfile
}
for (outfile in outfiles) {
exists[outfile] = ( ((getline tmp < outfile) > 0) && (tmp != "") )
close(outfile)
}
}
{
delete(outstr)
for (i=2;i<=NF;i++) {
outfile = outfiles[i]
outstr[outfile] = outstr[outfile] FS $i
}
for (outfile in outstr)
if ( (NR > 1) || !exists[outfile] )
print $1 outstr[outfile] >> outfile
}
' inFile.csv
Just figure out the name associated with each column and use that mapping to manipulate the columns. If you're doing this in awk, you can use associative arrays to map the column names to the columns they correspond to; the same goes for ksh93 or bash. If you're using perl or python or ruby or ... you can do the same.
Or push the column headers into an array to map names to column numbers.
Either way, you then have a list of column headers which can be manipulated however you need, as in the sketch below.
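For instance, a minimal awk sketch of that mapping idea (the lookup of value1-avg is just for illustration):
awk -F, '
NR==1 { for (i=1; i<=NF; i++) colnum[$i] = i; next }   # map header name -> column number
{ print $(colnum["value1-avg"]) }                      # then address fields by name
' inFile.csv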
The solution I have found most useful for this kind of problem is to first retrieve the column numbers using an AWK script (encapsulated in a shell function) and then follow with a cut command. This technique turns into a very concise, general, and fast solution that can take advantage of co-processing. The non-append case is cleaner, but here is an example that handles the complication of the append you mentioned:
#! /bin/sh
fields() {
    # print the comma-separated list of column numbers whose header matches $1
    LC_ALL=C awk -F, -v pattern="$1" '{
        j=0; split("", f)
        for (i=1; i<=NF; i++) if ($(i) ~ pattern) f[j++] = i
        if (j) {
            printf("%s", f[0])
            for (i=1; i<j; i++) printf(",%s", f[i])
        }
        exit 0
    }' "$2"
}

cut_fields_with_append() {
    # skip the header line when appending to an existing, non-empty output file
    if [ -s "$3" ]
    then
        cut -d, -f `fields "$1" "$2"` "$2" | sed '1 d' >> "$3"
    else
        cut -d, -f `fields "$1" "$2"` "$2" > "$3"
    fi
}
cut_fields_with_append '^[^-]+$|1-' values.csv value1.csv &
cut_fields_with_append '^[^-]+$|2-' values.csv value2.csv &
cut_fields_with_append '^[^-]+$|3-' values.csv value3.csv &
wait
The result is as you would expect:
$ ls
values values.csv
$ cat values.csv
xValue,value1-avg,value1-median,value2-avg,value3-avg,value3-median
1,3,4,20,14,20
$ ./values
$ ls
value1.csv value2.csv value3.csv values values.csv
$ cat value1.csv
xValue,value1-avg,value1-median
1,3,4
$ cat value2.csv
xValue,value2-avg
1,20
$ cat value3.csv
xValue,value3-avg,value3-median
1,14,20
$ ./values
$ cat value1.csv
xValue,value1-avg,value1-median
1,3,4
1,3,4
$ cat value2.csv
xValue,value2-avg
1,20
1,20
$ cat value3.csv
xValue,value3-avg,value3-median
1,14,20
1,14,20
$
I have a file, of which a part is shown below:
OUTPUT_FILENAME="out.Received.Power.x.0.y.1.z.0.41
X_TX=0
Y_TX=1
Z_TX=0.41
I would like to automatically change some parts of it with BASH: every time I see OUTPUT_FILENAME, I want to overwrite the name next to it with a new one. Then I want to do the same with the values X_TX, Y_TX and Z_TX: delete the value next to each and write a new one. For example, instead of X_TX=0 I want X_TX=0.3, or vice versa.
Do you think it's possible? Maybe with grep or something similar?
You can use sed. For example, to replace whatever follows X_TX= with X_TX=123:
sed -i -e 's/X_TX=.*/X_TX=123/g' /tmp/file1.txt
One option uses awk. Your values are passed as variables to the awk script and substituted when a match exists:
awk -v outfile="str_outfile" -v x_tx="str_x" -v y_tx="str_y" -v z_tx="str_z" '
BEGIN { FS = OFS = "=" }
$1 == "OUTPUT_FILENAME" { $2 = outfile; print; next }
$1 == "X_TX" { $2 = x_tx; print; next }
$1 == "Y_TX" { $2 = y_tx; print; next }
$1 == "Z_TX" { $2 = z_tx; print; next }
{ print }  # pass any other lines through unchanged
' infile
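For example (the replacement values here are illustrative), and since awk does not edit the file in place, write to a temp file and move it back:
awk -v outfile='"out.New.Name.x.0.3.y.1.z.0.41' -v x_tx="0.3" -v y_tx="1" -v z_tx="0.41" '
    ... same script as above ...
' infile > infile.tmp && mv infile.tmp infile
Note that the outfile value keeps the leading double quote that appears in the sample file, since FS="=" leaves it as part of $2.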