Define field with FPAT - bash

I am trying to split data into fields in awk, but I can't come up with the right regex using FPAT.
I have tried:
echo 'C002 2019-06-28;16:03;approved;content=L1-34,EE;not taken;;1024 ' | awk 'BEGIN {FPAT = "([^ ]+) +[^ ]+|;"} {print "f1:"$1;print "f2:"$2;print "f3:"$3;print "f6:"$6;print "f7:"$7}'
Expected result:
f1:C002
f2:2019-06-28
f3:16:03
f6:not taken
f7:

There is no simple way to tell a space that separates fields from a space that is part of a field (as in "not taken").
You need to do as David writes: separate using ; and then split the first field on whitespace.
awk -F";" '{split($1,a,"[ \t]+");print "a[1]---"a[1]"\na[2]---"a[2];for (i=1;i<=NF;i++) print i"---"$i}'
a[1]---C002
a[2]---2019-06-28
1---C002 2019-06-28
2---16:03
3---approved
4---content=L1-34,EE
5---not taken
6---
7---1024

A bit similar to the answer of Jotne, but you could write a function to split the record according to your wishes:
awk '
# Split a record into the array f: the text before the first ";" is broken
# on runs of spaces/tabs, and every remaining ";"-separated piece is appended
# after those words. Returns the number of elements stored in f.
# t, n, m and i are function-local scratch variables (i used to leak as a global).
function split_record(string, f,    t, n, m, i) {
    n = split(string, t, ";")        # t[1..n]: the ";"-separated pieces
    m = split(t[1], f, "[ \t]+")     # f[1..m]: words of the first piece
    for (i = 2; i <= n; ++i)
        f[m + i - 1] = t[i]          # append the remaining pieces after the words
    return m + n - 1
}
{ split_record($0, f) }
{ print "f1:" f[1]; print "f2:" f[2]; print "f3:" f[3]; print "f6:" f[6]; print "f7:" f[7] }'
This returns:
f1:C002
f2:2019-06-28
f3:16:03
f6:not taken
f7:
You can update the split record in any way you like.

awk '
BEGIN { FS = ";"; OFS = ";" }
{
    # a[1], a[2]: first two whitespace-separated words of the first field
    split($1, a, /[[:space:]]+/)
    $1 = ""                       # record is rebuilt and now starts with ";"
    $0 = a[1] FS a[2] $0          # re-split with the two words as separate fields
    i = 0
    while (++i <= NF)
        print "f" i ":" $i
}
' file
f1:C002
f2:2019-06-28
f3:16:03
f4:approved
f5:content=L1-34,EE
f6:not taken
f7:
f8:1024

Related

awk to get value for a column of next line and add it to the current line in shellscript

I have a csv file lets say lines
cat lines
1:abc
6:def
17:ghi
21:tyu
I wanted to achieve something like this
1:6:abc
6:17:def
17:21:ghi
21::tyu
I tried the code below, but it didn't work:
awk 'BEGIN{FS=OFS=":"}NR>1{nln=$1;cl=$2}NR>0{print $1,nln,$2}' lines
1::abc
6:6:def
17:17:ghi
21:21:tyu
Can you please help ?
Here is a potential AWK solution:
cat lines
1:abc
6:def
17:ghi
21:tyu
awk -F":" '{num[NR]=$1; letters[NR]=$2}; END{for(i=1;i<=NR;i++) print num[i] ":" num[i + 1] ":" letters[i]}' lines
1:6:abc
6:17:def
17:21:ghi
21::tyu
Formatted:
awk '
BEGIN { FS = ":"; OFS = ":" }
# Remember both columns of every line, indexed by line number.
{ key[NR] = $1; val[NR] = $2 }
# Print each line with the key of the following line inserted in the middle;
# for the last line that key does not exist and prints as an empty field.
END {
    for (row = 1; row <= NR; row++) {
        print key[row], key[row + 1], val[row]
    }
}
' lines
1:6:abc
6:17:def
17:21:ghi
21::tyu
Basically this is your solution but I switched the order of the code blocks and added the END block to output the last record, you were close:
awk 'BEGIN{FS=OFS=":"}FNR>1{print p,$1,q}{p=$1;q=$2}END{print p,"",q}' file
Explained:
$ awk 'BEGIN {
FS=OFS=":" # delims
}
FNR>1 { # all but the first record
print p,$1,q # output $1 and $2 from the previous round
}
{
p=$1 # store for the next round
q=$2
}
END { # gotta output the last record in the END
print p,"",q # "" feels like cheating
}' file
Output:
1:6:abc
6:17:def
17:21:ghi
21::tyu
1st solution: Here is a tac + awk + tac solution. Written and tested with shown samples only.
tac Input_file |
awk '
BEGIN{
  FS=OFS=":"
}
{
  # prev holds $1 of the line read just before this one, i.e. the NEXT line of
  # the original file (input is reversed); it is empty for the first line read.
  # Plain concatenation avoids a truthiness test, which would drop a prev of 0.
  $2=prev OFS $2
  prev=$1
}
1
' | tac
Explanation: Adding detailed explanation for above code.
tac Input_file |                 ##Printing lines from bottom to top of Input_file.
awk '                            ##Getting input from previous command as input to awk.
BEGIN{                           ##Starting BEGIN section from here.
  FS=OFS=":"                     ##Setting FS and OFS as colon here.
}
{
  $2=prev OFS $2                 ##Putting prev (empty for the first line read) and OFS in front of $2; no truthiness test, so a prev of 0 is kept.
  prev=$1                        ##Setting prev to $1 value here.
}
1                                ##Printing current line here.
' | tac                          ##Sending awk output to tac to make it in actual sequence.
2nd solution: An awk-only solution that reads Input_file twice.
awk '
BEGIN { FS = ":"; OFS = ":" }
# First pass: for each line number, remember the first field of the line after it.
NR == FNR {
    if (FNR != 1)
        succ[FNR - 1] = $1
    next
}
# Second pass: put the remembered value (or nothing, on the last line) before $2.
{
    if (FNR in succ)
        $2 = succ[FNR] OFS $2
    else
        $2 = OFS $2
    print
}
' Input_file Input_file

print multiple fields if multiple pattern matches

I have a comma delimited file like below
0,category=a,type=b,value=1
1,category=c,type=b,.....,original_value=0
2,category=b,type=c,....,original_value=1,....,corrected_value=3
A line in the file can contain
(1)only 'value'
(2)only 'original_value'
(3)both 'original value' and 'corrected_value'
The values can be in any column.
The following awk command I wrote can only print one field after pattern match.
cat file | awk -F, 'BEGIN{OFS=","} /value/ { for (x=1;x<=NF;x++) if ($x~"value") {print $2,$3,$(x)} }' | sort -u
Current Output:
category=a,type=b,value=1
category=b,type=c,corrected_value=3
category=b,type=c,original_value=1
category=c,type=b,original_value=0
How do I print two fields (columns) of a line if two pattern matches occur? In this case, if both original_value and corrected_value exist.
Expected Output:
category=a,type=b,value=1
category=b,type=c,original_value=1,corrected_value=3
category=c,type=b,original_value=0
Bash Version: 4.3.11
You can use this awk command:
awk 'BEGIN { FS = ","; OFS = "," }
{
    # Start with columns 2 and 3, then append every later column that mentions "value".
    line = $2 OFS $3
    for (col = 4; col <= NF; col++)
        if ($col ~ /value/)
            line = line OFS $col
    print line
}' file
category=a,type=b,value=1
category=c,type=b,original_value=0
category=b,type=c,original_value=1,corrected_value=3
Similar to @anubhava's answer, but does not rely on the category or type being in a particular column:
awk -F, '
BEGIN { wanted = "^(category|type|value|original_value|corrected_value)" }
{
    # Collect, in input order, every field that starts with one of the wanted names.
    out = ""
    hits = 0
    for (col = 1; col <= NF; col++) {
        if ($col !~ wanted)
            continue
        out = (hits++ ? out "," : "") $col
    }
    print out
}
' file

Spreading cell values into columns using UNIX

Suppose we have this file:
head file
id,name,value
1,Je,1
2,Je,1
3,Ko,1
4,Ne,1
5,Ne,1
6,Je,1
7,Ko,1
8,Ne,1
9,Ne,1
And I'd like to get this out:
id,Je,Ko,Ne
1,1,0,0
2,1,0,0
3,0,1,0
4,0,0,1
5,0,0,1
6,1,0,0
7,0,1,0
8,0,0,1
9,0,0,1
Does someone know how to get this output, using awk or sed?
Assuming that the possible values of name are only Je or Ko or Ne, you can do:
awk -F, '
BEGIN { print "id,Je,Ko,Ne" }
NR > 1 {
    # One 0/1 flag per known name; the flag whose name equals $2, if any, becomes 1.
    is_je = "0"; is_ko = "0"; is_ne = "0"
    if ($2 == "Je") is_je = "1"
    if ($2 == "Ko") is_ko = "1"
    if ($2 == "Ne") is_ne = "1"
    print $1 "," is_je "," is_ko "," is_ne
}' file
If you want something that will print the values in the same order they are read and not limited to your example fields, you could do:
awk -F, '
BEGIN { OFS = FS; nnames = 0; nrows = 0 }
NR == 1 { next }                       # skip the header line
{
    if (!($2 in seen)) {               # first time this name appears
        seen[$2] = 1
        names[++nnames] = $2
    }
    rows[++nrows] = $0                 # keep every data line for the END pass
}
END {
    # Header: "id" followed by the names in first-seen order.
    hdr = ""
    for (k = 1; k <= nnames; k++)
        hdr = (hdr == "") ? names[k] : hdr OFS names[k]
    print "id" OFS hdr
    # One output row per stored line: 1 under its own name, 0 elsewhere.
    for (r = 1; r <= nrows; r++) {
        split(rows[r], cell)
        flags = ""
        for (k = 1; k <= nnames; k++) {
            bit = (names[k] == cell[2]) ? "1" : "0"
            flags = (flags == "") ? bit : flags OFS bit
        }
        print cell[1] OFS flags
    }
}
' file
Here's a "two-pass solution" (along the lines suggested by @Drakosha) implemented using a single invocation of awk. The implementation would be a little simpler if there was no requirement regarding the ordering of names.
awk -F, '
# Globals: n = number of distinct names, a[0..n-1] = names in first-seen order,
#          seen = set of names already recorded, s = header line (set once).

# Print one output row: the id, then value in each column whose name matches
# and 0 in every other column. i and line are function-local.
function println(ix, name, value,    i, line) {
    line = ix
    for (i = 0; i < n; i++) {
        if (a[i] == name) { line = line OFS value } else { line = line OFS 0 }
    }
    print line
}
BEGIN { OFS = FS; n = 0 }
FNR == 1 { next }   # skip the header each time
# First pass: collect names. Membership is tested with "in" so that a name
# such as 0 or an empty name (both false as values) is recorded only once.
NR == FNR { if (!($2 in seen)) { seen[$2]; a[n++] = $2 }; next }
# Second pass: emit the header once, then one row per input line.
!s { s = "id"; for (i = 0; i < n; i++) { s = s OFS a[i] }; print s }
{ println($1, $2, $3) }
' file file
I suggest 2 passes.
1st will generate all the possible values of column 2 (Je, Ko, Ne,
...).
2nd will be able to trivially generate the output you are looking for.
# Hard-codes the column names in s. For each data row it copies s, replaces the
# first match of $2 (used as a regex) with 1, then replaces every remaining run
# of characters other than digits and commas with 0.
awk -F, 'BEGIN{s="Je,Ko,Ne";print "id,"s}
NR>1 {m=s; sub($2,1,m); gsub("[^0-9,]+","0",m); print $1","m}' file

How to print a pattern using AWK?

I need to find the words in a file that match a regex pattern.
So if I have this line:
00:10:20,918 I [AbstractAction.java] - register | 0.0.0.0 | {GW_CHANNEL=AA, PWD=********, ID=777777, GW_USER=BB, NUM=3996, SYSTEM_USER=OS, LOGIC_ID=0}
awk -F' ' '{for(i=1;i<=NF;i++){ if($i ~ /GW_USER/ && /GW_CHANNEL/){print $5 " " $i} } }'
Print only:
register GW_USER=BB
I want to get:
register GW_USER=BB GW_CHANNEL=AA
How to print GW_USER and GW_CHANNEL columns?
Your if condition isn't looking right, you can use regex alternation:
awk '{for(i=1;i<=NF;i++){ if($i ~ /GW_USER|GW_CHANNEL/) print $5, $i } }' file
There is no need to use -F" " and " " in print as that is default field separator.
Your condition:
if($i ~ /GW_USER/ && /GW_CHANNEL/)
will match GW_USER against $i but will match GW_CHANNEL against the whole line ($0).
Whenever you have name=value pairs in your input, it's a good idea to create an array that maps the names to the values and then print by name:
$ cat tst.awk
# Runs for every line that contains an opening brace followed by at least one
# character other than a closing brace.
match($0,/{[^}]+/) {
# Text after the opening brace, up to (not including) the closing brace.
str = substr($0,RSTART+1,RLENGTH-1)
# Split on runs of spaces, commas and equals signs; for name=value input the
# names land on odd indices and the values on even ones.
split(str,arr,/[ ,=]+/)
# Discard the mapping built for the previous line.
delete n2v
# Walk the pairs and record value by name.
for (i=1; i in arr; i+=2) {
n2v[arr[i]] = arr[i+1]
}
print $5, fmt("GW_USER"), fmt("GW_CHANNEL")
}
# Returns "name=value" using the value recorded in n2v for name.
function fmt(name) { return (name "=" n2v[name]) }
$
$ awk -f tst.awk file
register GW_USER=BB GW_CHANNEL=AA
that way you trivially print or do anything else you want with any other field in future.

Format date in a column using awk

I just want to fix this problem. I am running the code below
awk -F, 'NR>1{gsub(/\:/,"",$4);gsub(/\-/,"",$4);gsub(/\.0/,"",$4);gsub(/\ /,",",$4);NF--}{$1=$1}1' OFS=, sample
$cat sample
1,0,null,2014-11-24 08:15:18.0,1
1,0,null,2014-11-24 08:15:16.0,1
The output is
1,0,null,2014-11-24 08:15:18.0,1
1,0,null,20141124,081516
My expected output:
1,0,null,20141124,081518,1
1,0,null,20141124,081516,1
Anyone who could help me with my code above?
You probably just need
awk -F, '{gsub(/[-:]/,"",$4);sub(/ /,OFS,$4);sub(/\.0$/,"",$4)}1' OFS=, sample
Instead of using gsub, you are better off using split.
awk '
BEGIN { FS = ","; OFS = "," }
{
    # Break the timestamp in column 4 on "-", space, ":" and "."; for input like
    # 2014-11-24 08:15:18.0 the first six pieces are the date and time digits.
    split($4, parts, /[- :.]/)
    date = parts[1] parts[2] parts[3]
    time = parts[4] parts[5] parts[6]
    $4 = date FS time
    print
}' sample
1,0,null,20141124,081518,1
1,0,null,20141124,081516,1
We set the input and output field separator in the BEGIN block to ,.
Using split, we break the fourth field on -, :, . and space into an array.
We then reconstruct the fourth field by concatenating the array elements.
1 at the end will do default awk action, that is print.
#!/usr/bin/awk -f
# Runs on lines whose first whitespace-separated field is non-empty and not numerically zero.
$1 {
# Remove every ".0", "-" and ":" anywhere in the line (not only in the date column).
gsub(/(\.0|[-:])/, "")
# Turn each space into a comma; for the sample input this splits date and time into two columns.
gsub(/ /, ",")
print
}
$ awk 'BEGIN{FS=OFS=","} {gsub(/[-:]|\.0/,"",$4); sub(/ /,OFS,$4)} 1' file
1,0,null,20141124,081518,1
1,0,null,20141124,081516,1
or:
$ awk 'BEGIN{FS="[ ,]";OFS=","} {gsub(/-/,"",$4); gsub(/:|\.0/,"",$5)} 1' file
1,0,null,20141124,081518,1
1,0,null,20141124,081516,1

Resources