How to reorder file using awk - shell

I have a file as following format
Item-abc,c1,300
Item-abc,c2,500
Item-pqr,c1,900
Item-pqr,c2,800
Item-pqr,c3,600
Item-pqr,c4,700
Item-xyz,c1,950
Item-asd,
Item-jkl
I need this file rearranged in following manner
Item-abc,c1=300,c2=500
Item-pqr,c1=900,c2=800,c3=600,c4=700
Item-xyz,c1=950
If the second and third columns are empty, then that line should be removed completely.

$ cat tst.awk
# Collapse consecutive rows that share the same first column into one line:
#   key,col,val  ->  key,col1=val1,col2=val2,...
# Input is expected to be grouped by the first column.
BEGIN { FS = OFS = "," }
# Rows such as "Item-asd," or "Item-jkl" carry no column/value pair: drop them.
$2 == "" && $3 == "" { next }
# First usable row, or the key changed: flush the finished record and start a
# new one. A "seen" flag is used instead of NR so that a one-row group at the
# top of the file is not lost and dropped rows can never cause an empty print.
!seen || $1 != prev { if (seen) print rec; rec = prev = $1; seen = 1 }
{ rec = rec OFS $2 "=" $3 }
END { if (seen) print rec }
$ awk -f tst.awk file
Item-abc,c1=300,c2=500
Item-pqr,c1=900,c2=800,c3=600

cat answer.awk
# Group rows of the form  item,column,value  into one line per item:
#   item,column1=value1,column2=value2,...
# Items and their columns are printed in the order they first appear; a repeated
# (item, column) pair keeps the value seen last.
BEGIN {
    FS = ","
}
{
    # Strip a trailing carriage return so files with Windows line endings work
    # in any awk. A multi-character RS such as "\r\n" is not portable, and it
    # makes a plain LF file be read as one single record.
    sub(/\r$/, "")
}
# Rows with neither a column name nor a value are dropped.
$2 == "" && $3 == "" { next }
{
    if (!(($1, $2) in N)) {
        # Remember first-seen order; "for (k in array)" order is unspecified.
        if (!($1 in ncols))
            items[++nitems] = $1
        cols[$1, ++ncols[$1]] = $2
    }
    N[$1, $2] = $3
}
END {
    for (i = 1; i <= nitems; i++) {
        item = items[i]
        if (item == "Name")     # rows labelled "Name" (a header) are not printed
            continue
        line = item
        for (c = 1; c <= ncols[item]; c++)
            line = line "," cols[item, c] "=" N[item, cols[item, c]]
        print line
    }
}
awk -f answer.awk file
Item-abc,c1=300,c2=500
Item-pqr,c1=900,c2=800,c3=600

#Jerin, this variant will strip the \r's
# Variant of the grouping script that removes carriage returns itself instead
# of relying on RS. Rows of the form  item,column,value  become one line per
# item:  item,column1=value1,column2=value2,...
# Output follows first-seen order; a repeated (item, column) keeps the last value.
BEGIN {
    FS = ","
}
{
    sub(/\r$/, "", $0)      # "\r" is portable; "\x0d" in a regex is not
    split($0, Cols)         # Cols[1]=item, Cols[2]=column, Cols[3]=value
    # Rows with neither a column name nor a value are dropped.
    if (Cols[2] == "" && Cols[3] == "")
        next
    if (!((Cols[1], Cols[2]) in N)) {
        # Remember first-seen order; "for (k in array)" order is unspecified.
        if (!(Cols[1] in ncols))
            items[++nitems] = Cols[1]
        names[Cols[1], ++ncols[Cols[1]]] = Cols[2]
    }
    N[Cols[1], Cols[2]] = Cols[3]
}
END {
    for (i = 1; i <= nitems; i++) {
        j = items[i]
        if (j == "Name")    # rows labelled "Name" (a header) are not printed
            continue
        line = j
        for (c = 1; c <= ncols[j]; c++)
            line = line "," names[j, c] "=" N[j, names[j, c]]
        print line
    }
}

Related

Check for and combine overlapping ranges

How can I check if ranges overlap other ranges and combine the ones that do?
Example:
10-1000,15-350,50-1500,2100,1700-1800,45,40,145,2-1300
The result I want is:
2-1500,1700-1800,2100
I tried to make a plan on how to code it but it's getting me nowhere. Is there a useful package that I can use? Or if not what should my approach be?
Using sort and awk:
# Merge overlapping ranges from a comma-separated list read on stdin, e.g.
#   10-1000,15-350,50-1500,2100  ->  10-1500,2100
# A bare number N is treated as the range N-N. The list is put one range per
# line, sorted by range start, merged by awk and joined back with commas.
tr , '\n' | sort -n | awk '
BEGIN {
  FS = OFS = "-"
}
# Ignore blank lines (empty input, doubled or trailing commas).
NF == 0 {
  next
}
NF == 1 {
  $2 = $1
}
# First range: open the initial span. A flag is used instead of NR so that a
# single-range input is still printed and empty input prints nothing.
!have {
  start = $1
  end = $2
  have = 1
  next
}
# Entirely inside the current span.
$2 + 0 <= end + 0 {
  next
}
# Overlaps the current span: extend it.
$1 + 0 <= end + 0 {
  end = $2
  next
}
# Disjoint: print the finished span and start a new one.
{
  emit()
  start = $1
  end = $2
}
END {
  emit()
}
function emit() {
  if (!have)
    return
  if (start + 0 == end + 0)
    print start
  else
    print start, end
}' | paste -sd, -
$ sh ./merge.sh <<<10-1000,15-350,50-1500,2100,1700-1800,45,40,145,2-1300
2-1500,1700-1800,2100

how to group sequence of item into square brackets

how to group sequence of item into square brackets
for example
List of items
cat item.txt
sn01
sn02
sn03
sn05
sn07
sn08
Desired output
sn[01-03,05,07-08]
If your data is the same as the sample Input_file shown, then the following may help you.
awk 'FNR==1{line=$0} {sub(/[a-z]+/,"")} $0-val>1 && val1!=val{out=out?out "," val1"-"val:line"[" val1"-"val;val1=$0} $0-val>1 && val1==val{out=out?out "," val1:out "," val1;val1=$0} {if(FNR==1){sub(/[0-9]+/,"",line);val1=$0};val=$0}END{if(val1!=val){print out "," val1"-"val"]"} else {print out "," val"]"}}' Input_file
Adding non-one liner form of solution too.
# Compress the numbered entries of Input_file into bracketed ranges, e.g.
# sn01 sn02 sn03 sn05 sn07 sn08 -> sn[01-03,05,07-08].
# How the awk program below works, rule by rule:
#   1. FNR==1 saves the first line in "line".
#   2. Every line has its first run of lowercase letters removed, so $0 holds
#      the numeric part.
#   3. Gap (current minus previous number "val" is more than 1) after a run of
#      several numbers (run start "val1" differs from "val"): "val1-val" is
#      appended to "out"; when "out" is still empty it is opened with
#      line "[" instead of a comma. "val1" becomes the current number.
#   4. Gap after a run of one number (val1 equals val): "," val1 is appended.
#   5. On the first line the first digit run is removed from "line", leaving
#      the prefix, and "val1" is set; on every line "val" becomes the current
#      number.
#   6. END prints "out" followed by the last run and the closing "]".
# NOTE(review): both branches of the ternary in rule 4 are identical, so the
# prefix and "[" are not added there while "out" is empty. When the first
# number is greater than 1, rule 4 also fires on the first line while val1 and
# val are still unset. Confirm the output for inputs whose first run is a
# single number or does not start at 1.
awk '
FNR==1{
line=$0
}
{
sub(/[a-z]+/,"")
}
$0-val>1 && val1!=val{
out=out?out "," val1"-"val:line"[" val1"-"val;
val1=$0
}
$0-val>1 && val1==val{
out=out?out "," val1:out "," val1;
val1=$0
}
{
if(FNR==1){
sub(/[0-9]+/,"",line);
val1=$0
};
val=$0
}
END{
if(val1!=val){
print out "," val1"-"val"]"
}
else{
print out "," val"]"
}
}
' Input_file
Output will be as follows.
sn[01-03,05,07-08]
"sn" is static here; it should be picked up from the input file. When I
give a list of items that start with "cn", it still picks "sn".
Using awk:
$ cat infile
sn01
sn02
sn03
sn05
sn07
sn08
cn08
cn09
cn10
cn11
cn15
when search='sn'
$ awk -v search='sn' 'function pr(){if(f && l)printf("%s%s",n?",":search"[",f==l?f:f"-"l)}$0!~"^"search{next}{t=$1;sub(/[^0-9]+/,"",t)}f==""{f=l=t;next}t==l+1{l=t;next}{pr();f=l=t;n++}END{pr(); print n?"]":"Nothing matched for keyword :"search}' infile
sn[01-03,05,07-08]
when search='cn'
$ awk -v search='cn' 'function pr(){if(f && l)printf("%s%s",n?",":search"[",f==l?f:f"-"l)}$0!~"^"search{next}{t=$1;sub(/[^0-9]+/,"",t)}f==""{f=l=t;next}t==l+1{l=t;next}{pr();f=l=t;n++}END{pr(); print n?"]":"Nothing matched for keyword :"search}' infile
cn[08-11,15]
Better Readable :
# Print the numbers of the lines in infile that start with the "search" prefix
# as compressed ranges, e.g. sn[01-03,05,07-08]. When no line matches, a
# "Nothing matched" message is printed instead.
awk -v search='sn' '
# Print the pending run f..l as "f" or "f-l". The first run printed is
# preceded by the prefix and "[", later runs by ",".
function pr()
{
  if (f != "")
    printf("%s%s", n ? "," : search "[", (f == l) ? f : f "-" l)
}
# Only lines starting with the search prefix are considered.
$0 !~ ("^" search) {
  next
}
{
  t = $1
  sub(/[^0-9]+/, "", t)   # drop the first non-digit run, leaving the number
}
# First matching line opens the first run.
f == "" {
  f = l = t
  next
}
# Next consecutive number extends the run (compared numerically).
t + 0 == l + 1 {
  l = t
  next
}
# Gap: print the finished run and start a new one.
{
  pr()
  f = l = t
  n++
}
END {
  # Decide on f, not n: n only counts gaps, so input forming one single run
  # has n == 0 although something did match.
  if (f == "") {
    print "Nothing matched for keyword :" search
  } else {
    pr()
    print "]"
  }
}' infile
a simple awk solution
We're aiming to set LB and UB for each possible range.
Starting from LB, the last number in the sequence up to which the common difference is 1 gives us the UB.
If difference is more than 1 print the last range and set LB again.
$ awk 'FNR==1{ $1=$1; prefix=substr($0,1,2);} {gsub(/[^0-9]/,"",$1); a[++i]=$1;} END{ printf prefix"["; LB=UB=prev=a[1]; for(i=1; i<=NR; i++){ if(int(a[i+1])==int(prev+1)) { UB=a[i+1]; prev=UB; } else { if(LB==UB) { printf LB"," } else {delim=(i==NR)? "]" :","; printf LB "-" UB delim; } prev=LB=UB=a[i+1]; }} }' file
sn[01-03,05,07-08]
gsub(/[^0-9]/,"",$1) : This sets all non-digit chars to null. Therefore $1 ends up with just numbers;
To understand it better :
# Expanded form of the LB/UB solution: collect the number of every entry, then
# walk the list in END and print each run of consecutive numbers as "LB-UB"
# (or just "LB" for a run of one), e.g. sn[01-03,05,07-08].
awk '
# The first two characters of the first line are taken as the prefix.
FNR==1 { $1=$1; prefix=substr($0,1,2) }
# Keep only the digits of each entry.
{ gsub(/[^0-9]/,"",$1); a[++i]=$1 }
# The opening brace has to be on the same line as END.
END {
    if (NR == 0)
        exit
    printf "%s[", prefix
    LB = UB = prev = a[1]
    for (i = 1; i <= NR; i++)
    {
        if (i < NR && int(a[i+1]) == int(prev+1))
        {
            # Next number continues the run: move the upper bound.
            UB = a[i+1]
            prev = UB
        }
        else
        {
            # Run ended: close the bracket after the last run, else add a comma.
            delim = (i == NR) ? "]\n" : ","
            if (LB == UB)
                printf "%s%s", LB, delim
            else
                printf "%s-%s%s", LB, UB, delim
            prev = LB = UB = a[i+1]
        }
    }
}' file
Awk solution:
# Compress entries such as sn01 sn02 sn03 sn05 into prefix[ranges], e.g.
# sn[01-03,05]. The prefix is everything before the trailing number of the
# first usable line, so it may have any length.
awk '
# Lines without a trailing number are ignored.
!match($0, /[0-9]+$/) { next }
{ v = substr($0, RSTART) }
# First usable line: remember the prefix and open the first run.
# "last" (start of the current run) must be set here as well, otherwise a
# first run of one number is printed as "01-01".
!seen++ { pfx = substr($0, 1, RSTART - 1); r = a = last = v; next }
# a = previous number. On a gap, close the current run (adding "-a" only when
# the run had more than one number) and start a new run at v.
{ diff = v - a; if (diff > 1) { r = r ((a == last) ? "," : "-" a ",") v; last = v } a = v }
# Close the final run when it holds more than one number.
END { if (!seen) exit; if (a != last) r = r "-" a; print pfx "[" r "]" }' file
The output:
sn[01-03,05,07-08]

User defined function issue in awk

My code 1 :
awk -F'|' -v PARM_VAL="${PARM_VALUE[*]}" '
BEGIN { split(PARM_VAL,pa," ")
fn_1()
{
print "inside fn"
}
}
FNR==NR{ for(i=1;i<=NF;i++) a[NR,i]=$i; }
{if (FILENAME == "SPP_OUT") {print $1}}
fn_1
END {printf " second value of SPPIN : "a[2,2]} ' SPP_IN SPP_OUT
I am getting error fatal: function `fn_1' not defined
My code 2 :
awk -F'|' -v PARM_VAL="${PARM_VALUE[*]}" '
BEGIN { split(PARM_VAL,pa," ")
fn_1()
{
ret = "returned"
return ret
}
}
FNR==NR{ for(i=1;i<=NF;i++) a[NR,i]=$i; }
{if (FILENAME == "SPP_OUT") {print $1}}
m=fn_1()
END {printf " second value of SPPIN : "a[2,2];print $m} ' SPP_IN SPP_OUT
I am facing
awk: cmd. line:6: return ret
awk: cmd. line:6: ^ `return' used outside function context
Can anyone assist?
Thanks
The function should be defined out of the BEGIN block. For example:
$ cat function.awk
# Recursive Fibonacci: fib(0) = 0, fib(1) = 1, fib(n) = fib(n-1) + fib(n-2).
# n_1 and n_2 are extra parameters used only as local variables.
function fib(n, n_1, n_2)
{
    # Base cases: values below 2 are returned unchanged.
    if (n < 2)
        return n
    n_1 = fib(n - 1)
    n_2 = fib(n - 2)
    return n_1 + n_2
}
# Print the first five Fibonacci numbers; no input is read.
BEGIN {
    i = 0
    while (i < 5) {
        printf("fib(%d) = %d\n", i, fib(i))
        ++i
    }
}
$ awk -f function.awk
fib(0) = 0
fib(1) = 1
fib(2) = 1
fib(3) = 2
fib(4) = 3
$
The user-defined awk function syntax is
function NAME(PARAMETER-LIST)
{
BODY-OF-FUNCTION
}
The tricky part is:
PARAMETER-LIST is a list of the function's arguments and local variable
names, separated by commas. When the function is called, the argument
names are used to hold the argument values given in the call. The
local variables are initialized to the empty string. A function cannot
have two parameters with the same name, nor may it have a parameter
with the same name as the function itself.
See the awk manual for more details.
To get the nucleus of your code working I had to add the function keyword when defining the function, and parentheses when making the call like so:
$ cat foo.awk
# Minimal example of defining and calling a user-defined function.
# Runs once before any input is read.
BEGIN { print "begin" }
# The definition needs the "function" keyword and sits at the top level of
# the program, outside every pattern/action block.
function fn_1()
{
print "inside fn"
}
# Action without a pattern: runs for every input record. The call needs
# parentheses.
{
fn_1()
}
# Runs once after all input has been read.
END { print "end" }
$ echo 'xyz' | awk -f foo.awk
begin
inside fn
end
From the awk manual:
The definition of a function named name looks like this:
function name(parameter-list)
{
body-of-function
}

How to Convert two columns of CSV files to consecutive integers?

Hello say I have this file file1.csv and it has 2 columns a and b which are both 22 char strings. It looks like something like this:
hWcYwgRKOD77hfm1oKE0IA,5HleiJXMsFkGEsr8Jqr3Ug
hWcYwgRKOD77hfm1oKE0IA,rCDlYd2WHJuiT05sYGxaVA
65q0c2Iw03B8eSuHHTETHw,G40NUD0/op+13yjzBw+hrw
65q0c2Iw03B8eSuHHTETHw,1u8UW/cQ4i1vbSF9wvzu3w
...
And I would like to convert the a, b columns into consecutive integers like:
1,1
1,2
2,3
2,4
Does anyone know how can I do it? I am using Ubuntu 12.04 by the way
And what if I have another file file2.csv with column a' and b'. And is there any way to do the same thing to file2 and if "hWcYwgRKOD77hfm1oKE0IA" is 1 in file1 then "hWcYwgRKOD77hfm1oKE0IA" is 1 in file2 if it appears. Same for column b and b'. And I would like to have single output from those two files: result1.csv and result2.csv
awk -F, -v OFS=, '{ if ($1 in a) { $1 = a[$1] } else { $1 = a[$1] = ++x }
if ($2 in b) { $2 = b[$2] } else { $2 = b[$2] = ++y } } 1' file
Or perhaps simpler but may be less efficient:
awk -F, -v OFS=, '!($1 in a) { a[$1] = ++x } { $1 = a[$1] }
!($2 in b) { b[$2] = ++y } { $2 = b[$2] } 1' file
Or dynamic to any number of columns:
awk -F, -v OFS=, '{ for (i = 1; i <= NF; ++i)
if ((i, $i) in a) { $i = a[i, $i] }
else { $i = a[i, $i] = ++x[i] } } 1' file
Which is also similar to
awk -F, -v OFS=, '{ for (i = 1; i <= NF; ++i) {
if (!((i, $i) in a)) a[i, $i] = ++x[i]
$i = a[i, $i] } } 1' file
Output:
1,1
1,2
2,3
2,4
UPDATE
To apply on two files, try:
# Replace columns 1 and 2 of file1 and file2 with consecutive integers. The
# numbering is shared across both files, so a value that appears in both gets
# the same number. Each input file is written to result_<input file name>.
awk -F, -v OFS=, '
{
  # a maps column-1 values to numbers, b does the same for column 2.
  if ($1 in a) { $1 = a[$1] } else { $1 = a[$1] = ++x }
  if ($2 in b) { $2 = b[$2] } else { $2 = b[$2] = ++y }
  # The target is parenthesised: an unparenthesised concatenation after ">"
  # is ambiguous and is parsed differently by different awk implementations.
  print > ("result_" FILENAME)
}' file1 file2
UPDATE 02
awk -F, -v OFS=, '!($1 in a) { a[$1] = ++x } !($2 in b) { b[$2] = ++y }
{ print $1, $2, a[$1], b[$2] }' file
Output:
hWcYwgRKOD77hfm1oKE0IA,5HleiJXMsFkGEsr8Jqr3Ug,1,1
hWcYwgRKOD77hfm1oKE0IA,rCDlYd2WHJuiT05sYGxaVA,1,2
65q0c2Iw03B8eSuHHTETHw,G40NUD0/op+13yjzBw+hrw,2,3
65q0c2Iw03B8eSuHHTETHw,1u8UW/cQ4i1vbSF9wvzu3w,2,4
File by file version:
# Append the consecutive-integer ids of columns 1 and 2 to every row of file1
# and file2. The numbering is shared across both files, and each input file is
# written to result_<input file name>.
awk -F, -v OFS=, '
# a maps column-1 values to numbers, b does the same for column 2.
!($1 in a) { a[$1] = ++x }
!($2 in b) { b[$2] = ++y }
# The target is parenthesised: an unparenthesised concatenation after ">" is
# ambiguous and is parsed differently by different awk implementations.
{ print $1, $2, a[$1], b[$2] > ("result_" FILENAME) }' file1 file2

awk script, need to return results on multi lines following keyword

Using this:
awk '$1 == "pool" { f=1; print $1,$2; next }
f == 1 { if ($1 == "pool") { print }
else if ($1 == "members") { print }
else if ($0 ~ /^}/) { f=0 }
}' bigip.conf
That works fine until the config has the IPs on following lines.
How can I get it to print the IPs if they are on following lines.
The config has both: some have them on the same line, some on the next 1, 2 or 3 lines.
the data :
pool pl_stage_xxx_microsites_9483 {
monitor all tcp_half_open
members {
11.11.11.11:9483 {}
11.22.22.22:9483 {
session user disabled
}
}
}
Try the following awk code:
# For every "pool" block in bigip.conf print the pool name, its "members"
# line and the address:port lines that directly follow that line.
awk '
# Start of a pool block: print keyword and name, then go to the next line.
$1 == "pool" {
  f = 1
  print $1, $2
  next
}
f == 1 {
  if ($1 == "members") {
    print
    # Keep printing while the following lines look like address:port entries.
    # The getline result is checked so the loop ends at end of file instead
    # of spinning on the last line. The regex is a literal so that the dots
    # stay escaped, and it uses + rather than {m,n} because some awk
    # versions lack interval expressions. The first line that does not match
    # is consumed by the loop and not examined any further.
    while ((getline) > 0 && $0 ~ /[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+:[0-9]+/)
      print
  }
  else if ($0 ~ /^}/) {
    # Closing brace in column one ends the pool block.
    f = 0
  }
}' bigip.conf
That will print the IP lines while they exist.
It's hard to say without seeing more of your data and your expected output but I think all you need is something like this:
# Print, for every pool block in "file", the pool name, the pool line itself
# and every line from the "members" line up to the closing brace in column one.
awk '
# A closing brace in column one ends the current pool block.
/^}/            { inPool = 0 }
# A pool line opens a block and resets the members flag.
$1 == "pool"    { inPool = 1; inMembers = 0 }
# Nothing below applies outside a pool block.
!inPool         { next }
# Pool header: name first, then the full line.
$1 == "pool"    { print $1, $2; print }
# From the members line onwards every line of the block is printed.
$1 == "members" { inMembers = 1 }
inMembers       { print }
' file
The above should be a good starting point at least. With regard to the other answer posted using getline: getline has some appropriate uses but this isn't one of them. Don't use getline until you fully understand and can live with all of its caveats; see http://awk.info/?tip/getline.

Resources