Create a matrix out of table using awk - matrix

I want to use this table:
a 16 moe max us
b 11 tom mic us
d 14 roe fox au
t 29 ann teo au
n 28 joe joe ca
and make this matrix by using awk (or any other simple option in bash):
a_16; b_11; d_14; t_29; n_28
us; moe_max; tom_mic; ; ;
au; ; ; roe_fox; ann_teo;
ca; ; ; ; ; joe_joe
I tried this but it didn't work:
awk '{a[$5]=a[$5]?a[$5] FS $1"_"$2:$1"_"$2; b[$5]=b[$5]?b[$5] FS $3"_"$4:$3"_"$4;} END{for (i in a){print i"\t" a[i] "\t" b[i];}}' fis.txt

Using any awk
$ cat tst.awk
# Pivot records of the form "<a> <b> <x> <y> <group>" into a matrix:
# one column per distinct "<a>_<b>" label, one row per distinct <group>,
# and "<x>_<y>" in the cell where they meet. Labels keep first-seen order.
{
    rowKey = $NF
    colKey = $1 "_" $2
    cell[rowKey, colKey] = $3 "_" $4

    # Record each row/column label the first time it shows up.
    if ( !(rowKey in rowSeen) ) {
        rowSeen[rowKey] = 1
        rowOrder[++rowCount] = rowKey
    }
    if ( !(colKey in colSeen) ) {
        colSeen[colKey] = 1
        colOrder[++colCount] = colKey
    }
}

END {
    OFS = "; "

    # Header line: a blank corner followed by the column labels.
    printf " "
    c = 0
    while ( ++c <= colCount ) {
        printf "%s%s", colOrder[c], (c == colCount ? ORS : OFS)
    }

    # One line per row label; cells with no data print as empty strings.
    for ( r = 1; r <= rowCount; r++ ) {
        printf "%s%s", rowOrder[r], OFS
        for ( c = 1; c <= colCount; c++ ) {
            printf "%s%s", cell[rowOrder[r], colOrder[c]], (c == colCount ? ORS : OFS)
        }
    }
}
$ awk -f tst.awk file
a_16; b_11; d_14; t_29; n_28
us; moe_max; tom_mic; ; ;
au; ; ; roe_fox; ann_teo;
ca; ; ; ; ; joe_joe
I can't see the pattern in the expected output in your question of when there should be 1, 2, 3, or 4 spaces after each ; so I just used a consistent 2 in the above. Massage it to suit.

Using gawk multidimensional arrays for collecting header columns and row indices:
# Requires GNU awk: idx[$5][NR] is an array of arrays.
awk '{
# head[NR]: column label of input line NR ("<field1>_<field2>").
head[NR] = $1"_"$2;
# idx[group][NR]: cell value of line NR, filed under its group (field 5).
idx[$5][NR] = $3"_"$4
}
END {
# Build the header by joining all column labels with single spaces.
h = ""; col_size = length(head);
for (i = 1; i <= col_size; i++) {
h = sprintf("%s %s", h, head[i])
}
print h;
# NOTE(review): for-in traversal order is not defined by awk, so the rows
# may come out in a different order than they appear in the input.
for (lab in idx) {
printf("%s", lab);
# One "; "-prefixed cell per column; columns with no entry for this group
# contribute an empty string.
for (i = 1; i <= col_size; i++) {
v = sprintf("%s; %s", v, idx[lab][i])
}
print v;
# Reset the row buffer before the next group.
v = "";
}
}' test.txt
a_16 b_11 d_14 t_29 n_28
ca; ; ; ; ; joe_joe
au; ; ; roe_fox; ann_teo;
us; moe_max; tom_mic; ; ;

Here is a ruby to do that:
# Read the file, join each record's fields pairwise with "_", group the
# records by their last element, then print a header plus one row per group.
ruby -e 'd=$<.read.
split(/\R/).
map(&:split).
map{|sa| sa.each_slice(2).map{|ss| ss.join("_") } }.
group_by{|sa| sa[-1] }
# d for the sample input:
# {"us"=>[["a_16", "moe_max", "us"], ["b_11", "tom_mic", "us"]], "au"=>[["d_14", "roe_fox", "au"], ["t_29", "ann_teo", "au"]], "ca"=>[["n_28", "joe_joe", "ca"]]}
# Column labels: the first element of every record, in group order.
heads=d.values.flatten(1).map{|sa| sa[0]}
# ["a_16", "b_11", "d_14", "t_29", "n_28"]
# Each row starts as one tab-only placeholder cell per column.
hsh=Hash.new {|h,k| h[k] = ["\t"]*heads.length}
# Drop every record value into the cell under its own column label.
d.each{|k,v|
v.each{|sa|
hsh[k][heads.index(sa[0])]="\t#{sa[1]}"
}
}
# Header line, then one line per group: label first, cells joined by ";".
puts heads.map{|e| "\t#{e}" }.join(";")
hsh.each{|k,v| puts "#{k};\t#{v.join(";")}"}
' file
Prints:
a_16; b_11; d_14; t_29; n_28
us; moe_max; tom_mic; ; ;
au; ; ; roe_fox; ann_teo;
ca; ; ; ; ; joe_joe

Related

How to reorder file using awk

I have a file as following format
Item-abc,c1,300
Item-abc,c2,500
Item-pqr,c1,900
Item-pqr,c2,800
Item-pqr,c3,600
Item-pqr,c4,700
Item-xyz,c1,950
Item-asd,
Item-jkl
I need this file rearranged in following manner
Item-abc,c1=300,c2=500
Item-pqr,c1=900,c2=800,c3=600,c4=700
Item-xyz,c1=950
If the second and third columns are empty, then that line should be removed completely.
$ cat tst.awk
# Merge consecutive "item,key,value" records that share the same item into a
# single line "item,key1=value1,key2=value2,...".
# The input is expected to be grouped by item (first field).
BEGIN { FS = OFS = "," }

# Drop records that carry neither a key nor a value
# (for example "Item-asd," or "Item-jkl").
$2 == "" && $3 == "" { next }

# The item changed: flush the finished group and start a new one.
# Testing rec instead of NR keeps this correct when the first group has only
# one record and when leading records were skipped by the rule above.
$1 != prev {
    if ( rec != "" ) print rec
    rec = prev = $1
}

# Append this record as key=value.
{ rec = rec OFS $2 "=" $3 }

# Flush the last group; print nothing at all for empty input.
END { if ( rec != "" ) print rec }
$ awk -f tst.awk file
Item-abc,c1=300,c2=500
Item-pqr,c1=900,c2=800,c3=600
cat answer.awk
# Collect "item,key,value" records and print one line per item in the form
# "item,key1=value1,key2=value2,...".
BEGIN {
FS=","
RS="\r\n" # records end in CRLF (Windows line endings)
}
{
# N[item,key] = value; a repeated item/key pair keeps only the last value.
N[$1,$2]= $3
}
END {
# Rebuild each item's ",key=value" list from the combined subscripts.
# NOTE(review): awk does not define for-in traversal order, so neither the
# keys within a line nor the lines themselves are guaranteed to come out in
# input order.
for (comb in N) {
split (comb,S,SUBSEP)
K[S[1]]=K[S[1]] "," S[2] "=" N[S[1],S[2]]
}
# Print every item except one literally called "Name"
# (presumably a header row - TODO confirm against the real input).
for (j in K) if (j != "Name") print j K[j]
}
awk -f answer.awk file
Item-abc,c1=300,c2=500
Item-pqr,c1=900,c2=800,c3=600
@Jerin, this variant will strip the \r characters:
# Same grouping as answer.awk, but removes a carriage return from each record
# instead of relying on RS, so the default newline record separator is used.
BEGIN {
FS=","
}
{
# Remove the first carriage return (\x0d) found in the record.
sub(/\x0d/,"",$0)
# Split the cleaned record; with no separator argument split() uses FS.
split($0,Cols)
# N[item,key] = value; a repeated item/key pair keeps only the last value.
N[Cols[1],Cols[2]]= Cols[3]
}
END {
# Rebuild each item's ",key=value" list from the combined subscripts.
# NOTE(review): for-in traversal order is not defined by awk, so output
# order may differ from input order. Records with an empty key and value
# are not filtered here and end up as "item,=".
for (comb in N) {
split (comb,S,SUBSEP)
K[S[1]]=K[S[1]] "," S[2] "=" N[S[1],S[2]]
}
# Print every item except one literally called "Name"
# (presumably a header row - TODO confirm against the real input).
for (j in K) if (j != "Name") print j K[j]
}

Awk separate column output

What I wanted to do is to create a Table (maximum=4 rows) from a one-column file using awk.
I have a file:
1 a,b
2 r,i
3 w
4 r,t
5 o,s
6 y
The desired output:
1 a,b 5 o,s
2 r,i 6 y
3 w
4 r,t
So far, I have just been separating the rows into different files and using "paste" to join them into one. I would appreciate any more sophisticated method.
$ cat tst.awk
# Arrange the input lines into a table of at most 4 rows, filling it column
# by column: lines 1-4 form the first column, lines 5-8 the second, and so on.
BEGIN {
    OFS = "\t"
    height = 4
}

{
    # Zero-based row slot of this line; slot 0 opens a new column.
    slot = (NR - 1) % height
    if ( slot == 0 ) {
        width++
    }
    cell[slot + 1, width] = $0
}

END {
    for ( r = 1; r <= height; r++ ) {
        for ( c = 1; c <= width; c++ ) {
            # Tab between cells, record separator after the last one.
            sep = (c < width) ? OFS : ORS
            printf "%s%s", cell[r, c], sep
        }
    }
}
$
$ awk -f tst.awk file
1 a,b 5 o,s
2 r,i 6 y
3 w
4 r,t
Combination of awk to join lines and column to pretty-print them:
# Append every input line to one of max row buffers (tab-prefixed), print the
# buffers that were actually filled, and let column(1) align the cells.
awk -v max=4 '
  {
    slot = (NR - 1) % max + 1
    line[slot] = line[slot] "\t" $0
  }
  END {
    filled = length(line)
    for (slot = 1; slot <= max && slot <= filled; slot++)
      print line[slot]
  }
' file | column -t -s $'\t'
Output:
1 a,b 5 o,s
2 r,i 6 y
3 w
4 r,t
Another:
$ awk '
  {
    slot = NR % 4                   # lines 1-3 use slots 1-3, every 4th line slot 0
    if (a[slot] != "")              # separate cells with one space
      a[slot] = a[slot] " "
    a[slot] = a[slot] $0
  }
  END {
    for (row = 1; row <= 4; row++)  # rows in display order
      print a[row % 4]              # row 4 lives in slot 0
  }' file
1 a,b 5 o,s
2 r,i 6 y
3 w
4 r,t
Naturally it will be ugly if there are missing columns.
Here is another awk approach:-
awk '
# Lay the input lines out in 4 rows, filling the table column by column,
# with cells separated by tabs.
{
    A[++c] = $0
}
END {
    rows = 4
    # Walk each row directly in steps of 4 instead of using a column count
    # produced by sprintf(): that count was a string, so "j <= m" became a
    # string comparison and dropped columns once m reached 10.
    for ( i = 1; i <= rows && i <= c; i++ )
    {
        line = A[i]
        for ( j = i + rows; j <= c; j += rows )
            line = line "\t" A[j]
        # No trailing tab, and no empty rows when there are fewer than 4 lines.
        print line
    }
}
' file
You can also combine split and paste:
split -l 4 file part- && paste part-*
-l <number> means to split file to smaller files of <number> lines each.
part- is a prefix of our choice to be used for the new files. Note that they will be in alphabetical order, e.g. part-aa, part-ab etc. So paste will paste them as expected.

AWK: Count age mean average for each profession in file.

Input :
name;surname;street;profession;sex;age;city
name1;surname1;street1;prof1;male;22;city1
name2;surname2;street2;prof2;male;25;city2
name1;surname1;street1;prof1;male;23;city3
The data is stored in a *.csv file; the goal is to compute the average age for each profession. So the expected output is:
avg of prof1 = 22,5
avg of prof2 = 25
awk to the rescue!
$ awk -F';' '
    NR == 1 { next }                          # skip the header line
    { total[$4] += $6; seen[$4]++ }           # field 4 = profession, field 6 = age
    END { for (prof in total) print "avg of " prof " = " total[prof] / seen[prof] }
  ' file
avg of prof1 = 22.5
avg of prof2 = 25
$ cat age.awk
# Print the mean age per profession from a semicolon-separated file.
# The header line is used to locate the "profession" and "age" columns.
BEGIN { FS = ";" }

# Header: remember which column holds which value, then skip the line.
NR == 1 {
    for (col = 1; col <= NF; col++) {
        if ($col == "profession") pro_id = col
        else if ($col == "age") age_id = col
    }
    next
}

# Data line: accumulate the age total and the head count per profession.
{
    pro = $pro_id
    av[pro] += $age_id
    n[pro]++
}

# Emit one line per profession, passed through sort for a stable order.
END {
    for (pro in av)
        printf "avg of %s = %s\n", pro, av[pro] / n[pro] | "sort"
}
Usage:
$ awk -f age.awk file.csv

Read a variable with multiple occurrences from a file using AWK

I want to get the value (usually a string) of var1 in a file my_file.dat and save this value to x.
I managed to do this using the following command:
# A shell assignment takes no spaces around "="; with spaces the shell would
# try to run a command named x instead of setting the variable.
x=$(awk '$1 == "var1" {print $2}' my_file.dat)
It now turns out that there can be several occurrences of var1 in my_file.dat, e.g.:
Series1
var1 = temp/data/
Series2
var1 = lost/oldfiles/
My question is then how can I get only the value of the 'var1' which is located right after the line 'Series1', such that 'x' returns 'temp/data/'?
Given the sample you posted all you need is:
# Print the last field of any line whose preceding line is exactly "Series1".
x=$(awk '{ if (prev == "Series1") print $NF; prev = $0 }' file)
but more robustly:
# Capture the value of "var1" from the line directly after "Series1".
x=$(awk '
  {
    name = value = $0
    sub(/[[:space:]]*=.*/, "", name)        # keep the text before the first "="
    sub(/^[[:space:]]+/, "", name)          # tolerate indented assignments
    # The character class must be spelled [[:space:]]; the previous
    # [[:space]] was malformed, so the name and "=" were never stripped.
    sub(/^[^=]*=[[:space:]]*/, "", value)   # keep the text after the first "="
  }
  (prev == "Series1") && (name == "var1") { print value }
  { prev = $0 }
' file)
What about a two state machine to solve the problem:
#!/usr/bin/awk -f
# Print the value of var1 that belongs to the section "Series<nseries>".
#
# Usage: awk -v nseries=N -f solution.awk FILE
#
# Two states: 0 = still looking for the requested section header,
#             1 = inside that section, looking for its var1 line.
# When nseries is not given, the first "Series..." header is taken.
BEGIN {
    state = 0
    target = "Series" nseries
}

# State 0: wait for the requested header. The comparison is exact so that
# nseries=1 does not also match Series10, Series11, ...
state == 0 {
    if ( nseries == "" ? $1 ~ /^Series/ : $1 == target )
    {
        state = 1
    }
    next
}

# State 1: the next section header ends the search. Only a first field that
# starts with "Series" counts, not one that merely contains it.
$1 ~ /^Series/ {
    exit
}

# State 1: take the text after the first "=" when the name is exactly var1.
{
    idx = index( $0, "=" )
    if( idx == 0 )
    {
        next
    }
    name = substr( $0, 1, idx - 1 )
    gsub(/^[ \t]+|[ \t]+$/, "", name )
    if( name == "var1" )
    {
        str = substr( $0, idx + 1 )
        gsub(/^[ \t]+/, "", str )
        print str
        exit
    }
}
# eof #
Test file:
Series1
var1 = temp/data/
Series2
var1 = lost/oldfiles/
Series3
var1 = foo/bar/
Series4
var1 = alpha/betta/
Series5
var1 = /foo/this=bad
Series6
var1 = /foo/Series/
Reading var1 from Series1:
x=$(awk -v nseries=1 -f solution.awk -- ./my_file.dat)
echo $x
temp/data/
Reading var1 from Series5:
x=$(awk -v nseries=5 -f solution.awk -- ./my_file.dat)
echo $x
/foo/this=bad
Reading var1 from Series6:
x=$(awk -v nseries=6 -f solution.awk -- ./my_file.dat)
echo $x
/foo/Series/
Hope it Helps!

sed-style replace letters with random letters, numbers with random numbers

I need to take a large file, with lines such as:
member: cn=user0001,ou=people
And replace all the usernames such that they still have letters in the same position and numbers in the same position, at random. So the output might be something like:
member: cn=kvud7405,ou=people
The usernames vary in length and format, but they're always bounded by a cn= and a comma.
Can anyone offer a solution with sed/awk/bash preferably, or failing that python might be an option (not sure which version).
Thanks in advance.
something like
# Build the random string first: $(...) is not expanded inside single quotes,
# so the original command inserted the literal text "$(cat /dev/urandom ...)".
# LC_ALL=C keeps tr from choking on the random bytes in multibyte locales.
rand=$(LC_ALL=C tr -dc 'a-z0-9' < /dev/urandom | fold -w 6 | head -n 1)
# Note: every match in the file receives this same 6-character string.
sed -i "s/blah/blah?${rand}/g" /home/test.html
# "new" is cleared for every record; without that each line would carry the
# random names generated for all the lines before it.
awk -F 'cn=|,' 'BEGIN {srand(); OFS = ""} {new = ""; n = split($2, a, ""); for (i = 1; i <= n; i++) {if (a[i] ~ /[[:digit:]]/) {new = new int(rand() * 10)} else {new = new sprintf("%c", int(rand() * 26 + 97))}}; $2 = "cn=" new ","; print}'
Broken out on multiple lines:
# Replace the user name between "cn=" and the following comma with random
# characters: digits stay digits, everything else becomes a lowercase letter.
# Because FS splits on "cn=" and ",", the name is always field 2; lines with
# additional commas lose them when the record is rebuilt with an empty OFS.
awk -F 'cn=|,' '
BEGIN {
    srand()
    OFS = ""
}
{
    # Start each record with an empty replacement; otherwise every line
    # would carry the random names generated for all the lines before it.
    new = ""
    n = split($2, a, "")
    for (i = 1; i <= n; i++) {
        if (a[i] ~ /[[:digit:]]/) {
            new = new int(rand() * 10)
        }
        else {
            new = new sprintf("%c", int(rand() * 26 + 97))
        }
    }
    $2 = "cn=" new ","
    print
}'
It could easily be modified to handle uppercase alpha characters if needed.
Edit:
More robust:
# Randomize only the text between "cn=" and the next comma, leaving the rest
# of each line exactly as it was.
awk '
BEGIN { srand() }

# Lines without a "cn=...," part pass through untouched. Without this guard
# RSTART is 0 and the substr() calls below would mangle the line.
!match($0, /cn=[^,]*,/) { print; next }

{
    # Start each record with an empty replacement; otherwise every line
    # would carry the random names generated for all the lines before it.
    new = ""
    n = split(substr($0, RSTART + 3, RLENGTH - 4), a, "")
    for (i = 1; i <= n; i++) {
        if (a[i] ~ /[[:digit:]]/) {
            new = new int(rand() * 10)
        }
        else {
            new = new sprintf("%c", int(rand() * 26 + 97))
        }
    }
    # Text up to and including "cn=", the new name, then the comma onwards.
    print substr($0, 1, RSTART + 2) new substr($0, RSTART + RLENGTH - 1)
}'
This version doesn't use FS so it works when there are additional fields.
A Bash solution:
letter=( a b c d e f g h i j k l m n o p q r s t u v w x y z )
digit=( 0 1 2 3 4 5 6 7 8 9 )

#######################################
# Copy stdin to stdout, replacing the value between the first "cn=" and the
# next comma: letters become random lowercase letters, digits become random
# digits, any other character is kept. Everything outside that value, and
# every line without "cn=", is passed through unchanged.
# Globals:   letter, digit (read)
# Arguments: none
# Outputs:   the rewritten lines on stdout
#######################################
randomize_cn() {
  local line prefix rest value suffix user ch i
  # -r and an empty IFS keep backslashes and surrounding blanks intact; the
  # extra test still handles a last line that has no trailing newline.
  while IFS= read -r line || [[ -n "$line" ]]; do
    if [[ "$line" != *cn=* ]]; then
      printf '%s\n' "$line"
      continue
    fi
    prefix=${line%%cn=*}cn=          # everything up to and including "cn="
    rest=${line#*cn=}                # everything after the first "cn="
    value=${rest%%,*}                # the name stops at the FIRST comma
    suffix=${rest:${#value}}         # that comma and whatever follows it
    user=''
    for (( i = 0; i < ${#value}; i++ )); do
      ch=${value:i:1}
      if [[ "$ch" == [[:alpha:]] ]]; then
        user+=${letter[RANDOM % ${#letter[@]}]}
      elif [[ "$ch" == [[:digit:]] ]]; then
        user+=${digit[RANDOM % ${#digit[@]}]}
      else
        user+=$ch
      fi
    done
    printf '%s\n' "${prefix}${user}${suffix}"
  done
}

# Replace the original file only when the rewrite succeeded.
randomize_cn < "$infile" > "$tempfile" && mv -- "$tempfile" "$infile"

Resources