How can I align the columns of tables in Bash? - bash

I want to format text as a table. I tried echoing with a '\t' separator, but it was misaligned.
Desired output:
a very long string.......... 112232432 anotherfield
a smaller string 123124343 anotherfield

Use the column command:
column -t -s' ' filename

printf is great, but people forget about it.
$ for num in 1 10 100 1000 10000 100000 1000000; do printf "%10s %s\n" $num "foobar"; done
1 foobar
10 foobar
100 foobar
1000 foobar
10000 foobar
100000 foobar
1000000 foobar
$ for((i=0;i<array_size;i++));
do
printf "%10s %10d %10s" stringarray[$i] numberarray[$i] anotherfieldarray[%i]
done
Notice I used %10s for strings. %s is the important part. It tells it to use a string. The 10 in the middle says how many columns it is to be. %d is for numerics (digits).
See man 1 printf for more info.

function printTable()
{
local -r delimiter="${1}"
local -r data="$(removeEmptyLines "${2}")"
if [[ "${delimiter}" != '' && "$(isEmptyString "${data}")" = 'false' ]]
then
local -r numberOfLines="$(wc -l <<< "${data}")"
if [[ "${numberOfLines}" -gt '0' ]]
then
local table=''
local i=1
for ((i = 1; i <= "${numberOfLines}"; i = i + 1))
do
local line=''
line="$(sed "${i}q;d" <<< "${data}")"
local numberOfColumns='0'
numberOfColumns="$(awk -F "${delimiter}" '{print NF}' <<< "${line}")"
# Add Line Delimiter
if [[ "${i}" -eq '1' ]]
then
table="${table}$(printf '%s#+' "$(repeatString '#+' "${numberOfColumns}")")"
fi
# Add Header Or Body
table="${table}\n"
local j=1
for ((j = 1; j <= "${numberOfColumns}"; j = j + 1))
do
table="${table}$(printf '#| %s' "$(cut -d "${delimiter}" -f "${j}" <<< "${line}")")"
done
table="${table}#|\n"
# Add Line Delimiter
if [[ "${i}" -eq '1' ]] || [[ "${numberOfLines}" -gt '1' && "${i}" -eq "${numberOfLines}" ]]
then
table="${table}$(printf '%s#+' "$(repeatString '#+' "${numberOfColumns}")")"
fi
done
if [[ "$(isEmptyString "${table}")" = 'false' ]]
then
echo -e "${table}" | column -s '#' -t | awk '/^\+/{gsub(" ", "-", $0)}1'
fi
fi
fi
}
function removeEmptyLines()
{
local -r content="${1}"
echo -e "${content}" | sed '/^\s*$/d'
}
function repeatString()
{
local -r string="${1}"
local -r numberToRepeat="${2}"
if [[ "${string}" != '' && "${numberToRepeat}" =~ ^[1-9][0-9]*$ ]]
then
local -r result="$(printf "%${numberToRepeat}s")"
echo -e "${result// /${string}}"
fi
}
function isEmptyString()
{
local -r string="${1}"
if [[ "$(trimString "${string}")" = '' ]]
then
echo 'true' && return 0
fi
echo 'false' && return 1
}
function trimString()
{
local -r string="${1}"
sed 's,^[[:blank:]]*,,' <<< "${string}" | sed 's,[[:blank:]]*$,,'
}
SAMPLE RUNS
$ cat data-1.txt
HEADER 1,HEADER 2,HEADER 3
$ printTable ',' "$(cat data-1.txt)"
+-----------+-----------+-----------+
| HEADER 1 | HEADER 2 | HEADER 3 |
+-----------+-----------+-----------+
$ cat data-2.txt
HEADER 1,HEADER 2,HEADER 3
data 1,data 2,data 3
$ printTable ',' "$(cat data-2.txt)"
+-----------+-----------+-----------+
| HEADER 1 | HEADER 2 | HEADER 3 |
+-----------+-----------+-----------+
| data 1 | data 2 | data 3 |
+-----------+-----------+-----------+
$ cat data-3.txt
HEADER 1,HEADER 2,HEADER 3
data 1,data 2,data 3
data 4,data 5,data 6
$ printTable ',' "$(cat data-3.txt)"
+-----------+-----------+-----------+
| HEADER 1 | HEADER 2 | HEADER 3 |
+-----------+-----------+-----------+
| data 1 | data 2 | data 3 |
| data 4 | data 5 | data 6 |
+-----------+-----------+-----------+
$ cat data-4.txt
HEADER
data
$ printTable ',' "$(cat data-4.txt)"
+---------+
| HEADER |
+---------+
| data |
+---------+
$ cat data-5.txt
HEADER
data 1
data 2
$ printTable ',' "$(cat data-5.txt)"
+---------+
| HEADER |
+---------+
| data 1 |
| data 2 |
+---------+
REF LIB at: https://github.com/gdbtek/linux-cookbooks/blob/master/libraries/util.bash

To have the exact same output as you need, you need to format the file like this:
a very long string..........\t 112232432\t anotherfield\n
a smaller string\t 123124343\t anotherfield\n
And then using:
$ column -t -s $'\t' FILE
a very long string.......... 112232432 anotherfield
a smaller string 123124343 anotherfield

It's easier than you wonder.
If you are working with a separated-by-semicolon file and header too:
$ (head -n1 file.csv && sort file.csv | grep -v <header>) | column -s";" -t
If you are working with an array (using tab as separator):
for((i=0;i<array_size;i++));
do
echo stringarray[$i] $'\t' numberarray[$i] $'\t' anotherfieldarray[$i] >> tmp_file.csv
done;
cat file.csv | column -t

awk solution that deals with stdin
Since column is not POSIX, maybe this is:
mycolumn() (
file="${1:--}"
if [ "$file" = - ]; then
file="$(mktemp)"
cat > "${file}"
fi
awk '
FNR == 1 { if (NR == FNR) next }
NR == FNR {
for (i = 1; i <= NF; i++) {
l = length($i)
if (w[i] < l)
w[i] = l
}
next
}
{
for (i = 1; i <= NF; i++)
printf "%*s", w[i] + (i > 1 ? 1 : 0), $i
print ""
}
' "$file" "$file"
if [ "$1" = - ]; then
rm "$file"
fi
)
Test:
printf '12 1234 1
12345678 1 123
1234 123456 123456
' > file
Test commands:
mycolumn file
mycolumn <file
mycolumn - <file
Output for all:
12 1234 1
12345678 1 123
1234 123456 123456
See also:
Using awk to align columns in text file?
AWK: go through the file twice, doing different tasks

I am not sure where you were running this, but the code you posted would not produce the output you gave, at least not in the Bash version that I'm familiar with.
Try this instead:
stringarray=('test' 'some thing' 'very long long long string' 'blah')
numberarray=(1 22 7777 8888888888)
anotherfieldarray=('other' 'mixed' 456 'data')
array_size=4
for((i=0;i<array_size;i++))
do
echo ${stringarray[$i]} $'\x1d' ${numberarray[$i]} $'\x1d' ${anotherfieldarray[$i]}
done | column -t -s$'\x1d'
Note that I'm using the group separator character (0x1D) instead of tab, because if you are getting these arrays from a file, they might contain tabs.

Just in case someone wants to do that in PHP, I posted a gist on GitHub:
https://gist.github.com/redestructa/2a7691e7f3ae69ec5161220c99e2d1b3
Simply call:
$output = $tablePrinter->printLinesIntoArray($items, ['title', 'chilProp2']);
You may need to adapt the code if you are using a PHP version older than 7.2.
After that, call echo or writeLine depending on your environment.

The below code has been tested and does exactly what is requested in the original question.
Parameters:
%30s Column of 30 char and text right align.
%10d integer notation, %10s will also work. \
stringarray[0]="a very long string.........."
# 28Char (max length for this column)
numberarray[0]=1122324333
# 10digits (max length for this column)
anotherfield[0]="anotherfield"
# 12Char (max length for this column)
stringarray[1]="a smaller string....."
numberarray[1]=123124343
anotherfield[1]="anotherfield"
printf "%30s %10d %13s" "${stringarray[0]}" ${numberarray[0]} "${anotherfield[0]}"
printf "\n"
printf "%30s %10d %13s" "${stringarray[1]}" ${numberarray[1]} "${anotherfield[1]}"
# a var string with spaces has to be quoted
printf "\n Next line will fail \n"
printf "%30s %10d %13s" ${stringarray[0]} ${numberarray[0]} "${anotherfield[0]}"
a very long string.......... 1122324333 anotherfield
a smaller string..... 123124343 anotherfield

column -t skips empty fields when a line starts with a delimiter character or when there are two or more consecutive delimiter characters:
$ printf %s\\n a,b,c a,,c ,b,c|column -s, -t
a b c
a c
b c
Therefore I use this awk function instead (it requires gawk because it uses arrays of arrays):
$ tab(){ awk '{if(NF>m)m=NF;for(i=1;i<=NF;i++){a[NR][i]=$i;l=length($i);if(l>b[i])b[i]=l}}END{for(h in a){for(i=1;i<=m;i++)printf("%-"(b[i]+n)"s",a[h][i]);print""}}' n="${2-1}" "${1+FS=$1}"|sed 's/ *$//';}
$ printf %s\\n a,b,c a,,c ,b,c|tab ,
a b c
a c
b c

if you data doesn't contain the equal sign ("=") anywhere in it, you can use that as a shell-friendly delimiter for column without having to escape anything -
by modifying FS to be either a tab ("\t") plus any amount of spaces (" ") or tabs ("\t") on either side of it, or a contiguous chunk of 2 or more spaces, it also allows the input data to have any amount of single space within each field
echo "${inputdata2}" |
mawk NF=NF OFS== FS=' + |[ \t]*\t[ \t]*' |
column -s= -t
a very long string.......... 112232432 anotherfield
a smaller string 123124343 anotherfield
if the data does contain the equal sign, use a combo sep that's close to impossible to exist in typical data :
gawk -e NF=NF OFS='\301\372\5' FS=' + |[ \t]*\t[ \t]*' |
LC_ALL=C column -s$'\301\372\5' -t
a very long string.......... 112232432 anotherfield
a smaller string 123124343 anotherfield
and if ur data only has 2 columns, and you have ballpark sense of how wide the first field is, you can use this \r trick for nice on-screen formatting (but those don't become runs of spaces if u need to send it down the pipe) :
# each \t is 8-spaces at console terminal
mawk NF=2 FS=' + |[ \t]*\t[ \t]*' OFS='\r\t\t\t\t'
a very long string.......... 112232432
a smaller string 123124343

Related

Accept filename as argument and calculate repeated words along with count

I need to find the number or repeated characters from a text file and need to pass filename as argument.
Example:
test.txt data contains
Zoom
Output should be like:
z 1
o 2
m 1
I need a command that will accept filename as argument and then lists the number of characters from that file. In my example I have a test.txt which has zoom word. So the output will be like how many times each letter has repeated.
My attempt:
vi test.sh
#!/bin/bash
FILE="$1" --to pass filename as argument
sort file1.txt | uniq -c --to count the number of letters
Just a guess?
cat test.txt |
tr '[:upper:]' '[:lower:]' |
fold -w 1 |
sort |
uniq -c |
awk '{print $2, $1}'
m 1
o 2
z 1
Suggesting awk script that count all kinds of chars:
awk '
BEGIN{FS = ""} # make each char a field
{
for (i = 1; i <= NF; i++) { # iteratre over all fields in line
++charsArr[$i]; # count each field occourance in array
}
}
END {
for (char in charsArr) { # iterrate over chars array
printf("%3d %s\n", charsArr[char], char); # cournt char-occourances and the char
}
}' |sort -n
Or in one line:
awk '{for(i=1;i<=NF;i++)++arr[$i]}END{for(char in arr)printf("%3d %s\n",arr[char],char)}' FS="" input.1.txt|sort -n
#!/bin/bash
#get the argument for further processing
inputfile="$1"
#check if file exists
if [ -f $inputfile ]
then
#convert file to a usable format
#convert all characters to lowercase
#put each character on a new line
#output to temporary file
cat $inputfile | tr '[:upper:]' '[:lower:]' | sed -e 's/\(.\)/\1\n/g' > tmp.txt
#loop over every character from a-z
for char in {a..z}
do
#count how many times a character occurs
count=$(grep -c "$char" tmp.txt)
#print if count > 0
if [ "$count" -gt "0" ]
then
echo -e "$char" "$count"
fi
done
rm tmp.txt
else
echo "file not found!"
exit 1
fi

Hex to decimal conversion in bash without using gawk

Input:
cat test1.out
12 , maze|style=0x48570006, column area #=0x7, location=0x80000d
13 , maze|style=0x48570005, column area #=0x7, location=0x80aa0d
....
...
..
.
Output needed:
12 , maze|style=0x48570006, column area #=0x7, location=8388621 <<<8388621 is decimal of 0x80000d
....
I want to convert just the last column to decimal.
I cannot use gawk as it is not available in our company machines everywhere.
Tried using awk --non-decimal-data but it didnt work also.
Wondering if just printf command can work on flipping the last word from hex to decimal.
Any other ideas that you can suggest?
There's no need for awk or any other external commands here: bash's native math operation handle hexadecimal values correctly when in an arithmetic context (this is why echo $((0xff)) emits 255).
#!/usr/bin/env bash
# ^^^^- must be really bash, not /bin/sh
location_re='location=(0x[[:xdigit:]]+)([[:space:]]|$)'
while read -r line; do
if [[ $line =~ $location_re ]]; then
hex=${BASH_REMATCH[1]}
dec=$(( $hex ))
printf '%s\n' "${line/location=$hex/location=$dec}"
else
printf '%s\n' "$line"
fi
done
You can see this running at https://ideone.com/uN7qNY
Considering the case strtonum() function is not available, how about:
#!/bin/bash
awk -F'location=0x' '
function hex2dec(str,
i, x, c, tab) {
for (i = 0; i <= 15; i++) {
tab[substr("0123456789ABCDEF", i + 1, 1)] = i;
}
x = 0
for (i = 1; i <= length(str); i++) {
c = toupper(substr(str, i, 1))
x = x * 16 + tab[c]
}
return x
}
{
print $1 "location=" hex2dec($2)
}
' test1.out
where hex2dec() is a homemade substituion of strtonum().
Wait, can't you just use printf in other awks? It won't work with gawk but it does with other awks, right? For example with mawk:
$ mawk 'BEGIN{FS=OFS="="}{$NF=sprintf("%d", $NF);print}' file
12 , maze|style=0x48570006, column area #=0x7, location=8388621
13 , maze|style=0x48570005, column area #=0x7, location=8432141
I tested with mawk, awk-20070501, awk-20121220 and Busybox awk.
Discarded after edit but left for comments' sake:
Using rev and cut to extract around the last = and printf for hex2dec conversion:
$ while IFS='' read -r line || [[ -n "$line" ]]
do
printf "%s=%d\n" "$(echo "$line" | rev | cut -d = -f 2- | rev)" \
$(echo "$line" | rev | cut -d = -f 1 | rev)
done < file
Output:
12 , maze|style=0x48570006, column area #=0x7, location=8388621
13 , maze|style=0x48570005, column area #=0x7, location=8432141
If you have Perl installed, not having Gawk is rather inconsequential.
perl -pe 's/location=\K0x([0-9a-f]+)/ hex($1) /e' file
This might work for you (GNU sed and Bash):
sed 's/\(.*location=\)\(0x[0-9a-f]\+\)/echo "\1$((\2))"/Ie' file
Use pattern matching and back references to split each line and then evaluate an echo command.
Alternative:
sed 's/\(.*location=\)\(0x[0-9a-f]\+\)/echo "\1$((\2))"/I' file | sh
BASH_REMATCH array info :
http://molk.ch/tips/gnu/bash/rematch.html
quintessential principe :
[[ string =~ regexp ]]
[[ "abcdef" =~ (b)(.)(d)e ]]
If the 'string' matches 'regexp',
.. the matched part of the string is stored in the BASH_REMATCH array.
# Now:
# BASH_REMATCH[0]=bcde # as the total match
# BASH_REMATCH[1]=b # as the 1'th captured group
# BASH_REMATCH[2]=c # as ...
# BASH_REMATCH[3]=d
enjoy !
Bash's native math operation handles hexadecimal values correctly anytime.
Example:
echo $(( 0xff))
255
printf '%d' 0xf0
240

extract a numeric substring and add value to it

I have a string like 1001.2001.3001.5001.6001 or 1001-2001-3001-5001-6001. How to extract the 4th string i.e., 5001, add a value like 121 to it and put it back in the same string. The output should be like 1001.2001.3001.5122.6001 or 1001-2001-3001-5122-6001. I have to achieve this in Linux bash scripting.
Try this
#!/bin/bash
str=$1
if [[ $(echo $str | grep '\.' | wc -l) == 1 ]]
then
str1=$(echo $str | cut -d '.' -f 1,2,3)
str2=$(echo $str | cut -d '.' -f 4 | awk {'print $1+121'})
str3=$(echo $str | cut -d '.' -f 5)
echo $str1.$str2.$str3
elif [[ $(echo $str | grep - | wc -l) == 1 ]]
then
str1=$(echo $str | cut -d '-' -f 1,2,3)
str2=$(echo $str | cut -d '-' -f 4 | awk {'print $1+121'})
str3=$(echo $str | cut -d '-' -f 5)
echo $str1-$str2-$str3
else
echo "do nothing"
fi
Pass a string as parameter
No pipes, no forks, no cutting, no awking, just plain POSIX shell:
$ s=1001.2001.3001.5001.6001
$ oldIFS=$IFS
$ IFS=.-
$ set -- $s
$ case $s in
> (*.*) echo "$1.$2.$3.$(($4 + 121)).$5";;
> (*-*) echo "$1-$2-$3-$(($4 + 121))-$5";;
> esac
1001.2001.3001.5122.6001
$ IFS=$oldIFS
One liner
value=121 ; str='1001.2001.3001.5001.6001' ; token="$(echo "$str" | cut -f 4 -d '.')" ; newtoken=$(( $token + $value )) ; newstr="$(echo "$str" | sed -e "s/$token/$newtoken/g" | tr '.' '-')" ; echo "$newstr"
Breakdown:
value=121 # <- Increment
str='1001.2001.3001.5001.6001' # <- Initial String
token="$(echo "$str" | cut -f 4 -d '.')" # <- Extract the 4th field with . sep
newtoken=$(( $token + $value )) # <- Add value and save to $newtoken
newstr="$(echo "$str" \
| sed -e "s/$token/$newtoken/g" \
| tr '.' '-')" # <- Replace 4th field with $newtoken
# and translate "." to "-"
echo "$newstr" # <- Echo new string
Works in:
Bash
sh
FreeBSD
Busybox
Using out of the box tools
If the field separator can either be . or -, then do something like
echo "1001.2001.3001.5001.6001" | awk 'BEGIN{FS="[.-]";OFS="-"}{$4+=121}1'
1001-2001-3001-5122-6001
However, if you need to match the regex FS or field separator with OFS then you need to have gawk installed
echo "1001.2001.3001.5001.6001" |
gawk 'BEGIN{FS="[.-]"}{split($0,a,FS,seps)}{$4+=121;OFS=seps[1]}1'
1001.2001.3001.5122.6001
Though resetting the argument list with the values is probably the preferred way, or by setting IFS to the delimiter and reading the values into an array and adding the desired value to the array index at issue, you can also do it with a simple loop to look for the delimiters and continually skipping characters until the desired segment is found (4 in you case -- when the delimiter count is 3). Then simply appending the digit at each array index until your next delimiter is found will give you the base value. Simply adding your desired 121 to the completed number completes the script, e.g.
#!/bin/bash
str=${1:-"1001.2001.3001.5001.6001"} ## string
ele=${2:-4} ## element to add value to [1, 2, 3, ...]
add=${3:-121} ## value to add to element
cnt=0 ## flag to track delimiters found
num=
## for each character in str
for ((i = 0; i < ${#str}; i++))
do
if [ "${str:$i:1}" = '.' -o "${str:$i:1}" = '-' ] ## is it '.' or '-'
then
(( cnt++ )) ## increment count
(( cnt == ele )) && break ## if equal to ele, break
## check each char is a valid digit 0-9
elif [ "0" -le "${str:$i:1}" -a "${str:$4i:1}" -le "9" ]
then
(( cnt == (ele - 1) )) || continue ## it not one of interest, continue
num="$num${str:$i:1}" ## append digit to num
fi
done
((num += add)) ## add the amount to num
printf "num: %d\n" $num ## print results
Example Use/Output
$ bash parsenum.sh
num: 5122
$ bash parsenum.sh "1001.2001.3001.5001.6001" 2
num: 2122
$ bash parsenum.sh "1001.2001.3001.5001.6001" 2 221
num: 2222
Look things over and let me know if you have any questions.

Cut column by column name in bash

I want to specify a column by name (i.e. 102), find the position of this column and then use something like cut -5,7- with the found position to delete the specified column.
This is my file header (delim = "\t"):
#CHROM POS 1 100 101 102 103 107 108
This awk should work:
awk -F'\t' -v c="102" 'NR==1{for (i=1; i<=NF; i++) if ($i==c){p=i; break}; next} {print $p}' file
Here's one possible solution without the restriction that only one column is to be removed. It is written as a bash function, where the first argument is the filename, and the remaining arguments are the columns to exclude.
rmcol() {
local file=$1
shift
cut -f$(head -n1 "$file" | tr \\t \\n | grep -vFxn "${#/#/-e}" |
cut -d: -f1 | paste -sd,) "$file"
}
If you want to select rather than exclude the named columns, then change -vFxn to -Fxn.
That almost certainly requires some sort of explanation. The first two lines of the function just removes the filename from the arguments and stores it for later use. The cut command will then select the appropriate columns; the column numbers are computed with the complicated pipeline which follows:
head -n1 "$file" | # Take the first line of the file
tr \\t \\n | # Change all the tabs to newlines [ Note 1]
grep # Select all lines (i.e. column names) which
-v # don't match
F # the literal string
x # which is the complete line
n # and include the line number in the output
"${#/#/-e}" | # Put -e at the beginning of each command line argument,
# converting the arguments into grep pattern arguments (-e)
cut -d: -f1 | # Select only the line number from that matches
paste -sd, # Paste together all the line numbers, separated with commas.
Using a for loop in bash:
C=1; for i in $(head file -n 1) ; do if [ $i == "102" ] ; then break ; else C=$(( $C + 1 )) ; fi ; done ; echo $C
And a full script
C=1
for i in $(head in_file -n 1) ; do
echo $i
if [ $i == "102" ] ; then
break ;
else
echo $C
C=$(( $C + 1 ))
fi
done
cut -f1-$(($C-1)),$(($C+1))- in_file
trying a solution without looping through columns, I get:
#!/bin/bash
pick="$1"
titles="pos 1 100 102 105"
tmp=" $titles "
tmp="${tmp%% $pick* }"
tmp=($tmp)
echo "column ${#tmp[#]}"
It suffers from incorrectly reporting last column if column name can't be found.
Try this small awk utility to cut specific headers - https://github.com/rohitprajapati/toyeca-cutter
Example usage -
awk -f toyeca-cutter.awk -v c="col1, col2, col3, col4" my_file.csv

How to combine columns that have the same headers within 1 file using Awk or Bash

I would like to know how to combine columns with duplicate headers in a file using bash/sed/awk.
x y x y
s1 3 4 6 10
s2 3 9 10 7
s3 7 1 3 2
to :
x y
s1 9 14
s2 13 16
s3 10 3
$ cat file
x y x y
s1 3 4 6 10
s2 3 9 10 7
s3 7 1 3 2
$ cat tst.awk
NR==1 {
for (i=1;i<=NF;i++) {
flds[$i] = flds[$i] " " i+1
}
printf "%-3s",""
for (hdr in flds) {
printf "%3s",hdr
}
print ""
next
}
{
printf "%-3s",$1
for (hdr in flds) {
n = split(flds[hdr],fldNrs)
sum = 0
for (i=1; i<=n; i++) {
sum += $(fldNrs[i])
}
printf "%3d",sum
}
print ""
}
$ awk -f tst.awk file
x y
s1 9 14
s2 13 16
s3 10 3
$ time awk -f ./tst.awk file
x y
s1 9 14
s2 13 16
s3 10 3
real 0m0.265s
user 0m0.030s
sys 0m0.108s
Adjust the printf lines in the obvious ways for different output formatting if you like.
Here's the bash equivalent in response to the comments elsethread. Do NOT use this, the awk solution is the right one, this is just to show how you should write it in bash IF you wanted to do that for some inexplicable reason:
$ cat tst.sh
declare -A flds
while IFS= read -r rec
do
lineNr=$(( lineNr + 1 ))
set -- $rec
if (( lineNr == 1 ))
then
fldNr=1
for fld
do
fldNr=$(( fldNr + 1 ))
flds[$fld]+=" $fldNr"
done
printf "%-3s" ""
for hdr in "${!flds[#]}"
do
printf "%3s" "$hdr"
done
printf "\n"
else
printf "%-3s" "$1"
for hdr in "${!flds[#]}"
do
fldNrs=( ${flds[$hdr]} )
sum=0
for fldNr in "${fldNrs[#]}"
do
eval val="\$$fldNr"
sum=$(( sum + val ))
done
printf "%3d" "$sum"
done
printf "\n"
fi
done < "$1"
$
$ time ./tst.sh file
x y
s1 9 14
s2 13 16
s3 10 3
real 0m0.062s
user 0m0.031s
sys 0m0.046s
Note that it runs in roughly the same order of magnitude duration as the awk script (see comments elsethread). Caveat - I never write bash scripts for processing text files so I'm not claiming the above bash script is perfect, just an example of how to approach it in bash for comparison with the other script in this thread that I claimed should be rewritten!
This not a one line. You can do it using Bash v4, Bash's dictonaries, and some shell tools.
Execute the script below with the name of the file to process a parameter
bash script_below.sh your_file
Here is the script:
declare -A coltofield
headerdone=0
# Take the first line of the input file and extract all fields
# and their position. Start with position value 2 because of the
# format of the following lines
while read line; do
colnum=$(echo $line | cut -d "=" -f 1)
field=$(echo $line | cut -d "=" -f 2)
coltofield[$colnum]=$field
done < <(head -n 1 $1 | sed -e 's/^[[:space:]]*//;' -e 's/[[:space:]]*$//;' -e 's/[[:space:]]\+/\n/g;' | nl -v 2 -n ln | sed -e 's/[[:space:]]\+/=/g;')
# Read the rest of the file starting with the second line
while read line; do
declare -A computation
declare varname
# Turn the line in key value pair. The key is the position of
# the value in the line
while read value; do
vcolnum=$(echo $value | cut -d "=" -f 1)
vvalue=$(echo $value | cut -d "=" -f 2)
# The first value is the line variable name
# (s1, s2)
if [[ $vcolnum == "1" ]]; then
varname=$vvalue
continue
fi
# Get the name of the field by the column
# position
field=${coltofield[$vcolnum]}
# Add the value to the current sum for this field
computation[$field]=$((computation[$field]+${vvalue}))
done < <(echo $line | sed -e 's/^[[:space:]]*//;' -e 's/[[:space:]]*$//;' -e 's/[[:space:]]\+/\n/g;' | nl -n ln | sed -e 's/[[:space:]]\+/=/g;')
if [[ $headerdone == "0" ]]; then
echo -e -n "\t"
for key in ${!computation[#]}; do echo -n -e "$key\t" ; done; echo
headerdone=1
fi
echo -n -e "$varname\t"
for value in ${computation[#]}; do echo -n -e "$value\t"; done; echo
computation=()
done < <(tail -n +2 $1)
Yet another AWK alternative:
$ cat f
x y x y
s1 3 4 6 10
s2 3 9 10 7
s3 7 1 3 2
$ cat f.awk
BEGIN {
OFS="\t";
}
NR==1 {
#need header for 1st column
for(f=NF; f>=1; --f)
$(f+1) = $f;
$1="";
for(f=1; f<=NF; ++f)
fld2hdr[f]=$f;
}
{
for(f=1; f<=NF; ++f)
if($f ~ /^[0-9]/)
colValues[fld2hdr[f]]+=$f;
else
colValues[fld2hdr[f]]=$f;
for (i in colValues)
row = row colValues[i] OFS;
print row;
split("", colValues);
row=""
}
$ awk -f f.awk f
x y
s1 9 14
s2 13 16
s3 10 3
$ awk 'BEGIN{print " x y"} a=$2+$4, b=$3+$5 {print $1, a, b}' file
x y
s1 9 14
s2 13 16
s3 10 3
No doubt there is a better way to display the heading but my awk is a little sketchy.
Here's a Perl solution, just for fun:
cat table.txt | perl -e'#h=grep{$_}split/\s+/,<>;while(#l=grep{$_}split/\s+/,<>){for$i(1..$#l){$t{$l[0]}{$h[$i-1]}+=$l[$i]}};printf " %s\n",(join" ",sort keys%{$t{(keys%t)[0]}});for$h(sort keys%t){printf"$h %s\n",(join " ",map{sprintf"%2d",$_}#{$t{$h}}{sort keys%{$t{$h}}})};'

Resources