I am trying to subtract the variable $i from the $end_date variable - any advice?
#!/bin/bash
# Build a list of the $COUNT most recent hourly backup names, counting
# backwards from $end_date (an hour offset; assumed set by the caller --
# default to 0, i.e. "now", if unset. TODO confirm intended default).
COUNT=5
end_date=${end_date:-0}
declare -a arrWANTEDBACKUPS
for (( i = 0; i < COUNT; i++ )); do
  # $((end_date - i)): arithmetic must be done by the shell; date(1) does
  # not evaluate "$end_date-$i" as math.
  WANTEDBACKUPNAME=$(date '+%Y%m%d_%H00' -d "$((end_date - i)) hours")
  # += with parentheses appends a new array element ([@], not the
  # scraped "[#]" typo, and no word-splitting re-expansion needed).
  arrWANTEDBACKUPS+=("$WANTEDBACKUPNAME")
  echo "${arrWANTEDBACKUPS[$i]}"
  echo "Test"
done
To do math with bash you need double round brackets, like in this example:
echo "$(($end_date-$i)) hours"
4 hours
So this works for me:
date '+%Y%m%d_%H00' -d "$(($end_date-$i)) hours"
20200901_1900
@Léa Gris & @TheSlater
Thanks - got it!
Here my solution:
#!/bin/bash
# Collect the names of the $COUNT most recent hourly backups,
# one per hour counting backwards from the current time.
COUNT=5
declare -a arrWANTEDBACKUPS
for (( i = 0; i < COUNT; i++ )); do
  WANTEDBACKUPNAME=$(date '+%Y%m%d_%H00' -d "-$i hours")
  # += appends a new element; "[@]" (the original scrape showed "[#]",
  # which is a bad subscript) plus re-expansion is not needed at all.
  arrWANTEDBACKUPS+=("$WANTEDBACKUPNAME")
  echo "${arrWANTEDBACKUPS[$i]}"
  echo "$WANTEDBACKUPNAME"
  echo "Test"
done
Related
I have an array with numbers from 1 to 100:
array=$(seq 100)
My task is to exclude range from 60 to 80.
You can use parameter expansion with the offset/length specification.
#! /bin/bash
# Print 1..100 except the range [exclude_from, exclude_to], using
# parameter-expansion slices: ${arr[@]:offset:length}.
arr=({1..100})
exclude_from=60
exclude_to=80
# "[@]" expands each element as a separate word (the scraped "[#]" is a
# bad subscript). exclude_from-1 because value N lives at index N-1.
echo "${arr[@]:0:exclude_from-1}" "${arr[@]:exclude_to}"
An arithmetic test condition
# Print 1..100, skipping every n that falls inside [60, 80].
for n in {1..100}; do
  if (( n >= 60 && n <= 80 )); then
    continue
  fi
  echo "$n"
done
However, to remove those elements from an array
# Remove the values 60..80 from the array by unsetting their slots.
ary=({1..100})
# note that number 1 is stored in _index_ 0, so value v sits at index v-1
for (( idx = 59; idx <= 79; idx++ )); do
  unset 'ary[idx]'
done
declare -p ary
outputs
declare -a ary=([0]="1" [1]="2" [2]="3" [3]="4" [4]="5" [5]="6" [6]="7" [7]="8" [8]="9" [9]="10" [10]="11" [11]="12" [12]="13" [13]="14" [14]="15" [15]="16" [16]="17" [17]="18" [18]="19" [19]="20" [20]="21" [21]="22" [22]="23" [23]="24" [24]="25" [25]="26" [26]="27" [27]="28" [28]="29" [29]="30" [30]="31" [31]="32" [32]="33" [33]="34" [34]="35" [35]="36" [36]="37" [37]="38" [38]="39" [39]="40" [40]="41" [41]="42" [42]="43" [43]="44" [44]="45" [45]="46" [46]="47" [47]="48" [48]="49" [49]="50" [50]="51" [51]="52" [52]="53" [53]="54" [54]="55" [55]="56" [56]="57" [57]="58" [58]="59" [80]="81" [81]="82" [82]="83" [83]="84" [84]="85" [85]="86" [86]="87" [87]="88" [88]="89" [89]="90" [90]="91" [91]="92" [92]="93" [93]="94" [94]="95" [95]="96" [96]="97" [97]="98" [98]="99" [99]="100")
# ... note ......................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................^^^^^^^^^^^^^^^^^^^
And
# Print the surviving elements; "[@]" is the correct subscript (the
# scraped "[#]" is a bad substitution).
for n in "${ary[@]}"; do echo "$n"; done
# or more concisely
printf '%d\n' "${ary[@]}"
excludes 60-80
Something like this maybe?
#!/usr/bin/env bash
# Filter 60..80 out of 1..100 by matching each value against an
# extended-glob alternation pattern @(60|61|...|80).
shopt -s extglob  # required for the @( | ) pattern used below
arr=({1..100})
exclude=({60..80})
# Join the excluded values with '|' inside a subshell so the IFS change
# does not leak. The scraped "#(...)" must be the extglob "@(...)".
exclude_pattern=$(IFS='|'; printf '%s' "@(${exclude[*]})")
for i in "${arr[@]}"; do
  [[ $i == $exclude_pattern ]] && continue
  printf '%d\n' "$i"
done
Or create a temp array to build up the elements.
#!/usr/bin/env bash
# Same filter as above, but rebuild the array without the excluded range.
shopt -s extglob  # required for the @( | ) pattern used below
arr=({1..100})
exclude=({60..80})
# "@(...)" extglob alternation; the scrape rendered it as "#(...)".
exclude_pattern=$(IFS='|'; printf '%s' "@(${exclude[*]})")
included_arr=()  # start clean in case the name is already set
for i in "${arr[@]}"; do
  [[ $i == $exclude_pattern ]] && continue
  included_arr+=("$i")
done
arr=("${included_arr[@]}")
declare -p arr
I have a config file:
map_a 1234,3788,9940
map_b 9948,8901
map_c
map_d 7789,30400
map_e 499423
map_f
The array variable should content:
Name Attribute 1 Attribute 2 Attribute 3 Attribute ...
---------------------------------------------------------------------------
map_a 1234 3788 9940
map_b 9948 8901
map_c
map_d 7789 30400
map_e 499423
map_f
...
So:
foo[0,0] = map_a
foo[0,1] = 1234
foo[3,2] = 30400
...
How can I achieve this with bash? Or are there recommendations for change the delimiters of the .cfg file? As it's flexible from scratch
Regards
Joe C.
Bash arrays are one-dimensional, but associative arrays may allow you to represent something that looks like a 2D array (each key will be the indices combined with ','). Without changing the structure:
#! /bin/bash
# Parse config lines of the form "name attr1,attr2,..." into a pseudo-2D
# associative array: foo[row,col], with the name in column 0 and the
# attributes in columns 1..maxcol.
declare -A foo
n=0        # row counter
maxcol=0   # highest attribute column index seen in any row
# Scope IFS to the read so the " ," split does not leak into the script.
while IFS=' ,' read -r k v; do
  foo[$n,0]=$k
  # Split the attribute list on commas/spaces into an array.
  IFS=', ' read -r -a vv <<< "$v"
  i=1
  for v1 in "${vv[@]}"; do
    foo[$n,$i]=$v1
    (( i > maxcol )) && maxcol=$i
    i=$((i + 1))
  done
  n=$((n + 1))
done < config.txt
for (( i = 0; i < n; i++ )); do
  # "<=" : columns run 0 (name) through maxcol (last attribute); the
  # original "j<maxcol" silently dropped the last attribute column.
  for (( j = 0; j <= maxcol; j++ )); do
    echo "foo($i,$j)=${foo[$i,$j]}"
  done
  echo
done
Alternative will be to use the attribute name as the first key, so that it will be possible to query ${foo[map_a,2]} - 2nd attribute of map_a
#! /bin/bash
# Variant: index rows by the attribute name itself, so callers can query
# ${foo[map_a,2]} (2nd attribute of map_a) directly.
declare -A foo
n=0        # row counter (kept for parity with the first variant)
maxcol=0   # highest attribute column index seen in any row
while IFS=' ,' read -r k v; do
  foo[$n,0]=$k
  foo[$k,0]=$k   # store the name under its own key too, so column 0 prints
  IFS=', ' read -r -a vv <<< "$v"
  i=1
  for v1 in "${vv[@]}"; do
    # Use key as first index
    foo[$k,$i]=$v1
    (( i > maxcol )) && maxcol=$i
    i=$((i + 1))
  done
  n=$((n + 1))
done < config.txt
for i in map_a map_b map_c map_d map_e map_f; do
  # "<=" so the last attribute column (index maxcol) is printed as well.
  for (( j = 0; j <= maxcol; j++ )); do
    echo "foo($i,$j)=${foo[$i,$j]}"
  done
  echo
done
Alright, I've also put maxcol into an array:
# Variant with a per-row column count: maxcol[n] holds the number of
# columns in row n (name + attributes), so ragged rows print exactly.
declare -A foo
declare -A maxcol
n=0
while IFS=' ,' read -r k v; do
  foo[$n,0]=$k
  IFS=', ' read -r -a vv <<< "$v"
  i=1
  for v1 in "${vv[@]}"; do
    foo[$n,$i]=$v1
    i=$((i + 1))
  done
  # After the loop i is one past the last attribute index, i.e. the
  # total column count for this row (name column included).
  maxcol[$n]=$i
  n=$((n + 1))
done < config.txt
for (( i = 0; i < n; i++ )); do
  for (( j = 0; j < maxcol[$i]; j++ )); do
    echo "foo($i,$j)=${foo[$i,$j]}"
  done
  echo "maxcol($i)=${maxcol[$i]}"
  echo
done
I think this will fit my needs now perfectly.
I am trying to create 1000s of large CSVs rapidly. This function generates the CSVs:
# Generate $NUMCSVS CSV files named $DIRNAME-$CSVPREFIX<i>$CSVEXT, each
# with one header row and $NUMROWS data rows of $NUMCOLS columns.
# Reads globals: NUMCSVS, NUMCOLS, NUMROWS, DIRNAME, CSVPREFIX, CSVEXT.
# Fixes vs. the original: rows are built as plain strings (the original
# "+=$x" on an array appended to element 0 only), expansions are quoted,
# and each file is opened ONCE instead of once per row via ">>".
function csvGenerator () {
  local i j k l header row
  for (( i = 1; i <= NUMCSVS; i++ )); do
    CSVNAME="$DIRNAME-$CSVPREFIX$i$CSVEXT"
    if [[ ! -e "$CSVNAME" ]]; then  # only log csv file if it did not exist
      echo "file: $CSVNAME created at $(date)" >> ../status.txt
    fi
    {
      header=""
      for (( j = 1; j <= NUMCOLS; j++ )); do
        header+="$DIRNAME-csv-$i-header-$j"
        (( j < NUMCOLS )) && header+=", "
      done
      printf '%s\n' "$header"
      for (( k = 1; k <= NUMROWS; k++ )); do
        row=""
        for (( l = 1; l <= NUMCOLS; l++ )); do
          row+="$DIRNAME-csv-$i-r${k}c$l"
          (( l < NUMCOLS )) && row+=", "
        done
        printf '%s\n' "$row"
      done
    } > "$CSVNAME"   # single open/close for the whole file
  done
}
The script takes ~3 mins to generate a CSV with 100k rows and 70 cols. What do I need to do to generate these CSVs at the rate of 1 CSV/~10 seconds?
Let me start by saying that bash and "performant" don't usually go together in the same sentence. As other commentators suggested, awk may be a good choice that's adjacent in some senses.
I haven't yet had a chance to run your code, but it opens and closes the output file once per row — in this example, 100,000 times. Each time it must seek to the end of the file so that it can append the latest row.
Try pulling the actual generation (everything after for ((j=1; j<=$NUMCOLS; j++)); do) into a new function, like generateCsvContents. In that new function, don't reference $CSVNAME, and remove the redirections on the echo statements. Then, in the original function, call the new function and redirect its output to the filename. Roughly:
# Create each CSV by redirecting generateCsvContents' stdout into the
# target file, so the file is opened exactly once per CSV.
# Reads globals: NUMCSVS, DIRNAME, CSVPREFIX, CSVEXT; generateCsvContents
# reads the loop variable $i.
function csvGenerator () {
  for (( i = 1; i <= NUMCSVS; i++ )); do
    CSVNAME="${DIRNAME}-${CSVPREFIX}${i}${CSVEXT}"
    # Log creation only the first time this filename appears.
    if [[ ! -e $CSVNAME ]]; then
      echo "file: $CSVNAME created at $(date)" >> ../status.txt
    fi
    # The redirection creates $CSVNAME if it doesn't yet exist.
    generateCsvContents > "$CSVNAME"
  done
}
# Emit one CSV (header row + $NUMROWS data rows of $NUMCOLS columns) on
# stdout. Reads globals: i (file number), NUMCOLS, NUMROWS, DIRNAME.
# Fixes vs. the original: builds plain strings instead of "appending" to
# an array with +=$x (which only concatenates onto element 0), and prints
# with quoted printf instead of an unquoted echo that word-splits/globs.
function generateCsvContents() {
  local j k l header row
  header=""
  for (( j = 1; j <= NUMCOLS; j++ )); do
    header+="$DIRNAME-csv-$i-header-$j"
    (( j < NUMCOLS )) && header+=", "
  done
  printf '%s\n' "$header"
  for (( k = 1; k <= NUMROWS; k++ )); do
    row=""
    for (( l = 1; l <= NUMCOLS; l++ )); do
      row+="$DIRNAME-csv-$i-r${k}c$l"
      (( l < NUMCOLS )) && row+=", "
    done
    printf '%s\n' "$row"
  done
}
"Not this way" is I think the answer.
There are a few problems here.
You're not using your arrays as arrays. When you treat them like strings, you affect only the first element in the array, which is misleading.
The way you're using >> causes the output file to be opened and closed once for every line. That's potentially wasteful.
You're not quoting your variables. In fact, you're quoting the stuff that doesn't need quoting, and not quoting the stuff that does.
Upper case variable names are not recommended, due to the risk of collision with system variables. ref
Bash isn't good at this. Really.
A cleaned up version of your function might look like this:
# Generate $NUMCSVS CSV files (header + $NUMROWS rows of $NUMCOLS cols).
# Reads globals: NUMCSVS, NUMCOLS, NUMROWS, DIRNAME, CSVPREFIX, CSVEXT.
# Each row is assembled with a leading ", " that is stripped once at the
# end, and all data rows are appended through a single redirection.
csvGenerator2() {
  local f c r
  for (( f = 1; f <= NUMCSVS; f++ )); do
    CSVNAME="$DIRNAME-$CSVPREFIX$f$CSVEXT"
    # Skip files that already exist.
    if [[ -e "$CSVNAME" ]]; then
      continue
    fi
    touch "$CSVNAME"
    # Log to status.txt and echo the same line to stderr.
    date "+[%F %T] created: $CSVNAME" | tee -a status.txt >&2
    HEADER=""
    for (( c = 1; c <= NUMCOLS; c++ )); do
      HEADER="$HEADER, $DIRNAME-csv-$f-header-$c"
    done
    printf '%s\n' "${HEADER#, }" > "$CSVNAME"
    for (( r = 1; r <= NUMROWS; r++ )); do
      ROW=""
      for (( c = 1; c <= NUMCOLS; c++ )); do
        ROW="$ROW, $DIRNAME-csv-$f-r${r}c$c"
      done
      printf '%s\n' "${ROW#, }"
    done >> "$CSVNAME"   # one open/append for all data rows
  done
}
(Note that I haven't switched the variables to lower case because I'm lazy, but it's still a good idea.)
And if you were to make something functionally equivalent in awk:
# awk reimplementation of csvGenerator2: same files, same contents.
# Reads globals: NUMCSVS, NUMCOLS, NUMROWS, DIRNAME, CSVPREFIX, CSVEXT,
# all passed into awk via -v (awk cannot see shell variables directly).
# Fix vs. original: the existence check used the shell variable name
# CSVNAME, which is undefined inside awk (empty string), so "test -e"
# always succeeded and every file was skipped; it must test "out".
csvGenerator3() {
  awk -v NUMCSVS="$NUMCSVS" -v NUMCOLS="$NUMCOLS" -v NUMROWS="$NUMROWS" -v DIRNAME="$DIRNAME" -v CSVPREFIX="$CSVPREFIX" -v CSVEXT="$CSVEXT" '
  BEGIN {
    for ( i=1; i<=NUMCSVS; i++) {
      out=sprintf("%s-%s%s%s", DIRNAME, CSVPREFIX, i, CSVEXT)
      # Skip files that already exist (test -e exits 0 when found).
      if (!system("test -e " out)) continue
      system("date '\''+[%F %T] created: " out "'\'' | tee -a status.txt >&2")
      comma=""
      for ( j=1; j<=NUMCOLS; j++ ) {
        # awk keeps "out" open after the first ">", so later ">>" writes
        # continue on the same stream -- one open per file.
        printf "%s%s-csv-%s-header-%s", comma, DIRNAME, i, j > out
        comma=", "
      }
      printf "\n" >> out
      for ( k=1; k<=NUMROWS; k++ ) {
        comma=""
        for ( l=1; l<=NUMCOLS; l++ ) {
          printf "%s%s-csv-%s-r%sc%s", comma, DIRNAME, i, k, l >> out
          comma=", "
        }
        printf "\n" >> out
      }
    }
  }
  '
}
Note that awk does not suffer from the same open/closer overhead mentioned earlier with bash; when a file is used for output or as a pipe, it gets opened once and is left open until it is closed.
Comparing the two really highlights the choice you need to make:
$ time bash -c '. file; NUMCSVS=1 NUMCOLS=10 NUMROWS=100000 DIRNAME=2 CSVPREFIX=x CSVEXT=.csv csvGenerator2'
[2019-03-29 23:57:26] created: 2-x1.csv
real 0m30.260s
user 0m28.012s
sys 0m1.395s
$ time bash -c '. file; NUMCSVS=1 NUMCOLS=10 NUMROWS=100000 DIRNAME=3 CSVPREFIX=x CSVEXT=.csv csvGenerator3'
[2019-03-29 23:58:23] created: 3-x1.csv
real 0m4.994s
user 0m3.297s
sys 0m1.639s
Note that even my optimized bash version is only a little faster than your original code.
Refactoring your two inner for-loops to loops like this will save time:
# Hoist the j == NUMCOLS special case out of the loop: the first
# NUMCOLS-1 columns get a trailing ", ", the last one is appended bare.
# NOTE(review): with no parentheses, "+=" on an array variable appends to
# HEADERARRAY[0] as plain string concatenation -- matching the original
# code's accidental usage, not a real per-element array append.
for ((j=1; j<$NUMCOLS; ++j)); do
HEADERARRAY+=$DIRNAME"-csv-"$i"-header-"$j", "
done
HEADERARRAY+=$DIRNAME"-csv-"$i"-header-"$NUMCOLS
How can I speed this up? it's taking about 5 minutes to make one file...
it runs correctly, but I have a little more than 100000 files to make.
Is my implementation of awk or sed slowing it down? I could break it down into several smaller loops and run it on multiple processors but one script is much easier.
#!/bin/zsh
# Builds batches of config$t.in job files from coordinate files named
# arc<letter><letter> (e.g. "arcab"), 1000 configs per output file.
# NOTE(review): the dominant cost here is process creation -- every
# backquoted `sed -n ${n}p $input | awk ...` spawns a subshell plus two
# processes, and up to six of them run per input line.
# NOTE(review): this is zsh, so ${alpha[$i]} is 1-based indexing.
#1000 configs per file
alpha=( a b c d e f g h i j k l m n o p q r s t u v w x y z )
m=1000 # number of configs per file
t=1 #file number
for (( i=1; i<=4; i++ )); do
# j starts at i, so only the "upper triangle" of letter pairs is visited.
for (( j=i; j<=26; j++ )); do
input="arc"${alpha[$i]}${alpha[$j]}
n=1 #line number
# Disabled: read the per-file line count from the file's first field.
#length=`sed -n ${n}p $input| awk '{printf("%d",$1)}'`
#(( length= $length + 1 ))
length=644
for ((k=1; k<=$m; k++ )); do
# $hmbi is never assigned in this script -- presumably inherited from the
# caller's environment; TODO confirm.
echo "$hmbi" >> ~/Glycine_Tinker/configs/config$t.in
echo "jobtype = energy" >> ~/Glycine_Tinker/configs/config$t.in
echo "analyze_only = false" >> ~/Glycine_Tinker/configs/config$t.in
echo "qm_path = qm_$t" >> ~/Glycine_Tinker/configs/config$t.in
echo "mm_path = aiff_$t" >> ~/Glycine_Tinker/configs/config$t.in
cat head.in >> ~/Glycine_Tinker/configs/config$t.in
water=4
echo $k
for (( l=1; l<=$length; l++ )); do
# Line $n of $input is re-read once per wanted field (fields 1 and 6
# always; fields 2-5 only for accepted lines) -- see NOTE above.
natom=`sed -n ${n}p $input| awk '{printf("%d",$1)}'`
number=`sed -n ${n}p $input| awk '{printf("%d",$6)}'`
if [[ $natom -gt 10 && $number -gt 0 ]]; then
symbol=`sed -n ${n}p $input| awk '{printf("%s",$2)}'`
x=`sed -n ${n}p $input| awk '{printf("%.10f",$3)}'`
y=`sed -n ${n}p $input| awk '{printf("%.10f",$4)}'`
z=`sed -n ${n}p $input| awk '{printf("%.10f",$5)}'`
# water cycles 1..4: a new "--" block header is emitted before every
# 4th accepted atom line.
if [[ $water -eq 4 ]]; then
echo "--" >> ~/Glycine_Tinker/configs/config$t.in
echo "0 1 0.4638" >> ~/Glycine_Tinker/configs/config$t.in
water=1
fi
echo "$symbol $x $y $z" >> ~/Glycine_Tinker/configs/config$t.in
(( water= $water + 1 ))
fi
(( n= $n + 1 ))
done
cat tail.in >> ~/Glycine_Tinker/configs/config$t.in
(( t= $t + 1 ))
done
done
done
One thing that is going to be killing you here is the sheer number of processes being created. Especially when they are doing the exact same thing.
Consider doing the sed -n ${n}p $input once per loop iteration.
Also consider doing the equivalent of awk as a shell array assignment, then accessing the individual elements.
With these two things you should be able to get the 12 or so processes (and the shell invocation via back quotes) down to a single shell invocation and the backquote.
Obviously, Ed's advice is far preferable, but if you don't want to follow that, I had a couple of thoughts...
Thought 1
Rather than run echo 5 times and cat head.in onto the Glycine file, each of which causes the file to be opened, seeked (or sought maybe) to the end, and appended, you could do that in one go like this:
# Instead of this (each >> opens the file, seeks to the end, writes, and
# closes it again -- once per command):
hmbi=3
echo "$hmbi" >> ~/Glycine_thing
echo "jobtype = energy" >> ~/Glycine_thing
echo "somethingelse" >> ~/Glycine_thing
echo ... >> ~/Glycine_thing
echo ... >> ~/Glycine_thing
cat ... >> ~/Glycine_thing
# Try this: the { ...; } group shares ONE redirection, so the file is
# opened and appended to exactly once for the whole group.
{
echo "$hmbi"
echo "jobtype = energy"
echo "somethingelse"
echo
echo
cat head.in
} >> ~/Glycine_thing
# Or, better still, this
# (NOTE(review): echo -e is non-portable; printf '%s\n' ... is safer)
echo -e "$hmbi\njobtype = energy\nsomethingelse" >> Glycine_thing
# Or, use a here-document, as suggested by @mklement0
# (the "-" after cat just names stdin explicitly; plain `cat <<EOF` works)
cat -<<EOF >>Glycine
$hmbi
jobtype = energy
next thing
EOF
Thought 2
Rather than invoke sed and awk 5 times to find 5 parameters, just let awk do what sed was doing, and also do all 5 things in one go:
read -r symbol x y z < <(awk '...{printf "%s %.10f %.10f %.10f", $2, $3, $4, $5}' "$input")
I was trying to write a BASH loop of the form:
~/$ for i in {1..$(grep -c "match" file)} ; do echo $i ; done
{1..20}
where I was hoping it would produce counted output. So I tried this instead:
~/$ export LOOP_COUNT=$(grep -c "match" file)
~/$ for i in {1..$LOOP_COUNT} ; do echo $i ; done
{1..20}
What I fell back to using was:
~/$ for i in $(seq 1 1 $(grep -c "match" file)) ; do echo $i ; done
1
2
3
...
20
Perfect! But how can I get that behaviour without using seq?
Have you tried this?
# Count the matching lines once, then count up to that total with a
# C-style for loop (no seq needed).
max=$(grep -c "match" file)
for (( c = 1; c <= max; c++ )); do
  echo "$c"
done
According to bash documentation
A sequence expression takes the form {x..y[..incr]}, where x and y are
either integers or single characters, and incr, an optional
increment, is an integer.
You can still use eval in other cases, but Mithrandir's advice is probably faster.
eval "for i in {1..$(grep -c 'match' file)} ; do echo \$i ; done"
Here is a recursive solution:
# Recursive counterpart of a counting loop: prints i, i+1, ..., n.
# $1 - current value, $2 - inclusive upper bound.
# i and n are intentionally left global, as in the original; each frame
# re-assigns them before printing, so the recursion still works.
loop () {
  i=$1
  n=$2
  echo "$i"
  if (( i < n )); then
    loop "$((i + 1))" "$n"
  fi
  # Explicit success: the original's `((i < n)) && loop ...` left the
  # function with exit status 1 once the bound was reached.
  return 0
}
LOOP_COUNT=$(grep -c "Int" sum.scala)
loop 1 $LOOP_COUNT