Bash script functions overflowing into others

Morning,
I'm trying to consolidate a number of smaller scripts into a single large bash script where everything is called via functions.
Most functions work fine (e.g. script.sh update); however, running script.sh status, for example, starts giving errors related to the docker() function.
I've corrected all the errors I can via shellcheck and tried adding return to each function, but it's still calling the wrong functions.
Here is the script in full:
#!/bin/bash
# variables and arguments
main() {
export XZ_OPT=-e9
distro=$(awk -F'"' '/^NAME/ {print $2}' /etc/os-release)
username=$(grep home /etc/passwd | sed 1q | cut -f1 -d:)
directory_home="/home/$username"
directory_script="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
rclone_command="rclone --config=$directory_script/rclone.conf"
docker_restart=("flexget" "cbreader" "syncthing")
args "$#"
}
args() {
action=$1
case "$action" in
archive) archive ;;
borg) borg ;;
docker) docker ;;
logger) logger ;;
magnet) magnet ;;
payslip) payslip ;;
permissions) permissions ;;
rclone) rclone_mount ;;
sshfs) sshfs_mount ;;
status) status ;;
sync) sync ;;
update) update ;;
*) echo "$0" && available_options ;;
esac
}
# functions
function available_options() {
sed -n '/^\tcase/,/\tesac$/p' "$0" | cut -f1 -d")" | sed '1d;$d' | sort | tr -d "*" | xargs
return
}
function plural() {
if (("$1">1))
then
echo s
fi
return
}
function dir_find() {
find "$directory_home" -maxdepth 3 -mount -type d -name "$1"
return
}
function domain_find() {
file_config_traefik="$(dir_find config)/traefik/traefik.toml"
awk -F'"' '/domain/ {print $2}' "$file_config_traefik"
return
}
function git_config() {
git config --global user.email "$username@$(domain_find)"
git config --global user.name "$username"
git config pack.windowMemory 10m
git config pack.packSizeLimit 20m
return
}
function delete_docker_env() {
if [[ -f "$directory_script/.env" ]]
then
echo Deleting existing env file
rm "$directory_script/.env"
fi
return
}
function delete_docker_compose() {
if [[ -f "$directory_script/docker-compose.yml" ]]
then
echo Deleting existing docker-compose file
rm "$directory_script/docker-compose.yml"
fi
return
}
function write_docker_env() {
{
printf "NAME=%s\\n" "$username"
printf "PASS=%s\\n" "$docker_password"
printf "DOMAIN=%s\\n" "$(domain_find)"
printf "PUID=%s\\n" "$(id -u)"
printf "PGID=%s\\n" "$(id -g)"
printf "TZ=%s\\n" "$(cat /etc/timezone)"
printf "HOMEDIR=%s\\n" "$directory_home"
printf "CONFDIR=%s\\n" "$(dir_find config)"
printf "DOWNDIR=%s\\n" "$(dir_find downloads)"
printf "POOLDIR=%s\\n" "$(dir_find media)"
printf "SAVEDIR=%s\\n" "$(dir_find saves)"
printf "SYNCDIR=%s\\n" "$(dir_find vault)"
printf "WORKDIR=%s\\n" "$(dir_find paperwork)"
printf "RCLONE_REMOTE_MEDIA=%s\\n" "$(rclone_remote media)"
printf "RCLONE_REMOTE_SAVES=%s\\n" "$(rclone_remote saves)"
printf "RCLONE_REMOTE_WORK=%s\\n" "$(rclone_remote work)"
} > "$directory_script/.env"
return
}
function payslip_config_write() {
{
printf "[retriever]\\n"
printf "type = SimpleIMAPSSLRetriever\\n"
printf "server = imap.yandex.com\\n"
printf "username = %s\\n" "$payslip_username"
printf "port = 993\\n"
printf "password = %s\\n\\n" "$payslip_password"
printf "[destination]\\n"
printf "type = Maildir\\n"
printf "path = %s/\\n" "$directory_temp"
} > getmailrc
return
}
function payslip_decrypt() {
cd "$(dir_find paperwork)" || exit
for i in *pdf
do
fileProtected=0
qpdf "$i" --check || fileProtected=1
if [ $fileProtected == 1 ]
then
qpdf --password=$payslip_encryption --decrypt "$i" "decrypt-$i" && rm "$i"
fi
done
return
}
function rclone_remote() {
$rclone_command listremotes | grep "$1"
return
}
function check_running_as_root() {
if [ "$EUID" -ne 0 ]
then
echo "Please run as root"
exit 0
fi
return
}
function include_credentials() {
source "$directory_script/credentials.db"
return
}
function archive() {
rclone_remote=$(rclone_remote backups)
working_directory=$(dir_find backups)/archives
echo "$working_directory"
if [ -z "$*" ]
then
echo Creating archives...
# build folder array?
cd "$(mktemp -d)" || exit
for i in "config" "vault"
do
tar -cJf "backup-$i-$(date +%Y-%m-%d-%H%M).tar.xz" --ignore-failed-read "$HOME/$i"
done
echo "Sending via rclone..."
for i in *
do
du -h "$i"
$rclone_command move "$i" "$rclone_remote"/archives/
done
echo Cleaning up...
rm -r "$PWD"
echo Done!
else
echo Creating single archive...
cd "$(mktemp -d)" || exit
tar -cJf "backup-$1-$(date +%Y-%m-%d-%H%M).tar.xz" --ignore-failed-read "$directory_home/$1"
echo "Sending via rclone..."
for i in *
do
du -h "$i" && $rclone_command move "$i" "$rclone_remote"/archives/
done
echo Cleaning up...
rm -r "$PWD"
echo Done!
fi
return
}
function update-arch() {
if [ -x "$(command -v yay)" ]
then
yay -Syu --noconfirm
else
pacman -Syu --noconfirm
fi
return
}
function update-debian() {
export DEBIAN_FRONTEND=noninteractive
apt-get update
apt-get dist-upgrade -y
apt-get autoremove --purge -y
apt-get clean
if [ -x "$(command -v youtube-dl)" ]
then
youtube-dl -U
fi
if [ -x "$(command -v rclone)" ]
then
curl --silent "https://rclone.org/install.sh" | bash
fi
return
}
function update-remaining() {
if [ -f "$directory_home/.config/retroarch/lrcm/lrcm" ]
then
"$directory_home/.config/retroarch/lrcm/lrcm" update
fi
find "$(dir_find config)" -maxdepth 2 -name ".git" -type d | sed 's/\/.git//' | xargs -P10 -I{} git -C {} pull
if [ -x "$(command -v we-get)" ]
then
pip3 install --upgrade git+https://github.com/rachmadaniHaryono/we-get
fi
if [ -x "$(command -v plowmod)" ]
then
su -c "plowmod -u" -s /bin/sh "$username"
chown -R "$username":"$username" "$directory_home/.config/plowshare"
fi
return
}
function borg() {
# https://opensource.com/article/17/10/backing-your-machines-borg
working_directory=$(dir_find backups)/borg
echo "$working_directory"
return
}
function docker() {
delete_docker_env
# delete_docker_compose
include_credentials
# update submodules
git pull --recurse-submodules
# write compose file
# {
# printf "nope"
# } > docker-compose.yml
# write env file
write_docker_env
# clean up existing stuff
echo Cleaning up existing docker files
for i in volume image system network
do
docker "$i" prune -f
done
docker system prune -af
# make network, if not existing
if ! printf "$(docker network ls)" | grep -q "proxy"
then
echo Creating docker network
docker network create proxy
fi
# start containers
echo Starting docker containers
docker-compose up -d --remove-orphans
delete_docker_env
return
}
function logger() {
git_config
git_directory="$(dir_find logger)"
file_git_log="$git_directory/media.log"
log_command="git --git-dir=$git_directory/.git --work-tree=$git_directory"
log_remote=$(rclone_remote media)
if [ ! -e "$git_directory" ]
then
mkdir "$git_directory" # make log directory
fi
if [ ! -e "$git_directory/.git" ]
then
$log_command init # initialise git repo
fi
if [ -e "$file_git_log.xz" ]
then
xz -d "$file_git_log.xz" # if xz archive exists, decompress
fi
if [ -e "$file_git_log" ]
then
rm "$file_git_log"
fi
$rclone_command ls "$log_remote" | sort -k2 > "$file_git_log" # create log
$rclone_command size "$log_remote" >> "$file_git_log" # append size
$log_command add "$file_git_log" # add log file
$log_command commit -m "Update: $(date +%Y-%m-%d)" # commit to repo, datestamped
if [ -e "$file_git_log.xz" ]
then
rm "$file_git_log.xz"
fi
xz "$file_git_log" # compress log
$log_command gc --aggressive --prune # compress repo
return
}
function magnet() {
if [ ! -f "$(dir_find vault)/*.magnet" ]
then
echo No magnet files found
exit 0
fi
mag2tor_script_path="$(dir_find config)/magnet2torrent/Magnet_To_Torrent2.py"
if [ ! -f "$mag2tor_script_path" ]
then
echo script not found, downloading
git clone "https://github.com/danfolkes/Magnet2Torrent.git" "$(dir_find config)/magnet2torrent"
fi
sshfs_mount
cd "$(dir_find vault)" || exit
for i in *.magnet
do
magnet_source="$(cat "$i")"
python "$mag2tor_script_path" -m "$magnet_source" -o "$(dir_find downloads)/remote/watch/"
rm "$i"
done
return
}
function payslip() {
# depends on: getmail4 mpack qpdf
directory_temp="$(mktemp -d)"
include_credentials
cd "$directory_temp" || exit
mkdir {cur,new,tmp}
payslip_config_write
getmail --getmaildir "$directory_temp"
cd new || exit
grep "$payslip_sender" ./* | cut -f1 -d: | uniq | xargs munpack -f
mv "*.pdf" "$(dir_find paperwork)/"
payslip_decrypt
rm -r "$directory_temp"
return
}
function permissions() {
check_running_as_root
chown "$username":"$username" "$directory_script/rclone.conf"
return
}
function rclone_mount() {
echo rclone mount checker
for i in backups media paperwork pictures saves
do
mount_point="$directory_home/$i"
if [[ -f "$mount_point/.mountcheck" ]]
then
echo "$i" still mounted
else
echo "$i" not mounted
echo force unmounting
fusermount -uz "$mount_point"
echo sleeping
sleep 5
echo mounting
$rclone_command mount "drive-$i": "/home/peter/$i" --vfs-cache-mode minimal --allow-other --allow-non-empty --daemon --log-file "$(dir-find config)/logs/rclone-$i.log" # --allow-other requires user_allow_other in /etc/fuse.conf
echo restarting docker containers
for j in "${docker_restart[#]}"
do
docker restart "$j"
done
fi
done
return
}
function sshfs_mount() {
include_credentials
echo sshfs mount checker
seedbox_host="$seedbox_username.seedbox.io"
seedbox_mount="$(dir_find downloads)/remote"
if [[ -d "$seedbox_mount/files" ]]
then
echo "sshfs mount exists"
else
echo "sshfs mount missing, mounting"
printf "%s" "$seedbox_password" | sshfs "$seedbox_username#$seedbox_host":/ "$seedbox_mount" -o password_stdin -o allow_other
fi
return
}
function status() {
status_filename=$(dir_find blog)/status.md
status_timestamp="$(date +%Y-%m-%d) at $(date +%H:%M)"
status_uptime=$(( $(cut -f1 -d. </proc/uptime) / 86400 ))
status_cpuavgs=$(cut -d" " -f1-3 < /proc/loadavg)
status_users=$(uptime | grep -oP '.{3}user' | sed 's/\user//g' | xargs)
status_ram=$(printf "%.0f" "$(free | awk '/Mem/ {print $3/$2 * 100.0}')")
status_swap=$(printf "%.0f" "$(free | awk '/Swap/ {print $3/$2 * 100.0}')")
status_rootuse=$(df / | awk 'END{print $5}')
status_dluse=$(df | awk '/downloads/ {print $5}')
status_dockers=$(docker ps -q | wc -l)/$(docker ps -aq | wc -l)
status_packages=$(dpkg -l | grep ^ii -c)
status_ifdata=$(vnstat -i eth0 -m --oneline | cut -f11 -d\;)
{
printf -- "---\\nlayout: page\\ntitle: Server Status\\ndescription: A (hopefully) recently generated server status page\\npermalink: /status/\\n---\\n\\n"
printf "*Generated on %s*\\n\\n" "$status_timestamp"
printf "* Uptime: %s" "$status_uptime"
printf " Day%s\\n" "$(plural "$status_uptime")"
printf "* CPU Load: %s\\n" "$status_cpuavgs"
printf "* Users: %s\\n" "$status_users"
printf "* RAM Usage: %s%%\\n" "$status_ram"
printf "* Swap Usage: %s%%\\n" "$status_swap"
printf "* Root Usage: %s\\n" "$status_rootuse"
printf "* Downloads Usage: %s\\n" "$status_dluse"
printf "* [Dockers](https://github.com/breadcat/Dockerfiles): %s\\n" "$status_dockers"
printf "* Packages: %s\\n" "$status_packages"
printf "* Monthly Data: %s\\n\\n" "$status_ifdata"
printf "Hardware specifications themselves are covered on the [hardware page](/hardware/#server).\\n"
} > "$status_filename"
return
}
function sync() {
source=$(rclone_remote gdrive | sed 1q)
dest=$(rclone_remote gdrive | sed -n 2p)
echo Syncing "$source" to "$dest"
$rclone_command sync "$source" "$dest" --drive-server-side-across-configs --verbose --log-file "$(dir_find config)/logs/rclone-sync-$(date +%Y-%m-%d-%H%M).log"
return
}
function update() {
check_running_as_root
if [[ $distro =~ "Debian" ]]
then
update-debian
elif [[ $distro =~ "Arch" ]]
then
update-arch
else
echo "Who knows what you're running"
fi
update-remaining
return
}
main "$#"

I believe you have a namespace problem.
You define a docker() function that does all sorts of things.
Then inside docker() you call $(docker network ls), which just calls the same function recursively; likewise, status runs $(docker ps -aq | wc -l).
There is only one namespace: after you define a function named docker, any call to docker anywhere will invoke that function, not the binary.
You can use the command builtin to bypass it. For example, after echo() { printf "I AM NOT ECHO\n"; }, running echo 123 executes the function, while command echo 123 looks up the echo executable in PATH and executes that instead.
However, I'd suggest simply using a unique prefix that won't interfere with anything, since declaring a function named docker hides the real command.
blabla_status() {} # instead of status()
blabla_docker() {} # instead of docker
# etc..
# then later in main()
case "$1" in
docker|status) blabla_"$1"; ;;
*) echo "Unknown function" >&2; ;;
esac
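For instance, here's a minimal sketch of that rename applied to the script in question (the my_ prefix is just one choice, and the function body is trimmed):
my_docker() {
    # "command docker" reaches the real binary even though a docker()-style function used to shadow it
    echo Cleaning up existing docker files
    command docker system prune -af
    if ! command docker network ls | grep -q "proxy"
    then
        command docker network create proxy
    fi
    docker-compose up -d --remove-orphans
}
# then in args()
case "$action" in
    docker) my_docker ;;
esac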

Related

bash script loop to check if variable contains string - not working

I have a script which copies files from one s3 bucket to a local server, does some stuff and uploads the result to another s3 bucket.
In the original bucket I have a few folders, one of them called "OTHER".
I don't want my script to work on this folder.
I tried to define a loop that checks whether the path string contains the string "OTHER" and only then continues to the other commands, but for some reason it is not working.
What am I doing wrong?
#!/bin/bash
shopt -s extglob
gcs3='s3://gc-reporting-pud-production/splunk_printer_log_files/'
gcs3ls=$((aws s3 ls 's3://gc-reporting-pud-production/splunk_printer_log_files/' --recursive) | sed 's/^.*\(splunk_printer.*\)/\1/g'| tr -s ' ' | tr ' ' '_')
ssyss3=s3://ssyssplunk
tokenFile=/splunkData/GCLogs/tokenFile.txt
nextToken=$((aws s3api list-objects-v2 --bucket "gc-reporting-pud-production" --prefix splunk_printer_log_files/ --max-items 5) |grep -o 'NEXTTOKEN.*' |awk -F " " '{print $2}')
newToken=$( tail -n 1 /splunkData/GCLogs/tokenFile.txt )
waterMark=$(aws s3api list-objects-v2 --bucket "gc-reporting-pud-production" --prefix splunk_printer_log_files/ --max-items 5 --starting-token
$newToken|sed 's/^.*\(splunk_printer.*zip\).*$/\1/'|sed '1d'|sed '$d')
while true; do
for j in $waterMark ; do
echo $j
if [ "$j" != *"OTHER"* ]; then
gcRegion=$(echo $j | awk -F'/' '{print $2}')
echo "gcRegion:"$gcRegion
if [ "$gcRegion" != "OTHER" ]; then
gcTech=$(echo $j | awk -F'/' '{print $3}')
echo "GCTech:"$gcTech
gcPrinterFamily=$(echo $j | awk -F'/' '{print $4}')
echo "gcPrinterFamily:" $gcPrinterFamily
gcPrinterType=$(echo $j | awk -F'/' '{print $5}')
echo "gcPrinterType:" $gcPrinterType
gcPrinterName=$(echo $j| awk -F'/' '{print $6}')
echo "gcPrinterName:" $gcPrinterName
gcFileName=$(echo $j| awk -F'/' '{print $7}'| awk -F'.zip' '{print $1}')
echo "gcFileName:" $gcFileName
cd /splunkData/GCLogs
dir="/splunkData/GCLogs/$gcRegion/$gcTech/$gcPrinterFamily/$gcPrinterType/$gcPrinterName"
echo "dir:"$dir
mkdir -p $dir
aws s3 sync $gcs3$gcRegion/$gcTech/$gcPrinterFamily/$gcPrinterType/$gcPrinterName/ $dir
find $dir -name '*.zip' -exec sh -c 'unzip -o -d "${0%.*}" "$0"' '{}' ';'
aws s3 cp $dir $ssyss3/$gcRegion/$gcTech/$gcPrinterFamily/$gcPrinterType/$gcPrinterName/ --recursive --exclude "*.zip"
newToken=$( tail -n 1 /splunkData/GCLogs/tokenFile.txt )
nextToken=$(aws s3api list-objects-v2 --bucket "gc-reporting-pud-production" --prefix splunk_printer_log_files/ --max-items 5 --starting-token $newToken |grep -o 'NEXTTOKEN.*' |awk -F " " '{print $2}')
waterMark=$(aws s3api list-objects-v2 --bucket "gc-reporting-pud-production" --prefix splunk_printer_log_files/ --max-items 5 --starting-token $newToken|sed 's/^.*\(splunk_printer.*zip\).*$/\1/'|sed '1d'|sed '$d')
echo "$nextToken" > "$tokenFile"
fi
fi
done
done
You need to use the double-bracket conditional command to turn == and != into pattern matching operators:
if [[ "$j" != *"OTHER"* ]]; then
# ^^ ^^
Or use case
case "$j" in
*OTHER*) ... ;;
*) echo "this is like an `else` block" ;;
esac
Paste your code into https://www.shellcheck.net/ for other things to fix.
I think glenn jackman was on the right path. Try this:
if [[ "$j" != *OTHER* ]]; then
The [[ ]] is required for pattern string matching (and you have to remove the quotes around the pattern). The case statement is also a good idea. You can abandon the shell test altogether and use grep as follows:
if
grep -q '.*OTHER.*' <<< "$j" 2>/dev/null
then
...
fi
Here's a check of the [[ ]]:
$ echo $j
abOTHERc
$ [[ "$j" == *OTHER* ]]
$ echo $?
0
As per BenjaminW., the quotes around $j in [[ ]] are unnecessary. However, the quotes around *OTHER* do make a big difference. See below:
$ j="OTHER THINGS"
$ [[ $j == "*OTHER*" ]] ; echo "$j" matches '"*OTHER*"': $?
OTHER THINGS matches "*OTHER*": 1
$ [[ $j == *OTHER* ]] ; echo "$j" matches '*OTHER*': $?
OTHER THINGS matches *OTHER*: 0
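For contrast, here's why the single-bracket version never worked: [ only does literal string comparison, so the asterisks aren't treated as a pattern (assuming no files in the current directory match the unquoted glob):
$ j=abOTHERc
$ [ "$j" != *"OTHER"* ]; echo $?
0
$ [[ "$j" != *"OTHER"* ]]; echo $?
1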

Unix Bash - Copy files from a source folder recursively to destination/*file_extension*(ex. “txt”) folder

This is my code; something in the rec_copy() function isn't working properly, probably this line:
cp $1/$f $HOME/$2/$dest
The extension-named folders are created in the destination folder, but the files are not copied there. Can you help me?
#!/bin/bash
if [ $# -ne 2 ]
then
echo "Usage: $0 <source> <destination>"
exit
fi
if [ ! -d $1 ]
then
echo "Source folder does not exist"
exit
fi
if [ -d $2 ]
then
rm -r $2
mkdir $2
else
mkdir $2
fi
extension=`ls -l $1 | grep -v "^d" | awk '{ print $10; }' | sed 's/^.*\.//g'`
for f in $extension
do
if [ ! -d $1/$f ]
then
mkdir $2/$f
fi
done
rec_copy(){
folder=`ls $1`
for f in $folder
do
dest=`echo "$f" | sed 's/.*\.//g'`
if [ -f $1/$f ]
then
cp $1/$f $HOME/$2/$dest
elif [ -d $1/$f ]
then
rec_copy $1/$f
fi
done
}
rec_copy $1
Here is the answer in case someone ever needs it:
#!/bin/bash
if [ $# -ne 2 ]
then
echo "Usage: $0 <izvor> <destinacija>"
exit
fi
if [ ! -d "$1" ]
then
echo "Izvorniot folder ne postoi"
exit
fi
if [ -d "$2" ]
then
rm -r "$2"
mkdir "$2"
else
mkdir "$2"
fi
extension=`ls -l "$1" | grep -v "^d" | awk '{ print $10; }' | sed 's/^.*\.//g' | sort -u`
for f in $extension
do
if [ ! -d "$1/$f" ]
then
mkdir "$2/$f"
fi
done
rec_copy(){
folder=`ls "$1"`
for f in $folder
do
dest=`echo "$f" | sed 's/.*\.//g'`
if [ -f "$1/$f" ]
then
cp "$1/$f" "$2/$dest"
elif [ -d "$1/$f" ]
then
rec_copy "$1/$f" "$2"
fi
done
}
rec_copy "./$1" "./$2"

How to pipe aws s3 cp to gzip to be used with "$QUERY" | psql utility

I have the following command
"$QUERY" | psql -h $DB_HOST -p $DB_PORT -U $DB_USERNAME $DB_NAME
where $QUERY is a command that loads files from a bucket, unzips them, and puts them into the database. It looks like the following:
COPY my_table
FROM PROGRAM 'readarray -t files <<<"$(aws s3 ls ${BUCKET_PATH} | tr [:space:] "\n")"; for (( n = ${#files[@]} - 1; n >= 0; n--)); do if [[ ${files[$n]} =~ .csv.gz$ ]]; then aws s3 cp ${BUCKET_PATH}${files[$n]} >(gzip -d -c); break; fi done'
WITH DELIMITER ',' CSV
Here is formatted bash code:
#!/usr/bin/env bash
raw_files=`aws s3 ls ${BUCKET_PATH} | tr [:space:] "\n"`
readarray -t files <<<"$raw_files"
for (( n = ${#files[@]} - 1; n >= 0; n--)); do
if [[ ${files[$n]} =~ .csv.gz$ ]];
then aws s3 cp ${BUCKET_PATH}${files[$n]} >(gzip -d -c);
break; # for test purposes to be no load all files, jsut one
fi
done
aws-CLI version
#: aws --version
#: aws-cli/1.11.13 Python/3.5.2 Linux/4.13.0-43-generic botocore/1.4.70
This script works. But when I try to use it with psql, it fails, and I cannot understand why.
How can I fix it?
Here is a script that loads data from the s3 bucket and merges it into one fat file:
#!/usr/bin/env bash
bucket_path=$1
limit_files=$2
target_file_name=$3
echo "Source bucket $bucket_path"
if [ -z $target_file_name ]; then
target_file_name="fat.csv.gz"
echo "Default target file $target_file_name"
fi
echo "Total files $(aws s3 ls $bucket_path | wc -l)"
readarray -t files <<<"$(aws s3 ls $bucket_path | tr [:space:] "\n")"
for (( n = ${#files[@]} - 1, i=1; n >= 0; n--)); do
if [[ ${files[$n]} =~ .csv.gz$ ]]; then
aws s3 cp --quiet $bucket_path${files[$n]} >(cat >> "$target_file_name");
echo "$((i++)), ${files[$n]}, current size: $(du -sh $target_file_name)"
if [ ! -z $limit_files ] && [ $i -gt $limit_files ]; then
echo "Final size $(du -sh $target_file_name)"
exit 0
fi
fi
done
exit 0
It works correctly.
But when I try to pipe this fat.csv.gz to the psql db using the following code
echo "COPY my_table
FROM PROGRAM 'gzip -d -c fat.csv.gz'
WITH DELIMITER ',' CSV" | psql -h $DB_HOST -p $DB_PORT -U $DB_USERNAME $DB_NAME
I am getting the error:
ERROR: must be superuser to COPY to or from a file
This seems to be specific to how pg works (I guess it's due to security reasons) - link
So the problem now is that I don't know how to rework my script to pipe the fat.csv.gz. I cannot get such a privilege and need to find a workaround.
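The standard workaround is COPY ... FROM STDIN, which reads data over the client connection and needs no superuser rights. A minimal sketch with the same variables:
gzip -d -c fat.csv.gz | psql -h $DB_HOST -p $DB_PORT -U $DB_USERNAME $DB_NAME \
    -c "COPY my_table FROM STDIN WITH DELIMITER ',' CSV"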
I finally wrote the following bash script: it downloads files from s3, merges them into 50MB archives, and pipes them to pg in subprocesses. Hope it will be helpful for somebody:
get_current_timestamp() (
date '+%s.%N'
)
execute_sql() (
write_log "Importing data from s3 to pg..."
import_data_from_s3 "$EVENTS_PATH"
write_log "Importing data from s3 to pg...done"
)
columns() (
local columns=`echo "SELECT array_to_string(
array(SELECT column_name::text
FROM information_schema.columns
WHERE table_name ILIKE '${TMP_TABLE}'
AND column_name NOT ILIKE '${DATE_FIELD}'), ',')" | \
psql --tuples-only -h $DB_HOST -p $DB_PORT -U $DB_USERNAME $DB_NAME`
echo -n "${columns}"
)
get_timestamp_difference() (
FROM=$1
TO=$2
echo $FROM $TO | awk '{
diff = $2-$1
if (diff >= 86400) {
printf "%i days ", diff/86400
}
if (diff >= 3600) {
printf "%i hours ", (diff/3600)%24
}
if (diff >= 60) {
printf "%i mins ", (diff/60)%60
}
printf "%f secs", diff%60
}'
)
pretty_size() (
if [ ! -z $1 ]; then
local size=$1;
else
local size=`cat <&0`;
fi
echo "${size}" | \
awk '{ \
split( "B KB MB GB" , v ); \
s=1; \
while( $1>=1024 ) { \
$1/=1024; s++ \
} \
printf "%.1f%s", $1, v[s] \
}' | \
add_missing_eol >&1
)
import_data_from_s3() (
local bucket_path=$1
local limit_files=$2
local target_file_name=$3
write_log "Source bucket $bucket_path"
if [ -z ${target_file_name} ]; then
target_file_name="fat.csv.gz"
write_log "Default target file $target_file_name"
fi
if [ ! -z ${limit_files} ]; then
write_log "Import ${limit_files} files"
else
write_log "Import all files"
fi
write_log "Total files $(aws s3 ls $bucket_path | wc -l)"
readarray -t files <<<"$(aws s3 ls $bucket_path | tr [:space:] "\n")"
write_log "Remove old data files..."
find . -maxdepth 1 -type f -name "*${target_file_name}" -execdir rm -f {} +;
write_log "Remove old data files...done"
TMP_TABLE_COLUMNS=$(columns)
write_log "Importing columns: ${DW_EVENTS_TMP_TABLE_COLUMNS}"
declare -A pids
local total_data_amount=0
local file_size_bytes=0
local size_limit=$((50*1024*1024))
for (( n = ${#files[@]} - 1, file_counter=1, fat_file_counter=1; n >= 0; n--)); do
if [[ ! ${files[$n]} =~ .csv.gz$ ]]; then continue; fi
file="${fat_file_counter}-${target_file_name}"
aws s3 cp --quiet ${bucket_path}${files[$n]} >(cat >> "${file}");
file_size_bytes=$(stat -c%s "$file")
if [ $file_size_bytes -gt $size_limit ]; then
import_zip "${file}" "$(pretty_size ${file_size_bytes})" & pids["${file}"]=$!;
total_data_amount=$((total_data_amount+file_size_bytes))
write_log "Files read: ${file_counter}, total size(zipped): $(pretty_size ${total_data_amount})"
((fat_file_counter++))
fi
# write_log "${file_counter}, ${files[$n]}, current size: $(du -sh $file)"
if [ ! -z ${limit_files} ] && [ ${file_counter} -gt ${limit_files} ]; then
write_log "Final size $(du -sh ${file})"
if [ ! ${pids["${file}"]+0} ]; then
import_zip "${file}" "$(pretty_size ${file_size_bytes})" & pids["${file}"]=$!;
fi
break;
fi
((file_counter++))
done
# import rest file that can less than limit size
if [ ! ${pids["${file}"]+0} ]; then
import_zip "${file}" "$(pretty_size ${file_size_bytes})" & pids["${file}"]=$!;
fi
write_log "Waiting for all pids: ${pids[*]}"
for pid in ${pids[*]}; do
wait $pid
done
write_log "All sub process have finished. Total size(zipped): $(pretty_size ${total_data_amount})"
)
import_zip() (
local file=$1
local size=$2
local start_time=`get_current_timestamp`
write_log "pid: $!, size: ${size}, importing ${file}...";
gzip -d -c ${file} | \
psql --quiet -h ${DB_HOST} -p ${DB_PORT} -U ${DB_USERNAME} ${DB_NAME} \
-c "COPY ${TMP_TABLE}(${TMP_TABLE_COLUMNS})
FROM STDIN
WITH DELIMITER ',' CSV";
rm $file;
local end_time=`get_current_timestamp`
write_log "pid: $!, time: `get_timestamp_difference ${start_time} ${end_time}`, size: ${size}, importing ${file}...done";
)

How does CMake detect changed files

I have a "C"/C++ CMake project which works fine. However, I'm sometimes (re)building on a remote cluster where the time is slightly different. This machine runs Linux and I'm building using make. I'm wondering if there is some make/CMake way to change how the changes to the files are detected, e.g. to MD5 or diff rather than using timestamps. Otherwise I guess I'd either have to endure the constant make clean / make -j cycle or have to change my local time every time I'm working with that particular server.
I was poking CMake documentation to see if there is a flag which would change these settings but found none. How would this work on platforms which have no RTC (e.g. Raspberry)?
Right, so knowing that CMake / make does not do what I want and I don't want the hassle of synchronizing the time of my machine to the target, I came up with the following:
#!/bin/bash
touch src_hash.md5
echo -n make "$#" > mymake.sh
find `pwd`/../src `pwd`/../include -print0 |
while IFS= read -r -d $'\0' f; do
if [[ ! -d "$f" ]]; then
MD5=`md5sum "$f" | awk -v fn="$f" '{ print "\"" fn "\" " $1; }'`
echo $MD5 >> src_hash.md5.new
OLDMD5=`grep -e "^\"$f\"" src_hash.md5`
if [[ "$OLDMD5" == "" ]]; then
echo "$MD5 -- [a new file]"
continue # a new file, make can handle that well on its own
fi
HASH=`echo $MD5 | awk '{ print $2; }'`
OLDHASH=`echo $OLDMD5 | awk '{ print $2; }'`
if [[ "$HASH" != "$OLDHASH" ]]; then
echo "$MD5 -- changed from $OLDHASH"
echo -n " \"--what-if=${f}\"" >> mymake.sh
# this is running elsewhere, can't pass stuff via variables
fi
fi
done
touch src_hash.md5.new
mv src_hash.md5.new src_hash.md5
echo using: `cat mymake.sh`
echo >> mymake.sh # add a newline
chmod +x mymake.sh
./mymake.sh
rm -f mymake.sh
This keeps a list of source file hashes in src_hash.md5 and at each time it runs it compares the current files to those hashes (and updates the list accordingly).
At the end, it calls make, passing any arguments you give to the script (such as -j). It makes use of the --what-if= switch which tells make to act like the given file changed - that way the dependences of build targets on sources / headers are handled elegantly.
You might want to also pass the path to source / include files as arguments so that those wouldn't be hardcoded inside.
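Distilled to its essentials, the approach is roughly this (a sketch; the source paths are illustrative):
#!/bin/bash
# hash every source file, then pass each line that differs from the stored
# list to make via --what-if (new files get flagged too, which is harmless)
touch src_hash.md5
find ../src ../include -type f -exec md5sum {} + | sort > src_hash.md5.new
what_if=()
while IFS= read -r line; do
    what_if+=("--what-if=${line#*  }")   # strip the "<hash>  " prefix
done < <(comm -13 src_hash.md5 src_hash.md5.new)
mv src_hash.md5.new src_hash.md5
make "$@" "${what_if[@]}"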
Or one more iteration on the said script, using touch to change and restore the file timestamps for situations when make is extra stubborn about not rebuilding anything:
#!/bin/bash
if [[ ! -d ../src ]]; then
>&2 echo "error: ../src is not a directory or does not exist"
exit -1
fi
if [[ ! -d ../include ]]; then
>&2 echo "error: ../include is not a directory or does not exist"
exit -1
fi
echo "Scanning for changed files in ../src and ../include"
touch src_hash.md5 # in case this runs for the first time
rm -f mymaketouch.sh
rm -f mymakerestore.sh
touch mymaketouch.sh
touch mymakerestore.sh
echo -n make "$#" > mymake.sh
CWD="`pwd`"
find ../src ../include -print0 |
while IFS= read -r -d $'\0' f; do
if [[ ! -d "$f" ]]; then
fl=`readlink -f "$CWD/$f"`
MD5=`md5sum "$fl" | awk -v fn="$fl" '{ print "\"" fn "\" " $1; }'`
HASH=`echo $MD5 | awk '{ print $2; }'`
echo $MD5 >> src_hash.md5.new
OLDMD5=`grep -e "^\"$fl\"" src_hash.md5`
OLDHASH=`echo $OLDMD5 | awk '{ print $2; }'`
if [[ "$OLDMD5" == "" ]]; then
echo "$f $HASH -- [a new file]"
continue # a new file, make can handle that well on its own
fi
if [[ "$HASH" != "$OLDHASH" ]]; then
echo "$f $HASH -- changed from $OLDHASH"
echo "touch -m \"$fl\"" >> mymaketouch.sh # will touch it and change modification time
stat "$fl" -c "touch -m -d \"%y\" \"%n\"" >> mymakerestore.sh # will restore it later on so that we do not run into problems when copying newer from a different system
echo -n " \"--what-if=$fl\"" >> mymake.sh
# this is running elsewhere, can't pass stuff via variables
fi
fi
done
echo using: `cat mymake.sh`
echo >> mymake.sh # add a newline
echo 'exit $?' >> mymake.sh
chmod +x mymaketouch.sh
chmod +x mymakerestore.sh
chmod +x mymake.sh
control_c() # run if user hits control-c
{
echo -en "\nrestoring modification times\n"
./mymakerestore.sh
rm -f mymaketouch.sh
rm -f mymakerestore.sh
rm -f mymake.sh
rm -f src_hash.md5.new
exit -1
}
trap control_c SIGINT
./mymaketouch.sh
./mymake.sh
RETVAL=$?
./mymakerestore.sh
rm -f mymaketouch.sh
rm -f mymakerestore.sh
rm -f mymake.sh
touch src_hash.md5.new # in case there was nothing new
mv src_hash.md5.new src_hash.md5
# do it now in case someone hits ctrl+c mid-build and not all files are built
exit $RETVAL
Or even run hashing in parallel in case you are building a large project:
#!/bin/bash
if [[ ! -d ../src ]]; then
>&2 echo "error: ../src is not a directory or does not exist"
exit -1
fi
if [[ ! -d ../include ]]; then
>&2 echo "error: ../include is not a directory or does not exist"
exit -1
fi
echo "Scanning for changed files in ../src and ../include"
touch src_hash.md5 # in case this runs for the first time
rm -f mymaketouch.sh
rm -f mymakerestore.sh
touch mymaketouch.sh
touch mymakerestore.sh
echo -n make "$#" > mymake.sh
CWD="`pwd`"
rm -f src_hash.md5.new # will use ">>", make sure to remove the file
find ../src ../include -print0 |
while IFS= read -r -d $'\0' f; do
if [[ ! -d "$f" ]]; then
fl="$CWD/$f"
(echo `md5sum "$f" | awk -v fn="$fl" '{ print "\"" fn "\" " $1; }'` ) & # parallel, echo is atomic (http://stackoverflow.com/questions/9926616/is-echo-atomic-when-writing-single-lines)
# run in parallel (remove the ampersand if you run into trouble)
fi
done >> src_hash.md5.new # >> is atomic but > wouldn't be
# this is fast
cat src_hash.md5 > src_hash.md5.diff
echo separator >> src_hash.md5.diff
cat src_hash.md5.new >> src_hash.md5.diff
# make a compound file for awk (could also read the other file in awk but this seems simpler right now)
cat src_hash.md5.diff | awk 'BEGIN { FS="\""; had_sep = 0; }
{
if(!had_sep && $1 == "separator")
had_sep = 1;
else {
sub(/[[:space:]]/, "", $3);
if(!had_sep)
old_hashes[$2] = $3;
else {
f = $2;
if((idx = index(f, "../")) != 0)
f = substr(f, idx, length(f) - idx + 1);
if($2 in old_hashes) {
if(old_hashes[$2] != $3)
print "\"" f "\" " $3 " -- changed from " old_hashes[$2];
} else
print "\"" f "\" -- a new file " $3;
}
}
}'
# print verbose for the user only
cat src_hash.md5.diff | awk 'BEGIN { FS="\""; had_sep = 0; }
{
if(!had_sep && $1 == "separator")
had_sep = 1;
else {
sub(/[[:space:]]/, "", $3);
if(!had_sep)
old_hashes[$2] = $3;
else {
if($2 in old_hashes) {
if(old_hashes[$2] != $3)
printf($2 "\0"); /* use \0 as a line separator for the below loop */
}
}
}
}' |
while IFS= read -r -d $'\0' fl; do
echo "touch -m \"$fl\"" >> mymaketouch.sh # will touch it and change modification time
stat "$fl" -c "touch -m -d \"%y\" \"%n\"" >> mymakerestore.sh # will restore it later on so that we do not run into problems when copying newer from a different system
echo -n " \"--what-if=$fl\"" >> mymake.sh
# this is running elsewhere, can't pass stuff via variables
done
# run again, handle files that require change
rm -f src_hash.md5.diff
echo using: `cat mymake.sh`
echo >> mymake.sh # add a newline
echo 'exit $?' >> mymake.sh
chmod +x mymaketouch.sh
chmod +x mymakerestore.sh
chmod +x mymake.sh
control_c() # run if user hits control-c
{
echo -en "\nrestoring modification times\n"
./mymakerestore.sh
rm -f mymaketouch.sh
rm -f mymakerestore.sh
rm -f mymake.sh
rm -f src_hash.md5.new
exit -1
}
trap control_c SIGINT
./mymaketouch.sh
./mymake.sh
RETVAL=$?
./mymakerestore.sh
rm -f mymaketouch.sh
rm -f mymakerestore.sh
rm -f mymake.sh
touch src_hash.md5.new # in case there was nothing new
mv src_hash.md5.new src_hash.md5
# do it now in case someone hits ctrl+c mid-build and not all files are built
exit $RETVAL
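For reference, the --what-if flag used throughout (GNU make also spells it -W or --assume-new) tells make to act as if the named file had just been modified, without actually touching it:
make -W ../src/main.c    # rebuild whatever depends on this file (illustrative path)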

how to pass list to parallel

I am trying to use parallel in the following script
#!/bin/bash
declare -a ephemeral_list
for mount in $(lsblk | grep ^x | awk '{ print $1 }')
do
if ! mount | grep $mount >/dev/null; then
ephemeral_list+=($mount)
fi
done
for i in "${!ephemeral_list[#]}"
do
printf "%s\t%s\n" "$i" "${ephemeral_list[$i]}"
[ -d /mnt/ephemeral$i ] || mkdir /mnt/ephemeral$i
mkfs.ext4 -E nodiscard /dev/${ephemeral_list[$i]} && mount /dev/${ephemeral_list[$i]} /mnt/ephemeral$i &
done
I want to run "mkfs.ext4 -E nodiscard /dev/${ephemeral_list[$i]} && mount /dev/${ephemeral_list[$i]} /mnt/ephemeral$i &" command on each cpu here
any help ?
thanks
Make a function. Call that.
mymkfs() {
printf "%s\t%s\n" "$1" "$2"
[ -d /mnt/ephemeral$1 ] || mkdir /mnt/ephemeral$1
mkfs.ext4 -E nodiscard /dev/"$2" && mount /dev/"$2" /mnt/ephemeral$1
}
export -f mymkfs
parallel mymkfs {#} {} ::: "${ephemeral_list[@]}"
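A few notes on why this works: export -f makes the shell function visible inside the subshells GNU parallel spawns; {#} expands to the sequential job number, standing in for the array index of the original loop; {} is the list element itself; and ::: feeds the array elements in as arguments. By default parallel runs one job per CPU core, which is what was asked for.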
