Query information inside files (UNIX, AWK) - bash

I need to query information from about 1000 files at once.
For example, my filenames are:
Test_001_20150517
Test_001_20150530
The information inside each file looks like this:
{
1=2015
2=8
3=4
4=98888
5=123456
}
{
1=2014
2=456
3=5588
4=95858
5=67889
}
I want to query these 2 files with the condition that 1=2015, and only show the value of 5:
cat *201505*|awk -F '=' '{if ($1=="5"){print $2}}'
I can print the value of 5, but there is no condition that 1=2015, and I don't know what to do, because the keys 1 and 5 both land in $1.
Sorry for my poor English if something in my question is wrong or misleading.

Is this what you want?
$ awk -F'=' '{a[$1]=$2} /}/ && (a[1] == 2015) {print a[5]}' file
123456
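As a brief explanation: the script collects every key=value pair into the array a, and at each closing brace tests a[1] and prints a[5]. Because a is never cleared, a value can leak from one block into the next if a later block is missing a key. A slightly more defensive sketch (same idea, resetting the array at every opening brace with the portable split("", a) idiom, and assuming the braces sit on their own lines as in the sample):
awk -F'=' '
    $0 == "{" { split("", a) }                    # new block: empty the array
    NF == 2   { a[$1] = $2 }                      # collect key=value pairs
    $0 == "}" && a[1] == 2015 { print a[5] }      # end of block: test key 1, print key 5
' *201505*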


Divide one stat/number by another in bash

I have a simple question, but I'm not that good with bash. I'm using a command line to get the number of queries and cached queries my Pi-Hole makes to Unbound (a recursive DNS resolver), and I want to display the cached queries as a percentage of total queries. Here's the line that gets the total queries:
sudo unbound-control stats_noreset | awk -F '=' '/total.num.queries/ {print $NF}'
This gives me, for example, 1500, and I want to divide the number from the following line by it:
sudo unbound-control stats_noreset | awk -F '=' '/total.num.cachehits/ {print $NF}'
This gives me, for example, 500, and I want to display it as:
500 33.3%
with a single line of code only, not with shell variables.
Thanks a lot! I've been trying for days.
Edit: as asked, the full output of sudo unbound-control stats_noreset is:
sudo unbound-control stats_noreset
thread0.num.queries=1294
thread0.num.queries_ip_ratelimited=0
thread0.num.cachehits=327
thread0.num.cachemiss=967
thread0.num.prefetch=134
thread0.num.zero_ttl=0
thread0.num.recursivereplies=967
thread0.requestlist.avg=0.334242
thread0.requestlist.max=9
thread0.requestlist.overwritten=0
thread0.requestlist.exceeded=0
thread0.requestlist.current.all=0
thread0.requestlist.current.user=0
thread0.recursion.time.avg=0.080698
thread0.recursion.time.median=0.0325689
thread0.tcpusage=0
thread1.num.queries=1309
thread1.num.queries_ip_ratelimited=0
thread1.num.cachehits=342
thread1.num.cachemiss=967
thread1.num.prefetch=132
thread1.num.zero_ttl=0
thread1.num.recursivereplies=967
thread1.requestlist.avg=0.374886
thread1.requestlist.max=9
thread1.requestlist.overwritten=0
thread1.requestlist.exceeded=0
thread1.requestlist.current.all=0
thread1.requestlist.current.user=0
thread1.recursion.time.avg=0.075309
thread1.recursion.time.median=0.0322503
thread1.tcpusage=0
thread2.num.queries=1338
thread2.num.queries_ip_ratelimited=0
thread2.num.cachehits=336
thread2.num.cachemiss=1002
thread2.num.prefetch=156
thread2.num.zero_ttl=0
thread2.num.recursivereplies=1002
thread2.requestlist.avg=0.360104
thread2.requestlist.max=9
thread2.requestlist.overwritten=0
thread2.requestlist.exceeded=0
thread2.requestlist.current.all=0
thread2.requestlist.current.user=0
thread2.recursion.time.avg=0.073632
thread2.recursion.time.median=0.031425
thread2.tcpusage=0
thread3.num.queries=1258
thread3.num.queries_ip_ratelimited=0
thread3.num.cachehits=339
thread3.num.cachemiss=919
thread3.num.prefetch=127
thread3.num.zero_ttl=0
thread3.num.recursivereplies=919
thread3.requestlist.avg=0.315488
thread3.requestlist.max=9
thread3.requestlist.overwritten=0
thread3.requestlist.exceeded=0
thread3.requestlist.current.all=0
thread3.requestlist.current.user=0
thread3.recursion.time.avg=0.073834
thread3.recursion.time.median=0.0308651
thread3.tcpusage=0
total.num.queries=5199
total.num.queries_ip_ratelimited=0
total.num.cachehits=1344
total.num.cachemiss=3855
total.num.prefetch=549
total.num.zero_ttl=0
total.num.recursivereplies=3855
total.requestlist.avg=0.34673
total.requestlist.max=9
total.requestlist.overwritten=0
total.requestlist.exceeded=0
total.requestlist.current.all=0
total.requestlist.current.user=0
total.recursion.time.avg=0.075873
total.recursion.time.median=0.0317773
total.tcpusage=0
time.now=1613041718.040611
time.up=14305.501526
time.elapsed=14305.501526
thread0, thread1, etc. are the cores, but my interest is only in the totals.
I assume when you say without variables, you mean without variables in the shell.
With this in mind, you can use awk variables to store the intermediate result:
sudo unbound-control stats_noreset | awk -F '=' '$1 == "total.num.queries" {queries=$NF} $1 == "total.num.cachehits" {hits=$NF}END{print hits, hits/queries*100"%"}'
or in a more readable multi-line format:
sudo unbound-control stats_noreset |
awk -F '=' '$1 == "total.num.queries" { queries = $NF }
$1 == "total.num.cachehits" { hits = $NF }
END { print hits, hits / queries * 100 "%" }'
The output is:
1344 25.8511%
If you need only a single decimal place in the output, you can use printf, like
END { printf "%d %.1f%%\n", hits, hits / queries * 100 }
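Putting it together, the full pipeline would be:
sudo unbound-control stats_noreset |
awk -F '=' '$1 == "total.num.queries"   { queries = $NF }
            $1 == "total.num.cachehits" { hits = $NF }
            END { printf "%d %.1f%%\n", hits, hits / queries * 100 }'
With the totals shown above (1344 cache hits out of 5199 queries), this prints 1344 25.9%.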

Error "awk: too many output files 10" when splitting SSL certificates

I'm trying to split a file that contains multiple SSL certificates with AWK, but it shows an error message.
Command that I'm using is the following:
cat ${SSL_CERTIFICATES_PATH} | awk '/BEGIN/ { i++; } /BEGIN/, /END/ { print > i ".extracted.crt" }'
Error Message:
awk: too many output files 10
record number 735
Do you know how I could solve this issue?
You have to close() each file once you're done with it; traditional awks can only keep a small number of output files open at once (here, 10), and print > file leaves every file open:
awk '/BEGIN/ {f=i++".extracted.crt"}/BEGIN/,/END/{print > f;if(/END/)close(f)}'
The best solution, as suggested by Ed Morton, is to avoid range expressions altogether; for more details, read here:
awk '/BEGIN/{f=(++i)".extracted.crt"} f{print>f} /END/{close(f);f=""}'
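The same one-liner, expanded with comments (identical logic):
awk '
    /BEGIN/ { f = (++i) ".extracted.crt" }    # start of a block: pick the next filename
    f       { print > f }                     # while f is set, write the current line to it
    /END/   { close(f); f = "" }              # end of a block: close the file, stop writing
' "${SSL_CERTIFICATES_PATH}"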
Here is a sample (not a certificate).
Input
$ cat file
BEGIN
1
END
BEGIN
2
END
BEGIN
3
END
Execution
$ awk '/BEGIN/{f=i++".extracted.crt"}/BEGIN/,/END/{print > f;if(/END/)close(f)}' file
$ awk '/BEGIN/{f=(++i)".extracted.crt"} f{print>f} /END/{close(f);f=""}' file
Output files
$ ls *.crt
0.extracted.crt 1.extracted.crt 2.extracted.crt
File contents of each
$ for i in *.crt; do echo $i; cat $i; done
0.extracted.crt
BEGIN
1
END
1.extracted.crt
BEGIN
2
END
2.extracted.crt
BEGIN
3
END
We have to close the previous file each time variable i increases by 1, so try the following and let me know if it helps you.
awk '/BEGIN/ {close(i".extracted.crt");i++} /BEGIN/, /END/ { print > i".extracted.crt" }' ${SSL_CERTIFICATES_PATH}
EDIT: Xavier, I have checked with a friend who has SUN 5, and the following worked well without any error. You can substitute your variable as per your need.
/usr/xpg4/bin/awk '/BEGIN/ {close(i".extracted.crt");i++} /BEGIN/, /END/ { print > i".extracted.crt" }' *.crt
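For instance, with the path variable from the question, quoted, and with parentheses around the concatenated filename (some awks require them in a redirection):
/usr/xpg4/bin/awk '/BEGIN/ {close(i".extracted.crt"); i++} /BEGIN/, /END/ {print > (i".extracted.crt")}' "${SSL_CERTIFICATES_PATH}"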

Splitting a Big File into Smaller Chunks in Shell Scripting

I need to split a bigger file into smaller chunks based on the last occurrence of a pattern, using a shell script. For example:
Sample.txt (the file is sorted on the third field, which is the field the pattern is searched in):
NORTH EAST|0004|00001|Fost|Weaather|<br/>
NORTH EAST|0004|00001|Fost|Weaather|<br/>
SOUTH|0003|00003|Haet|Summer|<br/>
SOUTH|0003|00003|Haet|Summer|<br/>
SOUTH|0003|00003|Haet|Summer|<br/>
EAST|0007|00016|uytr|kert|<br/>
EAST|0007|00016|uytr|kert|<br/>
WEST|0002|00112|WERT|fersg|<br/>
WEST|0002|00112|WERT|fersg|<br/>
SOUTHWEST|3456|01134|GDFSG|EWRER|<br/>
"Pattern 1 = 00003 " to be searched output file must contain sample_00003.txt
NORTH EAST|0004|00001|Fost|Weaather|<br/>
NORTH EAST|0004|00001|Fost|Weaather|<br/>
SOUTH|0003|00003|Haet|Summer|<br/>
SOUTH|0003|00003|Haet|Summer|<br/>
SOUTH|0003|00003|Haet|Summer|<br/>
"Pattren 2 = 00112" to be searched output file must contain sample_00112.txt
EAST|0007|00016|uytr|kert|<br/>
EAST|0007|00016|uytr|kert|<br/>
WEST|0002|00112|WERT|fersg|<br/>
WEST|0002|00112|WERT|fersg|<br/>
I used
awk -F'|' -v 'pattern="00003"' '$3~pattern big_file' > smallfile
and grep commands, but it was very time-consuming since the file is 300+ MB in size.
Not sure if you'll find a faster tool than awk, but here's a variant that fixes your own attempt and also speeds things up a little by using string matching rather than regex matching.
It processes lookup values in a loop, and outputs everything from where the previous iteration left off through the last occurrence of the value at hand to a file named smallfile<n>, where <n> is an index starting with 1.
ndx=0; fromRow=1
for val in '00003' '00112' '|'; do   # 2 sample values to match, plus a dummy value
  chunkFile="smallfile$(( ++ndx ))"
  fromRow=$(awk -F'|' -v fromRow="$fromRow" -v outFile="$chunkFile" -v val="$val" '
    NR < fromRow { next }                              # skip rows written by earlier iterations
    { if ($3 != val) { if (p) { print NR; exit } }     # first row past the last match: report it and stop
      else { p=1 } }                                   # remember that a match has been seen
    { print > outFile }                                # copy the current row to the chunk file
  ' big_file)
done
Note that the dummy value | ensures that any rows remaining after the last true match are saved to a chunk file too (it can never equal a field value, since | is the field separator).
Note that moving all the logic into a single awk script should be much faster, because big_file would only have to be read once:
awk -F'|' -v vals='00003|00112' '
BEGIN { split(vals, val); outFile = "smallfile" ++ndx }
{
  if ($3 != val[ndx]) {
    if (p) { p = 0; close(outFile); outFile = "smallfile" ++ndx }
  } else {
    p = 1
  }
  print > outFile
}
' big_file
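Tracing that script against Sample.txt above (a derivation, not a verbatim run), smallfile1 should match the desired sample_00003.txt, smallfile2 should match sample_00112.txt, and smallfile3 catches the trailing SOUTHWEST row:
$ head smallfile1 smallfile2 smallfile3
==> smallfile1 <==
NORTH EAST|0004|00001|Fost|Weaather|<br/>
NORTH EAST|0004|00001|Fost|Weaather|<br/>
SOUTH|0003|00003|Haet|Summer|<br/>
SOUTH|0003|00003|Haet|Summer|<br/>
SOUTH|0003|00003|Haet|Summer|<br/>

==> smallfile2 <==
EAST|0007|00016|uytr|kert|<br/>
EAST|0007|00016|uytr|kert|<br/>
WEST|0002|00112|WERT|fersg|<br/>
WEST|0002|00112|WERT|fersg|<br/>

==> smallfile3 <==
SOUTHWEST|3456|01134|GDFSG|EWRER|<br/>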
You can try with Perl:
perl -ne '/00003/ && print' big_file > small_file
and compare its timing with other solutions...
EDIT
Limiting my answer to the tools you didn't try already... you can also use:
sed -n '/00003/p' big_file > small_file
But I tend to believe perl will be faster. Again... I'd suggest you measure the elapsed time of the different solutions yourself.
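A minimal way to compare them (hypothetical timing runs; discard the first run of each so the file cache doesn't skew the numbers):
time awk -F'|' '$3 == "00003"' big_file > /dev/null
time perl -ne '/00003/ && print' big_file > /dev/null
time sed -n '/00003/p' big_file > /dev/null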

Change a value at a specific line and column given by the user (UNIX) [duplicate]

This question already has an answer here:
replace columns UNIX with awk
(1 answer)
Closed 6 years ago.
I have this file:
16492674422392|Alberto|Parra|female|1985-09-22|2012-09-01T01:30:59.228+0000|190.96.12.239|Chrome
16492674424948|Peng|Chen|female|1984-07-26|2012-09-23T00:51:52.900+0000|1.4.10.198|Internet Explorer
16492674425075|Changpeng|Xu|female|1984-03-27|2012-10-02T03:55:00.946+0000|1.50.15.119|Firefox
16492674425398|Prince|Kobayashi|male|1989-08-07|2012-09-30T03:30:41.772+0000|14.101.89.18|Chrome
16492674426410|Yang|Wei|male|1980-07-01|2012-10-01T13:11:48.528+0000|27.144.204.193|Firefox
I want the user to:
choose an id (id is the first column)
choose a column and
change the value to one value chosen by the user.
I use:
./tool.sh 16492674426410 3 replacement
as the inputs, and the code I run is:
awk -v antik1=$1 -v antik2=$2 '
{
sub(antik1, antik2);
print;
}' persons.dat.txt
This script doesn't let the user choose the column and id. How can I modify it so it works as I want?
Give this tested version a try:
#!/bin/bash --
awk -v anid="${1}" -v antik1="${2}" -v antik2="${3}" '
BEGIN {
FS="|";
OFS="|";
}
{
if ($1 == anid) {
$antik1=antik2;
}
print;
}' persons.dat.txt
The test:
$ ./tool.sh 16492674426410 3 replacement
16492674422392|Alberto|Parra|female|1985-09-22|2012-09-01T01:30:59.228+0000|190.96.12.239|Chrome
16492674424948|Peng|Chen|female|1984-07-26|2012-09-23T00:51:52.900+0000|1.4.10.198|Internet Explorer
16492674425075|Changpeng|Xu|female|1984-03-27|2012-10-02T03:55:00.946+0000|1.50.15.119|Firefox
16492674425398|Prince|Kobayashi|male|1989-08-07|2012-09-30T03:30:41.772+0000|14.101.89.18|Chrome
16492674426410|Yang|replacement|male|1980-07-01|2012-10-01T13:11:48.528+0000|27.144.204.193|Firefox
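How it works: awk's $ operator accepts any expression, so $antik1 = antik2 assigns to whichever column number was passed as the second argument, and only on the row whose first field equals the chosen id. If you also want to save the change back to the file, here is a sketch (it writes to a hypothetical temp file first, since awk cannot edit in place):
awk -v anid="${1}" -v antik1="${2}" -v antik2="${3}" '
BEGIN { FS = OFS = "|" }
$1 == anid { $antik1 = antik2 }
{ print }' persons.dat.txt > persons.dat.txt.tmp && mv persons.dat.txt.tmp persons.dat.txt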

Find the nth row using AWK and assign it to a variable

Okay, I have two files: one is a baseline and the other is a generated report. I have to validate that a specific string matches in both files, and it is not just a single word; see the example below:
.
.
name os ksd
56633223223
some text..................
some text..................
My search criterion here is to find a unique number such as "56633223223" and retrieve 1 line above it and 3 lines below it. I can do that on both the base file and the report, and then compare whether they match. On the whole, I need a shell script for this.
Since the strings above and below are unique but the line counts vary, I put them in a file called "actlist":
56633223223 1 5
56633223224 1 6
56633223225 1 3
.
.
Now from "Rcount" below I get how many iterations to perform; in each iteration I have to get the ith row, check whether its word count is 3, and if so capture those values into variables for later use.
I'm stuck at the blanks below and don't know which commands to use. I'm thinking of AWK, but if there is anything better please advise. Here's some pseudo-code showing what I'm trying to do:
xxxxx=/root/xxx/xxxxxxx
Rcount=`wc -l $xxxxx | awk -F " " '{print $1}'`
i=1
while ((i <= Rcount))
do
record=_________________   # (awk command to retrieve the ith record of $xxxxx)
wcount=_________________   # (command to count the number of words in $record)
(( i=i+1 ))
done
Note: record, wcount values are later printed to a log file.
Sounds like you're looking for something like this:
#!/bin/bash
while read -r word1 word2 word3 junk; do
if [[ -n "$word1" && -n "$word2" && -n "$word3" && -z "$junk" ]]; then
echo "all good"
else
echo "error"
fi
done < /root/shravan/actlist
This will go through each line of your input file, assigning the three columns to word1, word2 and word3. The -n tests check that read assigned a non-empty value to each variable. The -z check verifies that there are only three columns, so $junk is empty.
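A quick self-contained way to try it, feeding a sample actlist through a heredoc instead of the real path (the last row deliberately has a fourth column to trigger the error branch):
#!/bin/bash
while read -r word1 word2 word3 junk; do
    if [[ -n "$word1" && -n "$word2" && -n "$word3" && -z "$junk" ]]; then
        echo "all good"
    else
        echo "error"
    fi
done <<'EOF'
56633223223 1 5
56633223224 1 6
56633223225 1 3 extra
EOF
This prints all good, all good, error.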
I PROMISE you you are going about this all wrong. To find words in file1 and search for those words in file2 and file3 is just:
awk '
NR==FNR{ for (i=1;i<=NF;i++) words[$i]; next }
{ for (word in words) if ($0 ~ word) print FILENAME, word }
' file1 file2 file3
or similar (assuming a simple grep -f file1 file2 file3 isn't adequate). It DOES NOT involve shell loops to call awk to pull out strings to save in shell variables to pass to other shell commands, etc, etc.
So far you're asking us to help implement part of what you think is the solution to your problem, but we're struggling to do that: what you're asking for doesn't make sense as part of any reasonable solution to what your problem sounds like, so it's hard to suggest anything sensible.
If you tell us what you are trying to do AS A WHOLE, with sample input and expected output for your whole process, then we can help you.
We don't seem to be getting anywhere so let's try a stab at the kind of solution I think you might want and then take it from there.
Look at these 2 files "old" and "new" side by side (line numbers added by the cat -n):
$ paste old new | cat -n
1 a b
2 b 56633223223
3 56633223223 c
4 c d
5 d h
6 e 56633223225
7 f i
8 g Z
9 h k
10 56633223225 l
11 i
12 j
13 k
14 l
Now let's take this "actlist":
$ cat actlist
56633223223 1 2
56633223225 1 3
and run this awk command on all 3 of the above files (yes, I know it could be briefer, more efficient, etc. but favoring simplicity and clarity for now):
$ cat tst.awk
ARGIND==1 {
    numPre[$1] = $2
    numSuc[$1] = $3
}
ARGIND==2 {
    oldLine[FNR] = $0
    if ($0 in numPre) {
        oldHitFnr[$0] = FNR
    }
}
ARGIND==3 {
    newLine[FNR] = $0
    if ($0 in numPre) {
        newHitFnr[$0] = FNR
    }
}
END {
    for (str in numPre) {
        if ( str in oldHitFnr ) {
            if ( str in newHitFnr ) {
                for (i=-numPre[str]; i<=numSuc[str]; i++) {
                    oldFnr = oldHitFnr[str] + i
                    newFnr = newHitFnr[str] + i
                    if (oldLine[oldFnr] != newLine[newFnr]) {
                        print str, "mismatch at old line", oldFnr, "new line", newFnr
                        print "\t" oldLine[oldFnr], "vs", newLine[newFnr]
                    }
                }
            }
            else {
                print str, "is present in old file but not new file"
            }
        }
        else if (str in newHitFnr) {
            print str, "is present in new file but not old file"
        }
    }
}
$ awk -f tst.awk actlist old new
56633223225 mismatch at old line 12 new line 8
j vs Z
It's outputting that result because the 2nd line after 56633223225 is j in file "old" but Z in file "new", and the "actlist" file said the two files had to be common from one line before until 3 lines after that pattern.
Is that what you're trying to do? The above uses GNU awk for ARGIND but the workaround is trivial for other awks.
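For reference, a common portable substitute for GNU awk's ARGIND is to bump a counter whenever a new input file starts, since FNR resets to 1 at the start of each file (a sketch; it assumes no input file is empty, because FNR == 1 never fires for an empty file):
awk '
    FNR == 1 { argind++ }                          # new input file: count it
    argind == 1 { print FILENAME ": first file" }
    argind == 2 { print FILENAME ": second file" }
' actlist old
Each ARGIND==n pattern in tst.awk can then be replaced by argind==n after adding that first rule.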
Use the below code:
awk '{if (NF == 3) { word1=$1; word2=$2; word3=$3; print "Words are:" word1, word2, word3} else {print "Line", NR, "is having", NF, "Words" }}' filename.txt
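A quick hypothetical run of that one-liner, with a test file whose second line has only two words:
$ printf 'a b c\nd e\n' > filename.txt
$ awk '{if (NF == 3) { word1=$1; word2=$2; word3=$3; print "Words are:" word1, word2, word3} else {print "Line", NR, "is having", NF, "Words" }}' filename.txt
Words are:a b c
Line 2 is having 2 Words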
I have given the solution as per the requirement; here it is again with explanatory comments:
awk '{ # awk starts from here and read a file line by line
if (NF == 3) # It will check if current line is having 3 fields. NF represents number of fields in current line
{ word1=$1; # If current line is having exact 3 fields then 1st field will be assigned to word1 variable
word2=$2; # 2nd field will be assigned to word2 variable
word3=$3; # 3rd field will be assigned to word3 variable
print word1, word2, word3} # It will print all 3 fields
}' filename.txt >> output.txt # These 3 fields will be redirected to a file which can be used for further processing.
This is as per the requirement; there are many other ways of doing it, but awk was asked for.
