Edit certain variables in a file using a bash script

Below I have the following SES.ses file:
1 VERSION_CONTROL {
2 FILE_NAME = "C:/Users/kjbaili/Documents/SWCD_TEST/SES.ses";
3 DATE = "";
4 VERSION = "1.1";
5 AUTHOR = "";
6 }
7
8 DISPLAYS {
9 DISPLAY xxx-c-vm-0120:main = {
10 COMPUTER = "xxx-C-VM-0120";
11 DISPLAY = "main";
12 ITEMS {
13 PANEL {
14 name = "visu.pnl";
15 }
16 }
17 }
18 }
19
20 RT-HOSTS {
21 RT-HOST xxx-c-vm-0120 = {
22 COMPONENT {
23 name = "RTE_connections_xxxxx.cmp";
24 }
25 COMPONENT {
26 name = "xxxx.cmp";
27 }
28 }
29 RT-HOST xxx-c-agx-0003 = {
30 COMPONENT {
31 name = "CtApxxx.cmp";
32 }
33 COMPONENT {
34 name = "CtApxxx.cmp";
35 }
36 COMPONENT {
37 name = "CtApxxx.cmp";
38 }
39 }
40 }
41
42 HARDWARE {
43 }
The user enters inputs to be placed on lines 2, 9, 21 and 29: /userpath, DISPLAY Node0, RT-HOST Node0 and RT-HOST Node1 respectively.
I'm trying to edit variables in this file based on the user's input above. These are: FILE_NAME (line 2), DISPLAY (line 9) and RT-HOST (lines 21 and 29).
After doing some research I came up with the following:
currentPath=$(pwd)/SES
awk -v path="$currentPath" '/FILE_NAME/ {cnt++} /FILE_NAME/ && cnt==1 {lnee=gensub(/(^.*\")(.*)(\".*$)/,"\\1"path"\\3",$0);print lnee; next}1' SES.ses > SES.temp && mv -f SES.tmp SES.ses
This command is supposed to find the first occurrence of FILE_NAME and replace its value with currentPath. However, I'm getting the following errors:
awk: cmd. line:1: warning: regexp escape sequence `\"' is not a known regexp operator
mv: cannot stat 'SES.tmp': No such file or directory
So my question is: how do I solve this error, and how do I set the other variables on lines 9, 21 and 29?
Thanks in advance, I would really appreciate your help.
Suggested solution from @Ed Morton:
awk -v filename='foo' -v display='bar' -v rthosts='some others' 'BEGIN { numRth = split(rthosts,rths) }
(filename != "") && ($1 == "FILE_NAME") { newval=filename; filename="" }
(display != "") && ($1 == "DISPLAY") { newval=display; display="" }
(numRth in rths) && ($1 == "RT-HOST") { newval=rths[++c]; delete rths[c] }
newval != "" {
oldval = ( $2 == "=" ? $3 : $2 )
gsub(/^[^"]*"|"[^"]*$/,"",oldval)
pos = index($0,oldval)
$0 = substr($0,1,pos-1) newval substr($0,pos+length(oldval))
newval = ""
}
{ print }' SES.ses
Output of cat -Ev SES.ses:
$
VERSION_CONTROL {$
FILE_NAME = "/c/Users/kjbaili/Documents/DO_NOT_DELETE/SES";$
DATE = "";$
VERSION = "1.1";$
AUTHOR = "";$
}$
$
DISPLAYS {$
DISPLAY d = {$
COMPUTER = "FDT-C-VM-0120";$
DISPLAY = "main";$
ITEMS {$
PANEL {$
name = "visu.pnl";$
}$
}$
}$
}$
$
RT-HOSTS {$
RT-HOST v = {$
COMPONENT {$
name = "RTE_connections_CtCoFallbackPath.cmp";$
}$
COMPONENT {$
name = "CtGwHwpFbpCmp.cmp";$
}$
}$
RT-HOST v = {$
COMPONENT {$
name = "CtApHwpFbpSit.cmp";$
}$
COMPONENT {$
name = "CtApHwpFbpMpl.cmp";$
}$
COMPONENT {$
name = "CtApHwpFbpCVGen.cmp";$
}$
}$
}$
$
HARDWARE {$
}$

The 3rd argument to the gensub() function must be a replacement count, such as 1 or "g" (global).
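For reference, here is a minimal sketch of the original one-liner with a numeric replacement count as the 3rd argument and the temp-file name typo fixed (assuming GNU awk, where gensub() operates on $0 when the target argument is omitted):
currentPath=$(pwd)/SES
awk -v path="$currentPath" '
# replace the quoted value on the first FILE_NAME line only; 1 = first match
/FILE_NAME/ && !seen++ { $0 = gensub(/(^.*")(.*)(".*$)/, "\\1" path "\\3", 1) }
{ print }
' SES.ses > SES.tmp && mv -f SES.tmp SES.ses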
Would you please try instead:
#!/bin/bash
# user's inputs
read -p "FILE_NAME: " -r file_name
read -p "DISPLAY: " -r display
read -p "RT-HOST: " -r rt_host
awk -v file_name="$file_name" -v display="$display" -v rt_host="$rt_host" '
{
sub(/FILE_NAME *= *"[^"]+/, "FILE_NAME = \"" file_name)
sub(/DISPLAY *[^:]+:/, "DISPLAY " display ":")
}
/RT-HOST / {
if (! count++)
sub(/RT-HOST *[^[:space:]]+ *=/, "RT-HOST " display " =")
else
sub(/RT-HOST *[^[:space:]]+ *=/, "RT-HOST " rt_host " =")
}
1
' SES.ses > SES.tmp && mv -f -- SES.tmp SES.ses

You're getting the warning (not error) message escape sequence '\"' is not a known regexp operator because " isn't a regexp metachar; it's just a plain old literal char like x. But in your regexp (^.*\")(.*)(\".*$) you've written \", so either:
you're trying to escape " when it doesn't need to be escaped,
or,
you're trying to include a literal \ in your regexp, but
you'd need to escape it as \\ to do that.
Either way something is wrong with your regexp, so awk warns you about it.
I THINK this is probably what you're trying to do:
$ cat tst.awk
BEGIN { numRth = split(rthosts,rths) }
(filename != "") && ($1 == "FILE_NAME") { newval=filename; filename="" }
(display != "") && ($1 == "DISPLAY") { newval=display; display="" }
(numRth in rths) && ($1 == "RT-HOST") { newval=rths[++c]; delete rths[c] }
newval != "" {
oldval = ( $2 == "=" ? $3 : $2 )
gsub(/^[^"]*"|"[^"]*$/,"",oldval)
pos = index($0,oldval)
$0 = substr($0,1,pos-1) newval substr($0,pos+length(oldval))
newval = ""
}
{ print }
$ awk -v filename='foo' -v display='bar' -v rthosts='some others' -f tst.awk file
VERSION_CONTROL {
FILE_NAME = "foo";
DATE = "";
VERSION = "1.1";
AUTHOR = "";
}
DISPLAYS {
DISPLAY bar = {
COMPUTER = "xxx-C-VM-0120";
DISPLAY = "main";
ITEMS {
PANEL {
name = "visu.pnl";
}
}
}
}
RT-HOSTS {
RT-HOST some = {
COMPONENT {
name = "RTE_connections_xxxxx.cmp";
}
COMPONENT {
name = "xxxx.cmp";
}
}
RT-HOST others = {
COMPONENT {
name = "CtApxxx.cmp";
}
COMPONENT {
name = "CtApxxx.cmp";
}
COMPONENT {
name = "CtApxxx.cmp";
}
}
}
HARDWARE {
}

If ed is available/acceptable, this should show the replacement:
#!/usr/bin/env bash
file_name=foobar
display=barmore
rt_host=quxfux
ed -s SES.ses <<-EOF
2,/FILE_NAME/s/".\{1,\}"/"$file_name"/
9s|^\([[:blank:]]*DISPLAY\).\{1,\}\(=[[:blank:]]*{\$\)|\1 $display \2|
21,29s|^\([[:blank:]]*RT-HOST\).\{1,\}\(=[[:blank:]]*{\$\)|\1 $rt_host \2|
,p
Q
EOF
Just like what @tshiono did, using the builtin read to get the user's input:
#!/usr/bin/env bash
# user's inputs
read -p "FILE_NAME: " file_name
read -p "DISPLAY: " display
read -p "RT-HOST: " rt_host
ed -s SES.ses <<-EOF
2,/FILE_NAME/s/".\{1,\}"/"$file_name"/
9s|^\([[:blank:]]*DISPLAY\).\{1,\}\(=[[:blank:]]*{\$\)|\1 $display \2|
21,29s|^\([[:blank:]]*RT-HOST\).\{1,\}\(=[[:blank:]]*{\$\)|\1 $rt_host \2|
,p
Q
EOF
The ,p is there just to show the newly edited buffer; remove it to silence the output.
Change Q to w to edit the file in place.
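For instance, a sketch of the same here-document writing the changes back instead of previewing them:
ed -s SES.ses <<-EOF
2,/FILE_NAME/s/".\{1,\}"/"$file_name"/
9s|^\([[:blank:]]*DISPLAY\).\{1,\}\(=[[:blank:]]*{\$\)|\1 $display \2|
21,29s|^\([[:blank:]]*RT-HOST\).\{1,\}\(=[[:blank:]]*{\$\)|\1 $rt_host \2|
w
q
EOF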

Related

Validate log file using shell

I want to validate a log file based on a reference file. I worked on a script, but it is neither pretty nor optimal.
For each line I want to check the values of certain fields:
- if field 7 equals 1, I have to check columns 16 and 17
- if field 7 equals 2, I have to check columns 25, 27 and 30
- if field 7 equals 3, I have to check columns 18, 24 and 31
etc.
#!/bin/bash
LOG=SMS.log
awk -F\| ' {s=""}
$4!=0 {printf "API has wrong value"; s="; " }
$8=="" { printf "%sApplicationID is empty", s; s="; " }
$9=="" { printf "%shttp request method is empty", s; s="; " }
$7=="" { printf "%sOperationID is empty", s; s="; " }
$13 !~ /0|1|2/ {printf "%sresult(0,1,2) has a wrong value", s; s="; " }
# 1:create SMS
$7=="1" && $18=="" {printf "%sSender is missing", s; s="; " }
$7=="1" && $18 ~ /\/tel\:\+\*\*/ {printf "%sSender is cyphred !", s; s="; " }
$7=="1" && $20=="" {printf "%sAddress is missing", s; s="; " }
$7=="1" && $20 ~ /\/tel\:\+[0-9]/ {printf "%sAddress(es) is not cyphred", s; s="; " }
$7=="1" && $10 ~ /\/tel\:\+\*\*/ {printf "%sSender is cyphred on URI !", s; s="; " }
## 2:subscribe
$7=="2" && $25=="" {printf "%sdestination is missing", s; s="; " }
$7=="2" && $16=="201" && $27="" {printf "%sresourceId is missing", s; s="; "}
#3:unsubscribe
$7=="2" && $16=="201" && $25="" {printf "%sresource is missing", s; s="; "}
s { printf "\n"}
s
{printf "\n"}
' $LOG
Is it possible to make the code cleaner and more efficient?
Output:
Application is empty; Operation is empty; Http request method is empty
83ac|EDR|V1|0|V1|2019-05-14|7||||2019-05-14T08:00:42.758Z|8|0|||||XXXXX|||||||||789|||||||||5945548f|||||
I'd do it like this:
awk -F'|' '
##### Error Detection
$4 != 0 { prtErr("ApiWrong") }
$8 == "" { prtErr("AppIdEmpty") }
$9 == "" { prtErr("HttpEmpty") }
$7 == "" { prtErr("OpIdEmpty") }
$13 !~ /[012]/ { prtErr("RsltBad") }
$7 == 1 { # 1:create SMS
if ( $18 == "" ) { prtErr("SndMiss") }
if ( $18 ~ /\/tel:\+\*\*/ ) { prtErr("SndCyph") }
if ( $20 == "" ) { prtErr("AddrMiss") }
if ( $20 ~ /\/tel:\+[0-9]/ ) { prtErr("AddrNotCyph") }
if ( $10 ~ /\/tel:\+\*\*/ ) { prtErr("SndCyphUri") }
}
$7 == 2 { # 2:subscribe
if ( $25 == "" ) { prtErr("DestMiss") }
if ( $16=="201" && $27=="" ) { prtErr("RsrcIdMiss") }
}
$7 == 3 { # 3:unsubscribe
if ( $16=="201" && $25=="" ) { prtErr("RsrcMiss") }
}
##### Error Reporting
function prtDbg(code,str) { if (doDebug) prtMsg("DEBUG",code,str) }
function prtTrc(code,str) { if (doTrace) prtMsg("TRACE",code,str) }
function prtWrn(code,str) { prtMsg("WARNING",code,str) }
function prtErr(code,str) { prtMsg("ERROR",code,str) }
function prtMsg(level, code, str, map, msg) {
map["ApiWrong"] = "API has wrong value"
map["AppIdEmpty"] = "ApplicationID is empty"
map["HttpEmpty"] = "http request method is empty"
map["OpIdEmpty"] = "OperationID is empty"
map["RsltBad"] = "result(0,1,2) has a wrong value"
map["SndMiss"] = "Sender is missing"
map["SndCyph"] = "Sender is cyphred !"
map["AddrMiss"] = "Address is missing"
map["AddrNotCyph" = "Address(es) is not cyphred"
map["SndCyphUri"] = "Sender is cyphred on URI !"
map["DestMiss"] = "destination is missing"
map["RsrcIdMiss"] = "resourceId is missing"
map["RsrcMiss"] = "resource is missing"
map["default"] = "Unknown error code"
msg = (code in map ? map[code] : map["default"])
printf "%s: %s[%d]: (%s) %s\n", level, FILENAME, FNR, code, msg | "cat>&2"
if ( str != "" ) {
printf "%s: %s[%d]:\t%s\n", $0 | "cat>&2"
}
}
' "$log"
That decouples the text being printed from the error indication, and it centralizes/instruments all error messages for a common look and feel, the ability to add extra info if necessary, and de-cluttering of the code that's detecting the errors. I also showed how to separate errors from warnings, etc. (you choose which is which in your code) and how to add tracing/debugging functions that you can call all over the code if you like; they won't do anything until you set the relevant "do..." flag on the command line.
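For example, assuming the program above were saved to a file such as validate.awk (a hypothetical name), tracing and debugging could be switched on from the command line because doTrace and doDebug are ordinary awk variables:
# enable prtTrc() / prtDbg() messages (validate.awk is a hypothetical file name)
awk -F'|' -v doTrace=1 -f validate.awk SMS.log
awk -F'|' -v doDebug=1 -f validate.awk SMS.log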
Update to just produce the specific output you asked for (untested):
BEGIN { FS="|" }
##### General processing including error detection
$4 != 0 { logErr("ApiWrong") }
$8 == "" { logErr("AppIdEmpty") }
$9 == "" { logErr("HttpEmpty") }
$7 == "" { logErr("OpIdEmpty") }
$13 !~ /[012]/ { logErr("RsltBad") }
$7 == 1 { # 1:create SMS
if ( $18 == "" ) { logErr("SndMiss") }
if ( $18 ~ /\/tel:\+\*\*/ ) { logErr("SndCyph") }
if ( $20 == "" ) { logErr("AddrMiss") }
if ( $20 ~ /\/tel:\+[0-9]/ ) { logErr("AddrNotCyph") }
if ( $10 ~ /\/tel:\+\*\*/ ) { logErr("SndCyphUri") }
}
$7 == 2 { # 2:subscribe
if ( $25 == "" ) { logErr("DestMiss") }
if ( $16=="201" && $27=="" ) { logErr("RsrcIdMiss") }
}
$7 == 3 { # 3:unsubscribe
if ( $16=="201" && $25=="" ) { logErr("RsrcMiss") }
}
{ prtErrs() }
##### Error reporting primitives
function logErr(code) { _errs[code] }
function prtErrs( code, map, msg, gotErrs, sep) {
for (code in _errs) {
gotErrs = 1
break
}
if (gotErrs) {
map["ApiWrong"] = "API has wrong value"
map["AppIdEmpty"] = "ApplicationID is empty"
map["HttpEmpty"] = "http request method is empty"
map["OpIdEmpty"] = "OperationID is empty"
map["RsltBad"] = "result(0,1,2) has a wrong value"
map["SndMiss"] = "Sender is missing"
map["SndCyph"] = "Sender is cyphred !"
map["AddrMiss"] = "Address is missing"
map["AddrNotCyph"] = "Address(es) is not cyphred"
map["SndCyphUri"] = "Sender is cyphred on URI !"
map["DestMiss"] = "destination is missing"
map["RsrcIdMiss"] = "resourceId is missing"
map["RsrcMiss"] = "resource is missing"
printf "%s: %s[%d]: ", "ERROR", FILENAME, FNR | "cat>&2"
for (code in _errs) {
msg = (code in map ? map[code] : "Unknown error code (" code ")")
printf "%s%s", sep, msg | "cat>&2"
sep = "; "
}
printf "\n%s\n", $0 | "cat>&2"
delete _errs
}
}
and if you have GNU awk for arrays of arrays and length(array) then I'd do it as:
BEGIN { FS="|" }
##### General processing including error detection
$4 != 0 { logErr("Wrong","API") }
$8 == "" { logErr("Empty","AppId") }
$9 == "" { logErr("Empty","Http request method") }
$7 == "" { logErr("Empty","OperationID") }
$13 !~ /[012]/ { logErr("Wrong","Result(0,1,2)") }
$7 == 1 { # 1:create SMS
if ( $18 == "" ) { logErr("Miss","Sender") }
if ( $18 ~ /\/tel:\+\*\*/ ) { logErr("Cyph","Sender") }
if ( $20 == "" ) { logErr("Miss","Address") }
if ( $20 ~ /\/tel:\+[0-9]/ ) { logErr("NotCyph","Address(es)") }
if ( $10 ~ /\/tel:\+\*\*/ ) { logErr("UriCyph","Sender") }
}
$7 == 2 { # 2:subscribe
if ( $25 == "" ) { logErr("Miss","Destination") }
if ( $16=="201" && $27=="" ) { logErr("Miss","ResourceId") }
}
$7 == 3 { # 3:unsubscribe
if ( $16=="201" && $25=="" ) { logErr("Miss","Resource") }
}
{ prtErrs() }
##### Error reporting primitives
function logErr(type,item) { _errs[type][item] }
function prtErrs( map, type, msg, item, sep) {
if ( length(_errs) ) {
map["Wrong"] = "has wrong value"
map["Empty"] = "is empty"
map["Miss"] = "is missing"
map["Cyph"] = "is cyphred !"
map["NotCyph"] = "is not cyphred"
map["UriCyph"] = "is cyphred on URI !"
printf "%s: %s[%d]: ", "ERROR", FILENAME, FNR | "cat>&2"
for (type in _errs) {
msg = (type in map ? map[type] : "Unknown error type (" type ")")
for (item in _errs[type]) {
printf "%s%s %s", sep, item, msg | "cat>&2"
sep = "; "
}
}
printf "\n%s\n", $0 | "cat>&2"
delete _errs
}
}
The first thing you could do is get rid of the s variable:
#!/bin/bash
LOG=SMS.log
awk -F\| '
function add_error(message){
error = error OFS message
}
$4!=0 {add_error("API has wrong value")}
$8=="" {add_error("ApplicationID is empty")}
$9=="" {add_error("http request method is empty")}
$7=="" {add_error("OperationID is empty")}
$13 !~ /0|1|2/ {add_error("result(0,1,2) has a wrong value")}
# 1:create SMS
$7=="1" && $18=="" {add_error("Sender is missing")}
$7=="1" && $18 ~ /\/tel\:\+\*\*/ {add_error("Sender is cyphred !")}
$7=="1" && $20=="" {add_error("Address is missing")}
$7=="1" && $20 ~ /\/tel\:\+[0-9]/ {add_error("Address(es) is not cyphred")}
$7=="1" && $10 ~ /\/tel\:\+\*\*/ {add_error("Sender is cyphred on URI !")}
## 2:subscribe
$7=="2" && $25=="" {add_error("destination is missing")}
$7=="2" && $16=="201" && $27="" {add_error("resourceId is missing")}
#3:unsubscribe
$7=="2" && $16=="201" && $25="" {add_error("resource is missing")}
{
print substr(error, length(OFS)+1); #Works even if error is empty
error = "";
}
' OFS="; " $LOG
I think it is a bit strange to analyze your log file and create ... a new log file. Why don't you create a CSV with one column per error and 1/0 values for each line/error? The result would be much easier to analyze and would contain all the information you need.
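A rough sketch of that CSV idea, covering only the first few checks (the column choices are illustrative):
awk -F'|' '
BEGIN { OFS=","; print "ApiWrong,AppIdEmpty,HttpEmpty,OpIdEmpty,RsltBad" }
# each comparison evaluates to 1 or 0, so every log line becomes a row of flags
{ print ($4!=0), ($8==""), ($9==""), ($7==""), ($13 !~ /[012]/) }
' SMS.log > SMS_checks.csv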

How to write a script that searches for numeric pattern in huge file?

I have 200000 integers written in a file like this:
0
1
2
3
.
98
99
.
.
100
101
102
.
I want to write an awk or join script that would tell how many times this pattern (from 0 to 99) repeats itself.
Not battle tested:
awk 'i++!=$0{i=$0==0?1:0}i==100{c++;i=0}END{print c}' p.txt
Breakdown:
i++ != $0 {      # Use a cursor (i) which will be compared to the input
i=$0==0?1:0;     # If not matched, reset the cursor: if the current line is zero, set it to 1,
}                # because that means we already matched our first line; otherwise set it to 0
i == 100 {       # If the full pattern was found:
c++;             # add to the count
i=0;             # reset the cursor
}
END {print c}    # Print the matched count
You can do this using a state variable which is reset anytime the pattern is incomplete. For example:
#!/usr/bin/awk -f
BEGIN {
state = -1;
count = 0;
}
/^[0-9]+$/ {
if ( $0 == ( state + 1 ) || $0 == 0 ) {
state = $0;
if ( state == 99 ) {
count++;
}
} else {
state = -1;
}
next;
}
{ state = -1; next; }
END {
print count;
}
This script assumes awk is in /usr/bin (the usual case). Put the script in a file, e.g. "patterns", make it executable, and run it like:
./patterns < p.txt

Substituting variables in a text string

I have a text string in a variable in bash which looks like this:
filename1.txt
filename2.txt
varname1 = v1value
$(varname1)/filename3.txt
$(varname1)/filename4.txt
varname2 = $(varname1)/v2value
$(varname2)/filename5.txt
$(varname2)/filename6.txt
I want to substitute all of the variables in place, producing this:
filename1.txt
filename2.txt
v1value/filename3.txt
v1value/filename4.txt
v1value/v2value/filename5.txt
v1value/v2value/filename6.txt
Can anyone suggest a clean way to do this in the shell?
In awk:
BEGIN {
FS = "[[:space:]]*=[[:space:]]*"
}
NF > 1 {
map[$1] = $2
next;
}
function replace( count)   # the extra "count" parameter acts as a local variable
{
for (key in map) {
count += gsub("\\$\\("key"\\)", map[key])
}
return count
}
{
while (replace() > 0) {}
print
}
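Assuming the script is saved as subst.awk (a hypothetical file name) and the text is held in a bash variable, it could be run like:
awk -f subst.awk <<< "$string"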
In lua:
local map = {}
--for line in io.lines("file.in") do -- To read from a file.
for line in io.stdin:lines() do -- To read from standard input.
local key, value = line:match("^(%w*)%s*=%s*(.*)$")
if key then
map[key] = value
else
local count
while count ~= 0 do
line, count = line:gsub("%$%(([^)]*)%)", map)
end
print(line)
end
end
I found a reasonable solution using m4:
function make_substitutions() {
# first all $(varname)s are replaced with ____varname____
# then each assignment statement is replaced with an m4 define macro
# finally this text is then passed through m4
echo "$1" |\
sed 's/\$(\([[:alnum:]][[:alnum:]]*\))/____\1____/' | \
sed 's/ *\([[:alnum:]][[:alnum:]]*\) *= *\(..*\)/define(____\1____, \2)/' | \
m4
}
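A hypothetical invocation, with the sample text from the question held in a shell variable (single-quoted so the shell does not expand the $(varname) references itself):
text='filename1.txt
varname1 = v1value
$(varname1)/filename3.txt'
make_substitutions "$text"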
Perhaps
echo "$string" | perl -nlE 'm/(\w+)\s*=\s*(.*)(?{$h{$1}=$2})/&&next;while(m/\$\((\w+)\)/){$x=$1;s/\$\($x\)/$h{$x}/e};say$_'
prints
filename1.txt
filename2.txt
v1value/filename3.txt
v1value/filename4.txt
v1value/v2value/filename5.txt
v1value/v2value/filename6.txt

How to count, using awk or anything else, the number of shared x values for 2 different y values in a CSV file consisting of columns x and y?

Let me be specific. We have a csv file consisting of 2 columns, x and y, like this:
x,y
1h,a2
2e,a2
4f,a2
7v,a2
1h,b6
4f,b6
4f,c9
7v,c9
...
And we want to count how many shared x values two y values have, which means we want to get this:
y1,y2,share
a2,b6,2
a2,c9,2
b6,c9,1
And b6,a2,2 should not show up. Does anyone know how to do this with awk? Or anything else?
Thanks ahead!
Try this executable awk script:
#!/usr/bin/awk -f
BEGIN {FS=OFS=","}
NR==1 { print "y1" OFS "y2" OFS "share" }
NR>1 {last=a[$1]; a[$1]=(last!=""?last",":"")$2}
END {
for(i in a) {
cnt = split(a[i], arr, FS)
if( cnt>1 ) {
for(k=1;k<cnt;k++) {
for(j=k+1;j<=cnt;j++) {
if( arr[k] != arr[j] ) {
key=arr[k] OFS arr[j]
if(out[key]=="") {order[++ocnt]=key}
out[key]++
}
}
}
}
}
for(i=1;i<=ocnt;i++) {
print order[i] OFS out[order[i]]
}
}
When put into a file called awko and made executable, running it like awko data yields:
y1,y2,share
a2,b6,2
a2,c9,2
b6,c9,1
I'm assuming the file is sorted by the y values in the second column, as in the question (after the header). If it works for you, I'll add some explanations tomorrow.
Additionally, for anyone who wants more test data, here's a silly executable awk script for generating data similar to what's in the question. It makes about 10K lines when run (e.g. ./gen.awk).
#!/usr/bin/awk -f
function randInt(max) {
return( int(rand()*max)+1 )
}
BEGIN {
a[1]="a"; a[2]="b"; a[3]="c"; a[4]="d"; a[5]="e"; a[6]="f"
a[7]="g"; a[8]="h"; a[9]="i"; a[10]="j"; a[11]="k"; a[12]="l"
a[13]="m"; a[14]="n"; a[15]="o"; a[16]="p"; a[17]="q"; a[18]="r"
a[19]="s"; a[20]="t"; a[21]="u"; a[22]="v"; a[23]="w"; a[24]="x"
a[25]="y"; a[26]="z"
print "x,y"
for(i=1;i<=26;i++) {
amultiplier = randInt(1000) # vary this to change the output size
r = randInt(amultiplier)
anum = 1
for(j=1;j<=amultiplier;j++) {
if( j == r ) { anum++; r = randInt(amultiplier) }
print a[randInt(26)] randInt(5) "," a[i] anum
}
}
}
I think if you can get the input into a form like this, it's easy:
1h a2 b6
2e a2
4f a2 b6 c9
7v a2 c9
In fact, you don't even need the x value. You can convert this:
a2 b6
a2
a2 b6 c9
a2 c9
Into this:
a2,b6
a2,b6
a2,c9
a2,c9
That output can be sorted and piped to uniq -c to get approximately the output you want, so we only need to think much about how to get from your input to the first and second states. Once we have those, the final step is easy.
Step one:
sort /tmp/values.csv \
| awk '
BEGIN { FS="," }
{
if (x != $1) {
if (x) print values
x = $1
values = $2
} else {
values = values " " $2
}
}
END { print values }
'
Step two:
| awk '
{
for (i = 1; i < NF; ++i) {
for (j = i+1; j <= NF; ++j) {
print $i "," $j
}
}
}
'
Step three:
| sort | awk '
BEGIN {
combination = $0
print "y1,y2,share"
}
{
if (combination == $0) {
count = count + 1
} else {
if (count) print combination "," count
count = 1
combination = $0
}
}
END { print combination "," count }
'
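Assuming the three awk bodies above are saved as step1.awk, step2.awk and step3.awk (hypothetical file names), the whole thing could be chained into one pipeline:
sort /tmp/values.csv | awk -f step1.awk | awk -f step2.awk | sort | awk -f step3.awk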
This awk script does the job:
BEGIN { FS=OFS="," }
NR==1 { print "y1","y2","share" }
NR>1 { ++seen[$1,$2]; ++x[$1]; ++y[$2] }
END {
for (y1 in y) {
for (y2 in y) {
if (y1 != y2 && !(y2 SUBSEP y1 in c)) {
for (i in x) {
if (seen[i,y1] && seen[i,y2]) {
++c[y1,y2]
}
}
}
}
}
for (key in c) {
split(key, a, SUBSEP)
print a[1],a[2],c[key]
}
}
Loop through the input, recording both the original elements and the combinations. Once the file has been processed, look at each pair of y values. The if statement does two things: it prevents equal y values from being compared and it saves looping through the x values twice for every pair. Shared values are stored in c.
Once the shared values have been aggregated, the final output is printed.
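Assuming the script is saved as share.awk (a hypothetical name), it could be run like:
awk -f share.awk file.csv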
This bash script, driven by sed, does the trick:
#!/bin/bash
echo y1,y2,share
x=$(wc -l < file)
b=$(echo "$x -2" | bc)
index=0
for i in $(eval echo "{2..$b}")
do
var_x_1=$(sed -n ''"$i"p'' file | sed 's/,.*//')
var_y_1=$(sed -n ''"$i"p'' file | sed 's/.*,//')
a=$(echo "$i + 1" | bc)
for j in $(eval echo "{$a..$x}")
do
var_x_2=$(sed -n ''"$j"p'' file | sed 's/,.*//')
var_y_2=$(sed -n ''"$j"p'' file | sed 's/.*,//')
if [ "$var_x_1" = "$var_x_2" ] ; then
array[$index]=$var_y_1,$var_y_2
index=$(echo "$index + 1" | bc)
fi
done
done
counter=1
for (( k=1; k<$index; k++ ))
do
if [ ${array[k]} = ${array[k-1]} ] ; then
counter=$(echo "$counter + 1" | bc)
else
echo ${array[k-1]},$counter
counter=1
fi
if [ "$k" = $(echo "$index-1"|bc) ] && [ $counter = 1 ]; then
echo ${array[k]},$counter
fi
done

Parsing iw wlan0 scan output

I wrote a wlan manager script to handle open/ad-hoc/wep/wpa2 networks. Now I'm trying to parse the iw wlan0 scan output to add a nice scan feature to my script. My goal is to get output like this:
SSID channel signal encryption
wlan-ap 6 70% wpa2-psk
test 1 55% wep
What I have achieved so far is output like this:
$ iw wlan0 scan | grep 'SSID\|freq\|signal\|capability' | tac
SSID: Koti783
signal: -82.00 dBm
capability: ESS Privacy ShortPreamble SpectrumMgmt ShortSlotTime (0x0531)
freq: 2437
I have been trying to study bash/sed/awk but haven't yet found a way to achieve what I'm after. So what is a good way to do this?
Here is my final solution, based on Sudo_O's answer:
$1 == "BSS" {
MAC = $2
wifi[MAC]["enc"] = "Open"
}
$1 == "SSID:" {
wifi[MAC]["SSID"] = $2
}
$1 == "freq:" {
wifi[MAC]["freq"] = $NF
}
$1 == "signal:" {
wifi[MAC]["sig"] = $2 " " $3
}
$1 == "WPA:" {
wifi[MAC]["enc"] = "WPA"
}
$1 == "WEP:" {
wifi[MAC]["enc"] = "WEP"
}
END {
printf "%s\t\t%s\t%s\t\t%s\n","SSID","Frequency","Signal","Encryption"
for (w in wifi) {
printf "%s\t\t%s\t\t%s\t%s\n",wifi[w]["SSID"],wifi[w]["freq"],wifi[w]["sig"],wifi[w]["enc"]
}
}
Output:
$ sudo iw wlan0 scan | awk -f scan.awk
SSID Frequency Signal Encryption
netti 2437 -31.00 dBm Open
Koti783 2437 -84.00 dBm WPA
WLAN-AP 2462 -85.00 dBm WPA
It's generally bad practice to try to parse the complex output of programs intended for humans to read (rather than for machines to parse).
E.g. the output of iw might change depending on the language settings of the system and/or the version of iw, leaving you with a "manager" that only works on your development machine.
Instead you might use the same interface that iw uses to get its information: the library backend libnl.
You might also want to have a look at the wireless-tools (iwconfig, iwlist, ...) that use the libiw library.
Here is a GNU awk script to get you going that grabs the SSID and the channel for each unique BSS:
/^BSS / {
MAC = $2
}
/SSID/ {
wifi[MAC]["SSID"] = $2
}
/primary channel/ {
wifi[MAC]["channel"] = $NF
}
# Insert new block here
END {
printf "%s\t\t%s\n","SSID","channel"
for (w in wifi) {
printf "%s\t\t%s\n",wifi[w]["SSID"],wifi[w]["channel"]
}
}
It should be easy for you to add the new blocks for signal and encryption considering all the studying you have been doing.
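For example, a sketch of the signal and encryption blocks, mirroring the asker's final script shown earlier, could be dropped in at the "# Insert new block here" marker (the END printf would need matching columns added):
$1 == "signal:" {
wifi[MAC]["signal"] = $2 " " $3
}
$1 == "WPA:" {
wifi[MAC]["enc"] = "WPA"
}
$1 == "WEP:" {
wifi[MAC]["enc"] = "WEP"
}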
Save the script to a file such as wifi.awk and run it like:
$ sudo iw wlan0 scan | awk -f wifi.awk
The output will be in the format requested:
SSID channel
wlan-ap 6
test 1
Here is a simple Bash function which uses exclusively Bash internals and spawns only one sub-shell:
#!/bin/bash
function iwScan() {
# disable globbing to avoid surprises
set -o noglob
# make temporary variables local to our function
local AP S
# read stdin of the function into AP variable
while read -r AP; do
## print lines only containing needed fields
[[ "${AP//'SSID: '*}" == '' ]] && printf '%b' "${AP/'SSID: '}\n"
[[ "${AP//'signal: '*}" == '' ]] && ( S=( ${AP/'signal: '} ); printf '%b' "${S[0]},";)
done
set +o noglob
}
iwScan <<< "$(iw wlan0 scan)"
Output:
-66.00,FRITZ!Box 7312
-56.00,ALICE-WLAN01
-78.00,o2-WLAN93
-78.00,EasyBox-7A2302
-62.00,dlink
-74.00,EasyBox-59DF56
-76.00,BELAYS_Network
-82.00,o2-WLAN20
-82.00,BPPvM
The function can easily be modified to provide additional fields by adding the necessary filter to the while read -r AP loop, e.g.:
[[ "${AP//'last seen: '*}" == '' ]] && ( S=( ${AP/'last seen: '} ); printf '%b' "${S[0]},";)
Output:
-64.00,1000,FRITZ!Box 7312
-54.00,492,ALICE-WLAN01
-76.00,2588,o2-WLAN93
-78.00,652,LN8-Gast
-72.00,2916,WHITE-BOX
-66.00,288,ALICE-WLAN
-78.00,800,EasyBox-59DF56
-80.00,720,EasyBox-7A2302
-84.00,596,ALICE-WLAN08
I am using this solution on OpenWrt:
wlan_scan.sh
#!/bin/sh
sudo iw dev wlan0 scan | awk -f wlan_scan.awk | sort
wlan_scan.awk
/^BSS/ {
mac = gensub ( /^BSS[[:space:]]*([0-9a-fA-F:]+).*?$/, "\\1", "g", $0 );
}
/^[[:space:]]*signal:/ {
signal = gensub ( /^[[:space:]]*signal:[[:space:]]*(\-?[0-9.]+).*?$/, "\\1", "g", $0 );
}
/^[[:space:]]*SSID:/ {
ssid = gensub ( /^[[:space:]]*SSID:[[:space:]]*([^\n]*).*?$/, "\\1", "g", $0 );
printf ( "%s %s %s\n", signal, mac, ssid );
}
result
-62.00 c8:64:c7:54:d9:05 a
-72.00 70:72:3c:1c:af:17 b
-81.00 78:f5:fd:be:33:cb c
There is a bug in the awk script above.
The following code will not work if the SSID has spaces in the name. The received result will be the first token of the SSID name only.
$1 == "SSID:" {
wifi[MAC]["SSID"] = $2
}
When printing $0, $1, $2:
$0: SSID: DIRECT-82-HP OfficeJet 8700
$1: SSID:
$2: DIRECT-82-HP
One possible solution is to take a substr() of $0, which contains the leading spaces, the "SSID: " token and the full multi-token network name.
Any other suggestions?
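A sketch of that substr() idea in standard awk, keeping everything after the "SSID: " token, spaces included:
$1 == "SSID:" {
wifi[MAC]["SSID"] = substr($0, index($0, "SSID: ") + length("SSID: "))
}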
I've taken the awk code from Ari Malinen and reworked it a bit, because the iw output is not stable and changes, and there are other issues such as spaces in the SSID. I put it on GitHub in case I change it in the future.
#!/usr/bin/env awk -f
$1 ~ /^BSS/ {
if($2 !~ /Load:/) { #< Skip the "BSS Load:" line
gsub("(\\(.*|:)", "", $2)
MAC = toupper($2)
wifi[MAC]["enc"] = "OPEN"
wifi[MAC]["WPS"] = "no"
wifi[MAC]["wpa1"] = ""
wifi[MAC]["wpa2"] = ""
wifi[MAC]["wep"] = ""
}
}
$1 == "SSID:" {
# Workaround spaces in SSID
FS=":" #< Changing field separator on ":", it should be
# forbidded sign for SSID name
$0=$0
sub(" ", "", $2) #< remove first whitespace
wifi[MAC]["SSID"] = $2
FS=" "
$0=$0
}
$1 == "capability:" {
for(i=2; i<=NF; i++) {
if($i ~ /0x[0-9]{4}/) {
gsub("(\\(|\\))", "", $i)
if (and(strtonum($i), 0x10))
wifi[MAC]["wep"] = "WEP"
}
}
}
$1 == "WPA:" {
wifi[MAC]["wpa1"] = "WPA1"
}
$1 == "RSN:" {
wifi[MAC]["wpa2"] = "WPA2"
}
$1 == "WPS:" {
wifi[MAC]["WPS"] = "yes"
}
$1 == "DS" {
wifi[MAC]["Ch"] = $5
}
$1 == "signal:" {
match($2, /-([0-9]{2})\.00/, m)
wifi[MAC]["Sig"] = m[1]
}
$1 == "TSF:" {
gsub("(\\(|d|,)", "", $4)
match($5, /([0-9]{2}):([0-9]{2}):/, m)
day = $4
hour = m[1]
min = m[2]
wifi[MAC]["TSF"] = day"d"hour"h"min"m"
}
END {
for (w in wifi) {
if (wifi[w]["wep"]) {
if (wifi[w]["wpa1"] || wifi[w]["wpa2"])
wifi[w]["enc"] = wifi[w]["wpa1"]wifi[w]["wpa2"]
else
wifi[w]["enc"] = "WEP"
}
printf "%s:%s:%s:%s:%s:%s:%s\n", w, wifi[w]["SSID"], wifi[w]["enc"], \
wifi[w]["WPS"], wifi[w]["Ch"], wifi[w]["Sig"], wifi[w]["TSF"]
}
}
Output:
A5FEF2C499BB:test-ssid2:OPEN:no:9:43:0d00h00m
039EFACA9A8B:test-ssid2:WPA1:no:9:33:0d00h00m
038BF3C1988B:test-ssid2:WPA2:no:9:35:0d00h00m
028EF3C2997B:test-ssid2:WPA1:no:9:35:0d00h03m
If you wonder what if($2 !~ /Load:/) does: on some routers there might be a "BSS Load:" line.
