Check for and combine overlapping ranges - bash

How can I check if ranges overlap other ranges and combine the ones that do?
Example:
10-1000,15-350,50-1500,2100,1700-1800,45,40,145,2-1300
The result I want is:
2-1500,1700-1800,2100
I tried to make a plan on how to code it but it's getting me nowhere. Is there a useful package that I can use? Or if not what should my approach be?

Using sort and awk:
# merge.sh - merge overlapping numeric ranges.
#
# Input  (stdin):  one line of comma-separated items, each either a single
#                  non-negative number ("45") or a range ("10-1000").
# Output (stdout): the merged, ascending, comma-separated list, with
#                  single-value ranges printed as a bare number.
merge_ranges() {
  tr , '\n' | sort -n | awk '
    BEGIN {
      FS = OFS = "-"
    }
    # Ignore blank lines (for example from a trailing comma).
    NF == 0 {
      next
    }
    # A bare number is the range n-n.
    NF == 1 {
      $2 = $1
    }
    # The first item opens the first range. An explicit flag is used instead
    # of NR so that a single item, or a range starting at 0, is handled.
    !have {
      start = $1
      end = $2
      have = 1
      next
    }
    # Entirely inside the current range: nothing to do.
    $2 + 0 <= end + 0 {
      next
    }
    # Overlaps the current range: extend it.
    $1 + 0 <= end + 0 {
      end = $2
      next
    }
    # Disjoint: flush the current range and open a new one.
    {
      emit()
      start = $1
      end = $2
    }
    END {
      if (have)
        emit()
    }
    function emit() {
      if (start + 0 == end + 0)
        print start
      else
        print start, end
    }' | paste -sd, -
}

merge_ranges
$ sh ./merge.sh <<<10-1000,15-350,50-1500,2100,1700-1800,45,40,145,2-1300
2-1500,1700-1800,2100

Related

How to reorder file using awk

I have a file as following format
Item-abc,c1,300
Item-abc,c2,500
Item-pqr,c1,900
Item-pqr,c2,800
Item-pqr,c3,600
Item-pqr,c4,700
Item-xyz,c1,950
Item-asd,
Item-jkl
I need this file rearranged in following manner
Item-abc,c1=300,c2=500
Item-pqr,c1=900,c2=800,c3=600,c4=700
Item-xyz,c1=950
If the second and third columns are both empty, that line should be removed completely.
$ cat tst.awk
# Collapse consecutive "item,column,value" rows that share the same item into
# a single "item,column=value,column=value,..." line.
BEGIN { FS = OFS = "," }

# Rows with neither a column name nor a value carry no data: drop them.
$2 == "" && $3 == "" { next }

# A new item starts: flush the previous one (if any) and start a new record.
# A flag is used rather than NR so a first item with only one row is not lost.
!have || $1 != prev {
    if (have) print rec
    rec = prev = $1
    have = 1
}

{ rec = rec OFS $2 "=" $3 }

END { if (have) print rec }
$ awk -f tst.awk file
Item-abc,c1=300,c2=500
Item-pqr,c1=900,c2=800,c3=600
cat answer.awk
# Collapse "item,column,value" rows into one "item,col=val,col=val,..." line
# per item, keeping items and columns in order of first appearance.
BEGIN {
    FS = ","
    RS = "\r\n"    # CRLF (Windows) line endings; a multi-character RS is an extension (gawk supports it)
}
# Rows with neither a column name nor a value carry no data: drop them.
$2 == "" && $3 == "" {
    next
}
{
    # Remember first-seen order explicitly: "for (x in array)" visits keys in
    # an unspecified order, which would scramble the output.
    if (!($1 in nCols))
        items[++nItems] = $1
    if (!(($1, $2) in N))
        cols[$1, ++nCols[$1]] = $2
    N[$1, $2] = $3    # a repeated item/column pair keeps its last value
}
END {
    for (i = 1; i <= nItems; i++) {
        item = items[i]
        if (item == "Name")    # presumably a header row - skipped as in the original
            continue
        line = item
        for (c = 1; c <= nCols[item]; c++) {
            col = cols[item, c]
            line = line "," col "=" N[item, col]
        }
        print line
    }
}
awk -f answer.awk file
Item-abc,c1=300,c2=500
Item-pqr,c1=900,c2=800,c3=600
# Jerin, this variant will strip the \r characters
# Same grouping as answer.awk, but for input read with the default record
# separator: a trailing carriage return is removed from every line first.
# Items and columns are printed in order of first appearance.
BEGIN {
    FS = ","
}
{
    sub(/\r$/, "")    # modifying $0 re-splits the fields, so no split() is needed
    # Rows with neither a column name nor a value carry no data: drop them.
    if ($2 == "" && $3 == "")
        next
    # Remember first-seen order explicitly: "for (x in array)" visits keys in
    # an unspecified order, which would scramble the output.
    if (!($1 in nCols))
        items[++nItems] = $1
    if (!(($1, $2) in N))
        cols[$1, ++nCols[$1]] = $2
    N[$1, $2] = $3    # a repeated item/column pair keeps its last value
}
END {
    for (i = 1; i <= nItems; i++) {
        item = items[i]
        if (item == "Name")    # presumably a header row - skipped as in the original
            continue
        line = item
        for (c = 1; c <= nCols[item]; c++) {
            col = cols[item, c]
            line = line "," col "=" N[item, col]
        }
        print line
    }
}

how to group sequence of item into square brackets

how to group sequence of item into square brackets
for example
List of items
cat item.txt
sn01
sn02
sn03
sn05
sn07
sn08
Desired output
sn[01-03,05,07-08]
If your data has the same format as the sample Input_file shown, the following may help.
awk 'FNR==1{line=$0} {sub(/[a-z]+/,"")} $0-val>1 && val1!=val{out=out?out "," val1"-"val:line"[" val1"-"val;val1=$0} $0-val>1 && val1==val{out=out?out "," val1:out "," val1;val1=$0} {if(FNR==1){sub(/[0-9]+/,"",line);val1=$0};val=$0}END{if(val1!=val){print out "," val1"-"val"]"} else {print out "," val"]"}}' Input_file
Adding non-one liner form of solution too.
# Collapse lines such as sn01, sn02, sn03, sn05 into a bracketed list of runs.
# Variables used by the awk program:
#   line - the first input line, later reduced to its non-digit prefix
#   val  - the number seen on the previous line
#   val1 - the first number of the run currently being collected
#   out  - the output accumulated so far
awk '
# Remember the first line untouched; the prefix is derived from it further down.
FNR==1{
line=$0
}
# Remove the first run of lowercase letters so that $0 holds only the number.
{
sub(/[a-z]+/,"")
}
# Gap after a run of several values: append "val1-val".
# The prefix and the opening bracket are added only here, when out is still empty.
$0-val>1 && val1!=val{
out=out?out "," val1"-"val:line"[" val1"-"val;
val1=$0
}
# Gap after a run of exactly one value: append that value.
# NOTE(review): both branches of the conditional append "," val1, so when the
# first completed run is a single value, out starts with "," and never
# receives the prefix - confirm whether that is intended.
$0-val>1 && val1==val{
out=out?out "," val1:out "," val1;
val1=$0
}
# Runs for every line, after the gap rules above have used the old val.
{
if(FNR==1){
# Strip the digits from the saved first line, leaving only the prefix.
sub(/[0-9]+/,"",line);
val1=$0
};
val=$0
}
# Flush the last run and close the bracket.
# NOTE(review): out is always followed by "," here, so if no gap was ever
# seen the printed line starts with "," and has no prefix.
END{
if(val1!=val){
print out "," val1"-"val"]"
}
else{
print out "," val"]"
}
}
' Input_file
Output will be as follows.
sn[01-03,05,07-08]
"sn" appears to be static here, but it should be picked up from the input file. When I
give it a list of items starting with "cn", it still prints "sn".
Using awk:
$ cat infile
sn01
sn02
sn03
sn05
sn07
sn08
cn08
cn09
cn10
cn11
cn15
when search='sn'
$ awk -v search='sn' 'function pr(){if(f && l)printf("%s%s",n?",":search"[",f==l?f:f"-"l)}$0!~"^"search{next}{t=$1;sub(/[^0-9]+/,"",t)}f==""{f=l=t;next}t==l+1{l=t;next}{pr();f=l=t;n++}END{pr(); print n?"]":"Nothing matched for keyword :"search}' infile
sn[01-03,05,07-08]
when search='cn'
$ awk -v search='cn' 'function pr(){if(f && l)printf("%s%s",n?",":search"[",f==l?f:f"-"l)}$0!~"^"search{next}{t=$1;sub(/[^0-9]+/,"",t)}f==""{f=l=t;next}t==l+1{l=t;next}{pr();f=l=t;n++}END{pr(); print n?"]":"Nothing matched for keyword :"search}' infile
cn[08-11,15]
Better Readable :
# Group the numbers of all lines starting with the given prefix (search)
# into runs, printed as prefix[first-last,single,...].
# Variables used by the awk program:
#   t - the numeric part of the current line
#   f - the first number of the current run
#   l - the last number of the current run
#   n - the number of runs that were closed by a later gap
awk -v search='sn' '
# pr: print the current run, preceded by "," when an earlier run was already
# printed (n is non-zero) and by the prefix plus "[" otherwise.
function pr()
{
if(f && l)
printf("%s%s",n?",":search"[",f==l?f:f"-"l)
}
# Skip lines that do not start with the prefix (search is used as a regex).
$0!~"^"search{
next
}
# t is the first field with its first run of non-digit characters removed.
{
t=$1;
sub(/[^0-9]+/,"",t)
}
# First matching line: open the first run.
f==""{
f=l=t;
next
}
# The number directly follows the end of the run: extend the run.
t==l+1{
l=t;
next
}
# Gap: print the finished run and open a new one.
{
pr();
f=l=t;
n++
}
# Print the last run, then close the bracket.
# NOTE(review): n is incremented only in the gap rule above, so when all
# matching lines form one single run, n is still 0 here and the
# "Nothing matched" text is printed after the run - confirm.
END{
pr();
print n?"]":"Nothing matched for keyword :"search
}' infile
a simple awk solution
We're aiming to set LB and UB for each possible range.
Starting from LB, the last number in the sequence up to which the common difference is 1 gives us the UB.
If difference is more than 1 print the last range and set LB again.
# The prefix is taken from the first line (everything before its trailing digits); the bracket is always closed, even when the last run is a single value.
$ awk 'FNR==1{ prefix=$1; sub(/[0-9]+$/,"",prefix) } { gsub(/[^0-9]/,"",$1); a[++n]=$1 } END{ if(!n) exit; LB=UB=a[1]; for(i=1; i<=n; i++){ if(i<n && a[i+1]+0==UB+1) { UB=a[i+1] } else { rng=(LB==UB) ? LB : LB "-" UB; out=out sep rng; sep=","; LB=UB=a[i+1] } } printf "%s[%s]\n", prefix, out }' file
sn[01-03,05,07-08]
gsub(/[^0-9]/,"",$1) : This sets all non-digit chars to null. Therefore $1 ends up with just numbers;
To understand it better :
$ awk '
# The prefix is the first line without its trailing digits (for example "sn").
FNR==1 { prefix=$1; sub(/[0-9]+$/,"",prefix) }

# Keep only the digits of every line, in input order.
{ gsub(/[^0-9]/,"",$1); a[++n]=$1 }

# Note: the opening brace must be on the same line as END.
END {
    if (!n) exit                          # no input: print nothing
    LB=UB=a[1]
    for (i=1; i<=n; i++) {
        if (i<n && a[i+1]+0 == UB+1) {
            UB=a[i+1]                     # next number continues the run
        }
        else {
            # The run LB..UB is complete: append it as "LB" or "LB-UB".
            rng = (LB==UB) ? LB : LB "-" UB
            out = out sep rng
            sep = ","
            LB=UB=a[i+1]                  # start the next run
        }
    }
    # Always close the bracket, whatever kind of run came last.
    printf "%s[%s]\n", prefix, out
}' file
Awk solution:
# Group numbered items that share a 2-character prefix into prefix[runs].
#   v    - number on the current line     a    - number on the previous line
#   last - first number of the current run
#   r    - output built so far
# last starts as the first number, so a leading single value is printed as
# "01" rather than "01-01"; nothing is printed for empty input.
awk '{ v=substr($0,3) }NR==1{ pfx=substr($0,1,2); r=a=last=v; next }
{ diff=v-a; if(diff>1) { r=r ((a==last)? ",":"-"a",")v; last=v } a=v }
END{ if(NR) { if(diff==1) r=r"-"v; print pfx"["r"]" } }' file
The output:
sn[01-03,05,07-08]

Finding Contiguous Ranges

I would like to find the contiguous ranges given a set of dates by day
given the following sample
2016-01-01
2016-01-02
2016-01-03
2016-01-04
2016-01-05
2016-01-06
2016-01-08
2016-01-09
2016-01-10
2016-01-11
2016-01-12
2016-01-15
2016-01-16
2016-01-17
2016-01-20
2016-01-21
2016-01-30
2016-01-31
2016-02-01
I expect the following result
2016-01-01-2016-01-06
2016-01-08-2016-01-12
2016-01-15-2016-01-17
2016-01-20-2016-01-21
2016-01-30-2016-01-31
2016-02-01-2016-02-01
I have already come across this question, which is almost the opposite of what I want, but with integers.
I have formulated the following which works with integers.
awk 'NR==1 {l=$1; n=$1} {if ($1==n){n=$1+1} else{print l"-"n-1; l=$1 ;n=$1+1} } END {print l"-"$1}' file.txt
With GNU awk for mktime():
$ cat tst.awk
# Print each run of consecutive calendar days as "first-last" (GNU awk).
BEGIN { FS = OFS = "-" }

# Use local noon rather than midnight, and treat any step of more than a day
# and a half as a gap: consecutive days are 23 to 25 hours apart when a
# daylight-saving change falls between them, and a real gap is at least 47.
{ currSecs = mktime($1 " " $2 " " $3 " 12 0 0") }

NR == 1 || (currSecs - prevSecs) > (36*60*60) {
    if (NR > 1) {
        print startDate, prevDate
    }
    startDate = $0
}

{ prevSecs = currSecs; prevDate = $0 }

# Flush the last run; print nothing for empty input.
END { if (NR) print startDate, prevDate }
$ awk -f tst.awk file
2016-01-01-2016-01-06
2016-01-08-2016-01-12
2016-01-15-2016-01-17
2016-01-20-2016-01-21
2016-01-30-2016-02-01
With any awk if you don't care about ranges restarting when months change (as apparent in your expected output and the comment under your question):
$ cat tst.awk
# Print each run of consecutive days as "first-last"; a run also ends
# whenever the year-month part of the date changes.
BEGIN {
    FS = "-"
    OFS = FS
}
{
    ym = $1 FS $2
    day = $3
    # A new run starts on a month change or when more than one day was skipped.
    if (ym != lastYm || day - lastDay > 1) {
        if (NR > 1)
            print runStart, lastLine
        runStart = $0
    }
    lastYm = ym
    lastDay = day
    lastLine = $0
}
END {
    print runStart, lastLine
}
$ awk -f tst.awk file
2016-01-01-2016-01-06
2016-01-08-2016-01-12
2016-01-15-2016-01-17
2016-01-20-2016-01-21
2016-01-30-2016-01-31
2016-02-01-2016-02-01
If you have GNU Awk you can use its time functions.
# Print runs of consecutive dates as "first-last" (GNU awk time functions).
#   start     - first date of the current run
#   latest    - date on the most recently read line
#   this      - running timestamp: set from the first date of a run, then
#               advanced by 24 hours on every line
#   following - the date expected on the next line if the run continues
gawk -F - 'NR==1 || $1 "-" $2 "-" $3 != following {
# First line, or the date is not the expected next day: close the previous
# run (if there was one) and start a new run at the current date.
if (following != "") print start "-" latest;
start = $1 "-" $2 "-" $3
this = mktime($1 " " $2 " " $3 " 0 0 0")
}
{
# Advance the running timestamp by one day and format the expected next date.
this += 24*60*60
following = strftime("%F", this)
latest = $1 "-" $2 "-" $3 }
# The last run is printed only when it spans more than one date, so a final
# single-day run is silently dropped.
END { if (start != latest) print start "-" latest }' filename
Unit ranges will print like "2016-04-15-2016-04-15" which is a bit of a wart, but easy to fix if you need to. Also the END block has a bug in this case, but again, this should at least get you started.
gawk:
#!/bin/awk -f
# Print each run of consecutive days within one month as "start - end".
#   a[NR] - epoch seconds of the date on line NR
#   b[NR] - month field of line NR
#   s     - first date of the current run
BEGIN {
    FS = "-"
}
{
    a[NR] = mktime($1 " " $2 " " $3 " 0 0 0")
    b[NR] = $2

    # Exactly one day after the previous line, in the same month: still in the run.
    if (a[NR-1] + 86400 == a[NR] && b[NR-1] == b[NR])
        next

    # Otherwise close the previous run (there is none on the first line)
    # and start a new run at this line.
    if (NR != 1)
        print s " - " strftime("%Y-%m-%d", a[NR-1])
    s = $0
}
END {
    print s " - " $0
}
Create array a with index NR and value as epochtime derived from $0 using awk time function mktime.
Array b with index NR and value as the month in $2
If the epoch time of the previous line plus 86400 seconds (one day) is not equal to the epoch time of the current line, or the month differs between the previous and the current line, then (except on the first line) print s" - "strftime("%Y-%m-%d",a[NR-1]) and reassign s, the start date, to $0.
END:
Print the last start time s and last line

awk parse data based on value in middle of text

I have the following input:
adm.cd.rrn.vme.abcd.name = foo
adm.cd.rrn.vme.abcd.test = no
adm.cd.rrn.vme.abcd.id = 123456
adm.cd.rrn.vme.abcd.option = no
adm.cd.rrn.vme.asfa.name = bar
adm.cd.rrn.vme.asfa.test = no
adm.cd.rrn.vme.asfa.id = 324523
adm.cd.rrn.vme.asfa.option = yes
adm.cd.rrn.vme.xxxx.name = blah
adm.cd.rrn.vme.xxxx.test = no
adm.cd.rrn.vme.xxxx.id = 666666
adm.cd.rrn.vme.xxxx.option = no
How can extract all the values associated with a specific id?
For example, if I have id == 324523, I'd like it to print the values of name, test, and option:
bar no yes
Is it possible to achieve in a single awk command (or anything similar in bash)?
EDIT: Based on input, here's my solution until now:
# Step 1: find the group key (5th dot-separated field) of the record whose
# id equals ${ID}. The id is passed with -v and compared as a whole string
# rather than being spliced into the awk source as a regex, so it cannot
# match a substring of another value or alter the program.
MYID=$(awk -F'[ .]' -v id="${ID}" '
  $6 == "id" && ($NF "") == (id "") { print $5; exit }' "${TMP_LIST}")

# Step 2: print the name, test and option values of that group.
awk -F'[ .]' -v key="${MYID}" '
  ($5 "") == (key "") {
    if ($6 == "name")   name = $NF
    if ($6 == "test")   test = $NF
    if ($6 == "option") option = $NF
  }
  END { print name, test, option }' "${TMP_LIST}"
Thanks
$ cat tst.awk
# Buffer lines until the one containing "option" that ends a record, then
# print the whole buffered record if it contains the line "id = <tgt>".
{
    block = block $0 RS
    if ($0 !~ /option/)
        next
    if (block ~ ("id = " tgt "\n"))
        printf "%s", block
    block = ""
}
$ awk -v tgt=324523 -f tst.awk file
adm.cd.rrn.vme.asfa.name = bar
adm.cd.rrn.vme.asfa.test = no
adm.cd.rrn.vme.asfa.id = 324523
adm.cd.rrn.vme.asfa.option = yes
or if you prefer:
$ cat tst.awk
# Print the values of one record (every line except its id line) when the
# record id equals tgt. A record ends at its "option" line.
BEGIN {
    FS = "[. ]"
}
{
    key = $(NF-2)

    # The id line only decides whether this record is wanted.
    if (key == "id") {
        found = ($NF == tgt)
        next
    }

    # Collect the value, separated by OFS from the values gathered so far.
    if (rec)
        rec = rec OFS $NF
    else
        rec = "" $NF

    # End of the record: print it if the id matched, then start afresh.
    if (key == "option") {
        if (found)
            print rec
        rec = ""
    }
}
$ awk -v tgt=324523 -f tst.awk file
bar no yes
first, I convert each record in a line with xargs, then I look for lines that contain the regular expression and print the columns searched
cat input | xargs -n 12 | awk '{if($0~/id\s=\s324523\s/){ print $3, $6, $12}}'
a solution more general:
# Print every "group.key:value" pair of each group whose id is 324523.
# With the sample input, $5 is the group name, $6 the key and $8 the value.
awk 'BEGIN{FS="\\.|\\s"; } # fields are separated by a dot or by whitespace (\s is a GNU awk extension)
{
a[$5"."$6]=$8; # remember every value under the index "group.key"
if($8=="324523" && $6=="id"){
reg[$5]=1; # this group has the wanted id: remember the group name
}
}END{
for(k2 in reg){
s=""
for(k in a){
if(k~"^"k2"\\."){ # the index belongs to a remembered group: prepend "index:value" to s
s=k":"a[k]" "s
}
}
print s;
}
}' input
if your input format is fixed, you can do in this way:
grep -A1 -B2 'id\s*=\s*324523$' file|awk 'NR!=3{printf "%s ",$NF}END{print ""}'
you can add -F'=' to awk part too.
it could be done by awk alone, but grep could save some typing...

awk script, need to return results on multi lines following keyword

Using this:
# Print the first two fields of every "pool" line, then each following
# "members" line, until a line starting with "}" ends the pool.
awk '$1 == "pool" { f=1; print $1,$2; next }
# f is 1 while inside a pool. The "pool" test below can never succeed here,
# because the rule above already ended processing of such lines with next.
f == 1 { if ($1 == "pool") { print }
else if ($1 == "members") { print }
else if ($0 ~ /^}/) { f=0 }
}' bigip.conf
That works fine until the config has the IPs on following lines.
How can I get it to print the IPs if they are on the following lines?
The config has both: some have the IPs on the same line, some on the next 1, 2 or 3 lines.
the data :
pool pl_stage_xxx_microsites_9483 {
monitor all tcp_half_open
members {
11.11.11.11:9483 {}
11.22.22.22:9483 {
session user disabled
}
}
}
Try the following awk code:
awk '
# Print "pool <name>" for every pool, then its "members" line and any
# address:port lines that directly follow it. Reads standard input.
$1 == "pool" {
    f = 1
    print $1, $2
    next
}
# f is 1 while inside a pool.
f == 1 {
    if ($1 == "members") {
        print
        # Read ahead while the next lines hold an address:port pair. The result
        # of getline is checked so the loop ends at end of input, and a regex
        # literal is used so that \. really matches a dot.
        while ((getline) > 0 && $0 ~ /[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+:[0-9]+/)
            print
        # The line that ended the loop has been consumed and will not be seen
        # by the rules again, so check it for the closing brace here.
        if ($0 ~ /^}/)
            f = 0
    }
    else if ($0 ~ /^}/) {
        f = 0
    }
}'
That will print the IP lines for as long as they exist.
It's hard to say without seeing more of your data and your expected output but I think all you need is something like this:
# Print every pool header (its first two fields, then the whole line) and,
# from the "members" line onwards, every line of that pool.
awk '
{
    # A line starting with "}" closes the pool; a "pool" line opens one.
    if ($0 ~ /^}/)
        poolOpen = 0
    if ($1 == "pool") {
        poolOpen = 1
        membersOpen = 0
    }
    if (!poolOpen)
        next

    if ($1 == "pool") {
        print $1, $2
        print $0
    }
    else if ($1 == "members")
        membersOpen = 1

    if (membersOpen)
        print $0
}
' file
The above should be a good starting point at least. Regarding the other answer posted using getline: getline has some appropriate uses but this isn't one of them. Don't use getline until you fully understand and can live with all of its caveats, see http://awk.info/?tip/getline.

Resources