How can I check if ranges overlap other ranges and combine the ones that do?
Example:
10-1000,15-350,50-1500,2100,1700-1800,45,40,145,2-1300
The result I want is:
2-1500,1700-1800,2100
I tried to make a plan on how to code it but it's getting me nowhere. Is there a useful package that I can use? Or if not what should my approach be?
Using sort and awk:
tr , '\n' | sort -n | awk '
BEGIN {
FS = OFS = "-"
}
NF == 1 {
$2 = $1
}
$2 <= end {
next
}
$1 <= end {
end = $2
next
}
{
emit()
start = $1
end = $2
}
END {
emit()
}
function emit() {
if (NR != 1) {
if (start == end)
print start
else
print start, end
}
}' | paste -sd,
$ sh ./merge.sh <<<10-1000,15-350,50-1500,2100,1700-1800,45,40,145,2-1300
2-1500,1700-1800,2100
Related
I have a file as following format
Item-abc,c1,300
Item-abc,c2,500
Item-pqr,c1,900
Item-pqr,c2,800
Item-pqr,c3,600
Item-pqr,c4,700
Item-xyz,c1,950
Item-asd,
Item-jkl
I need this file rearranged in following manner
Item-abc,c1=300,c2=500
Item-pqr,c1=900,c2=800,c3=600,c4=700
Item-xyz,c1=950
If second and third columns are empty then that line should ve removed completely
$ cat tst.awk
BEGIN { FS=OFS="," }
$1 != prev { if (NR>2) print rec; rec=prev=$1 }
{ rec = rec OFS $2 "=" $3 }
END { print rec }
$ awk -f tst.awk file
Item-abc,c1=300,c2=500
Item-pqr,c1=900,c2=800,c3=600
cat answer.awk
BEGIN {
FS=","
RS="\r\n" # For Windows"
}
{
N[$1,$2]= $3
}
END {
for (comb in N) {
split (comb,S,SUBSEP)
K[S[1]]=K[S[1]] "," S[2] "=" N[S[1],S[2]]
}
for (j in K) if (j != "Name") print j K[j]
}
awk -f answer.awk file
Item-abc,c1=300,c2=500
Item-pqr,c1=900,c2=800,c3=600
#Jerin, this variant will strip \r'ss
BEGIN {
FS=","
}
{
sub(/\x0d/,"",$0)
split($0,Cols)
N[Cols[1],Cols[2]]= Cols[3]
}
END {
for (comb in N) {
split (comb,S,SUBSEP)
K[S[1]]=K[S[1]] "," S[2] "=" N[S[1],S[2]]
}
for (j in K) if (j != "Name") print j K[j]
}
how to group sequence of item into square brackets
for example
List of items
cat item.txt
sn01
sn02
sn03
sn05
sn07
sn08
Desired output
sn[01-03,05,07-08]
If your data is same as shown Input_file sample then following may help you in same.
awk 'FNR==1{line=$0} {sub(/[a-z]+/,"")} $0-val>1 && val1!=val{out=out?out "," val1"-"val:line"[" val1"-"val;val1=$0} $0-val>1 && val1==val{out=out?out "," val1:out "," val1;val1=$0} {if(FNR==1){sub(/[0-9]+/,"",line);val1=$0};val=$0}END{if(val1!=val){print out "," val1"-"val"]"} else {print out "," val"]"}}' Input_file
Adding non-one liner form of solution too.
awk '
FNR==1{
line=$0
}
{
sub(/[a-z]+/,"")
}
$0-val>1 && val1!=val{
out=out?out "," val1"-"val:line"[" val1"-"val;
val1=$0
}
$0-val>1 && val1==val{
out=out?out "," val1:out "," val1;
val1=$0
}
{
if(FNR==1){
sub(/[0-9]+/,"",line);
val1=$0
};
val=$0
}
END{
if(val1!=val){
print out "," val1"-"val"]"
}
else{
print out "," val"]"
}
}
' Input_file
Output will be as follows.
sn[01-03,05,07-08]
"sn" being static here. it should pick it from the input file. when I
given list of items start with "cn". still it picks "sn"
Using awk:
$ cat infile
sn01
sn02
sn03
sn05
sn07
sn08
cn08
cn09
cn10
cn11
cn15
when search='sn'
$ awk -v search='sn' 'function pr(){if(f && l)printf("%s%s",n?",":search"[",f==l?f:f"-"l)}$0!~"^"search{next}{t=$1;sub(/[^0-9]+/,"",t)}f==""{f=l=t;next}t==l+1{l=t;next}{pr();f=l=t;n++}END{pr(); print n?"]":"Nothing matched for keyword :"search}' infile
sn[01-03,05,07-08]
when search='cn'
$ awk -v search='cn' 'function pr(){if(f && l)printf("%s%s",n?",":search"[",f==l?f:f"-"l)}$0!~"^"search{next}{t=$1;sub(/[^0-9]+/,"",t)}f==""{f=l=t;next}t==l+1{l=t;next}{pr();f=l=t;n++}END{pr(); print n?"]":"Nothing matched for keyword :"search}' infile
cn[08-11,15]
Better Readable :
awk -v search='sn' '
function pr()
{
if(f && l)
printf("%s%s",n?",":search"[",f==l?f:f"-"l)
}
$0!~"^"search{
next
}
{
t=$1;
sub(/[^0-9]+/,"",t)
}
f==""{
f=l=t;
next
}
t==l+1{
l=t;
next
}
{
pr();
f=l=t;
n++
}
END{
pr();
print n?"]":"Nothing matched for keyword :"search
}' infile
a simple awk solution
We're aiming to set LB and UB for each possible range.
Starting from LB, the last number in sequence upto which the common difference is 1 gives us the UB.
If difference is more than 1 print the last range and set LB again.
$ awk 'FNR==1{ $1=$1; prefix=substr($0,1,2);} {gsub(/[^0-9]/,"",$1); a[++i]=$1;} END{ printf prefix"["; LB=UB=prev=a[1]; for(i=1; i<=NR; i++){ if(int(a[i+1])==int(prev+1)) { UB=a[i+1]; prev=UB; } else { if(LB==UB) { printf LB"," } else {delim=(i==NR)? "]" :","; printf LB "-" UB delim; } prev=LB=UB=a[i+1]; }} }' file
sn[01-03,05,07-08]
gsub(/[^0-9]/,"",$1) : This sets all non-digit chars to null. Therefore $1 ends up with just numbers;
To understand it better :
$ awk 'FNR==1{ $1=$1; prefix=substr($0,1,2); } {gsub(/[^0-9]/,"",$1); a[++i]=$1;}
END
{
printf prefix"["; LB=UB=prev=a[1];
for(i=1; i<=NR; i++)
{
if(int(a[i+1])==int(prev+1))
{
UB=a[i+1];
prev=UB; }
else
{
if(LB==UB)
{
printf LB","
}
else
{
delim=(i==NR)? "]" :",";
printf LB "-" UB delim;
}
prev=LB=UB=a[i+1];
}
}
}' file
Awk solution:
awk '{ v=substr($0,3) }NR==1{ pfx=substr($0,1,2); r=a=v; next }
{ diff=v-a; if(diff>1) { r=r ((a==last)? ",":"-"a",")v; last=v } a=v }
END{ if(diff==1) r=r"-"v; print pfx"["r"]" }' file
The output:
sn[01-03,05,07-08]
I would like to find the contiguous ranges given a set of dates by day
given the following sample
2016-01-01
2016-01-02
2016-01-03
2016-01-04
2016-01-05
2016-01-06
2016-01-08
2016-01-09
2016-01-10
2016-01-11
2016-01-12
2016-01-15
2016-01-16
2016-01-17
2016-01-20
2016-01-21
2016-01-30
2016-01-31
2016-02-01
I expect the following result
2016-01-01-2016-01-06
2016-01-08-2016-01-12
2016-01-15-2016-01-17
2016-01-20-2016-01-21
2016-01-30-2016-01-31
2016-02-01-2016-02-01
I have already came across this question which is almost the opposite of what I want but with integers.
I have formulated the following which works with integers.
awk 'NR==1 {l=$1; n=$1} {if ($1==n){n=$1+1} else{print l"-"n-1; l=$1 ;n=$1+1} } END {print l"-"$1}' file.txt
With GNU awk for mktime():
$ cat tst.awk
BEGIN { FS=OFS="-" }
{ currSecs = mktime( $1" "$2" "$3" 0 0 0" ) }
(currSecs - prevSecs) > (24*60*60) {
if (NR>1) {
print startDate, prevDate
}
startDate = $0
}
{ prevSecs = currSecs; prevDate = $0 }
END { print startDate, prevDate }
$ awk -f tst.awk file
2016-01-01-2016-01-06
2016-01-08-2016-01-12
2016-01-15-2016-01-17
2016-01-20-2016-01-21
2016-01-30-2016-02-01
With any awk if you don't care about ranges restarting when months change (as apparent in your expected output and the comment under your question):
$ cat tst.awk
BEGIN { FS=OFS="-" }
{ currYrMth = $1 FS $2; currDay = $3 }
(currYrMth != prevYrMth) || ((currDay - prevDay) > 1) {
if (NR>1) {
print startDate, prevDate
}
startDate = $0
}
{ prevYrMth = currYrMth; prevDay = currDay; prevDate = $0 }
END { print startDate, prevDate }
$ awk -f tst.awk file
2016-01-01-2016-01-06
2016-01-08-2016-01-12
2016-01-15-2016-01-17
2016-01-20-2016-01-21
2016-01-30-2016-01-31
2016-02-01-2016-02-01
If you have GNU Awk you can use its time functions.
gawk -F - 'NR==1 || $1 "-" $2 "-" $3 != following {
if (following != "") print start "-" latest;
start = $1 "-" $2 "-" $3
this = mktime($1 " " $2 " " $3 " 0 0 0")
}
{
this += 24*60*60
following = strftime("%F", this)
latest = $1 "-" $2 "-" $3 }
END { if (start != latest) print start "-" latest }' filename
Unit ranges will print like "2016-04-15-2016-04-15" which is a bit of a wart, but easy to fix if you need to. Also the END block has a bug in this case, but again, this should at least get you started.
gawk:
#!/bin/awk -f
BEGIN{
FS="-"
}
{
a[NR]=mktime($1" "$2" "$3" 0 0 0")
b[NR]=$2;
if ( (a[NR-1]+86400) != a[NR] || b[NR-1]!=b[NR] ) {
if(NR!=1){
print s" - "strftime("%Y-%m-%d",a[NR-1])
};
s=$0
}
}
END{
print s" - "$0
}
Create array a with index NR and value as epochtime derived from $0 using awk time function mktime.
Array b with index NR and value as the month in $2
if either epoch time from last line + 86400 ( +1 day) is not equal to epoch time in current line or month in previous line and current line differs, except for first line, print value in s" - "strftime("%Y-%m-%d",a[NR-1] and reassign s which is the start date with $0
END:
Print the last start time s and last line
I have the following input:
adm.cd.rrn.vme.abcd.name = foo
adm.cd.rrn.vme.abcd.test = no
adm.cd.rrn.vme.abcd.id = 123456
adm.cd.rrn.vme.abcd.option = no
adm.cd.rrn.vme.asfa.name = bar
adm.cd.rrn.vme.asfa.test = no
adm.cd.rrn.vme.asfa.id = 324523
adm.cd.rrn.vme.asfa.option = yes
adm.cd.rrn.vme.xxxx.name = blah
adm.cd.rrn.vme.xxxx.test = no
adm.cd.rrn.vme.xxxx.id = 666666
adm.cd.rrn.vme.xxxx.option = no
How can extract all the values associated with a specific id?
For example, if I have id == 324523, I'd like it to print the values of name, test, and option:
bar no yes
Is it possible to achieve in a single awk command (or anything similar in bash)?
EDIT: Based on input, here's my solution until now:
MYID=$(awk -F. '/'"${ID}"$'/{print $5}' ${TMP_LIST})
awk -F'[ .]' '{
if ($5 == "'${MYID}'") {
if ($6 == "name") {name=$NF}
if ($6 == "test") {test=$NF}
if ($6 == "option") {option=$NF}
}
} END {print name,test,option}' ${TMP_LIST})
Thanks
$ cat tst.awk
{ rec = rec $0 RS }
/option/ {
if (rec ~ "id = "tgt"\n") {
printf "%s", rec
}
rec = ""
next
}
$ awk -v tgt=324523 -f tst.awk file
adm.cd.rrn.vme.asfa.name = bar
adm.cd.rrn.vme.asfa.test = no
adm.cd.rrn.vme.asfa.id = 324523
adm.cd.rrn.vme.asfa.option = yes
or if you prefer:
$ cat tst.awk
BEGIN { FS="[. ]" }
$(NF-2) == "id" { found = ($NF == tgt ? 1 : 0); next }
{ rec = (rec ? rec OFS : "") $NF }
$(NF-2) == "option" { if (found) print rec; rec = ""; next }
$ awk -v tgt=324523 -f tst.awk file
bar no yes
first, I convert each record in a line with xargs, then I look for lines that contain the regular expression and print the columns searched
cat input | xargs -n 12 | awk '{if($0~/id\s=\s324523\s/){ print $3, $6, $12}}'
a solution more general:
awk 'BEGIN{FS="\\.|\\s"; } #field separator is point \\. or space \\s
{
a[$5"."$6]=$8; #store records in associative array a
if($8=="324523" && $6=="id"){
reg[$5]=1; #if is record found, add to associative array reg
}
}END{
for(k2 in reg){
s=""
for(k in a){
if(k~"^"k2"\\."){ #if record is an element of "reg" then add to output "s"
s=k":"a[k]" "s
}
}
print s;
}
}' input
if your input format is fixed, you can do in this way:
grep -A1 -B2 'id\s*=\s*324523$' file|awk 'NR!=3{printf "%s ",$NF}END{print ""}'
you can add -F'=' to awk part too.
it could be done by awk alone, but grep could save some typing...
Using this:
awk '$1 == "pool" { f=1; print $1,$2; next }
f == 1 { if ($1 == "pool") { print }
else if ($1 == "members") { print }
else if ($0 ~ /^}/) { f=0 }
}' bigip.conf
That works fine until the config has the IPs on following lines.
How can I get it to print the IPs if they are on following lines.
The config has both, some have it on the same line, some on then next 1, 2 or 3 lines.
the data :
pool pl_stage_xxx_microsites_9483 {
monitor all tcp_half_open
members {
11.11.11.11:9483 {}
11.22.22.22:9483 {
session user disabled
}
}
}
Try the following awk code:
awk '
$1 == "pool" {
f=1
print $1,$2
next
}
f == 1 {
if ($1 == "pool") {
print
}
else if ($1 == "members") {
print
getline
while ($0 ~ "[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}:[0-9]{1,5}"){
print
getline
}
}
else if ($0 ~ /^}/) {
f=0
}
}'
That will print the IP lines while they exists.
It's hard to say without seeing more of your data and your expected output but I think all you need is something like this:
awk '
/^}/ { inPool=0 }
$1 == "pool" { inPool=1; inMembers=0 }
inPool {
if ($1 == "pool") {
print $1, $2
print
}
else if ($1 == "members") {
inMembers = 1
}
if (inMembers) {
print
}
}
' file
The above should be a good starting point at least. wrt the other answer posted using getline - getline has some appropriate uses but this isn't one of them, don't use getline until you fully understand and can live with all of it's caveats, see http://awk.info/?tip/getline.