Oracle - How to use LAG to display strings from all previous rows in the current row

I have data like below:
group  seq  activity
A      1    scan
A      2    visit
A      3    pay
B      1    drink
B      2    rest
I expect to have 1 new column "hist" like below:
group  seq  activity  hist
A      1    scan      NULL
A      2    visit     scan
A      3    pay       scan, visit
B      1    drink     NULL
B      2    rest      drink
I was trying to solve this with the LAG function, but LAG only returns the single previous row instead of all of the previous rows.
Truly appreciate any help!
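For reference, a minimal sketch of such a LAG attempt (an illustration, not the original poster's code), using the table_name and "GROUP" names from the answers below; it only reaches back a single row:
-- LAG looks back a fixed offset (one row here), so it cannot accumulate
-- every previous activity into a single string.
SELECT t.*,
       LAG(activity) OVER (PARTITION BY "GROUP" ORDER BY seq) AS prev_activity
FROM table_name t;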

Use a correlated sub-query:
SELECT t.*,
       (SELECT LISTAGG(activity, ',') WITHIN GROUP (ORDER BY seq)
        FROM   table_name l
        WHERE  t."GROUP" = l."GROUP"
        AND    l.seq < t.seq
       ) AS hist
FROM   table_name t
Or a hierarchical query:
SELECT t.*,
       SUBSTR(SYS_CONNECT_BY_PATH(PRIOR activity, ','), 3) AS hist
FROM   table_name t
START WITH seq = 1
CONNECT BY PRIOR seq + 1 = seq
       AND PRIOR "GROUP" = "GROUP"
Or a recursive sub-query factoring clause:
WITH rsqfc ("GROUP", seq, activity, hist) AS (
  SELECT "GROUP", seq, activity, NULL
  FROM   table_name
  WHERE  seq = 1
UNION ALL
  SELECT t."GROUP", t.seq, t.activity, r.hist || ',' || r.activity
  FROM   rsqfc r
         INNER JOIN table_name t
         ON (r."GROUP" = t."GROUP" AND r.seq + 1 = t.seq)
)
SEARCH DEPTH FIRST BY "GROUP" SET order_rn
SELECT "GROUP", seq, activity, SUBSTR(hist, 2) AS hist
FROM   rsqfc
Which, for the sample data:
CREATE TABLE table_name ("GROUP", seq, activity) AS
SELECT 'A', 1, 'scan' FROM DUAL UNION ALL
SELECT 'A', 2, 'visit' FROM DUAL UNION ALL
SELECT 'A', 3, 'pay' FROM DUAL UNION ALL
SELECT 'B', 1, 'drink' FROM DUAL UNION ALL
SELECT 'B', 2, 'rest' FROM DUAL;
All output:
GROUP | SEQ | ACTIVITY | HIST
:---- | --: | :------- | :---------
A     |   1 | scan     | null
A     |   2 | visit    | scan
A     |   3 | pay      | scan,visit
B     |   1 | drink    | null
B     |   2 | rest     | drink
db<>fiddle here

To aggregate strings in Oracle we use the LISTAGG function.
In general, you need a windowing_clause to specify a sliding window for an analytic function to calculate a running total, but unfortunately LISTAGG doesn't support one.
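For comparison, a minimal sketch (an illustration, not from the original answer) of what such a sliding window looks like with an analytic function that does support it, here a running SUM over all previous rows per group, using the table_name/"GROUP" names from the setup above; this ROWS frame is what a running LISTAGG would need, but LISTAGG's OVER() accepts only PARTITION BY:
-- Running total of seq over all *previous* rows in the group;
-- LISTAGG cannot take a ROWS BETWEEN ... frame like this.
SELECT t.*,
       SUM(seq) OVER (
         PARTITION BY "GROUP"
         ORDER BY seq
         ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING
       ) AS running_seq_total
FROM table_name t;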
To simulate this behaviour you may use the model_clause of the SELECT statement. Below is an example with explanation.
select
  group_
  , activity
  , seq
  , hist
from t
model
  /* Where to restart the calculation */
  partition by (group_)
  /* Add consecutive numbers to reference the "previous" row per group.
     You may use the "seq" column if its values are consecutive */
  dimension by (
    row_number() over(
      partition by group_
      order by seq asc
    ) as rn
  )
  measures (
    /* Other columns to return */
    activity
    , cast(null as varchar2(1000)) as hist
    , seq
  )
  rules update (
    /* Apply this rule sequentially */
    hist[any] order by rn asc =
      /* Previous concatenated result */
      hist[cv()-1]
      /* Plus a comma for the third and subsequent rows */
      || presentv(activity[cv()-2], ',', '')
      /* Plus the previous row's value */
      || activity[cv()-1]
  )
GROUP_ | ACTIVITY | SEQ | HIST
:----- | :------- | --: | :---------
A | scan | 1 | null
A | visit | 2 | scan
A | pay | 3 | scan,visit
B | drink | 1 | null
B | rest | 2 | drink
db<>fiddle here

A few more variants (without subqueries):
SELECT--+ NO_XML_QUERY_REWRITE
t.*,
regexp_substr(
listagg(activity, ',')
within group(order by SEQ)
over(partition by "GROUP")
,'^([^,]+,){'||(row_number()over(partition by "GROUP" order by seq)-1)||'}'
)
AS hist1
,xmlcast(
xmlquery(
'string-join($X/A/B[position()<$Y]/text(),",")'
passing
xmlelement("A", xmlagg(xmlelement("B", activity)) over(partition by "GROUP")) as x
,row_number()over(partition by "GROUP" order by seq) as y
returning content
)
as varchar2(1000)
) hist2
FROM table_name t;
db<>fiddle: https://dbfiddle.uk/?rdbms=oracle_21&fiddle=9b477a2089d3beac62579d2b7103377a
Full test case with output:
with table_name ("GROUP", seq, activity) AS (
SELECT 'A', 1, 'scan' FROM DUAL UNION ALL
SELECT 'A', 2, 'visit' FROM DUAL UNION ALL
SELECT 'A', 3, 'pay' FROM DUAL UNION ALL
SELECT 'B', 1, 'drink' FROM DUAL UNION ALL
SELECT 'B', 2, 'rest' FROM DUAL
)
SELECT--+ NO_XML_QUERY_REWRITE
t.*,
regexp_substr(
listagg(activity, ',')
within group(order by SEQ)
over(partition by "GROUP")
,'^([^,]+,){'||(row_number()over(partition by "GROUP" order by seq)-1)||'}'
)
AS hist1
,xmlcast(
xmlquery(
'string-join($X/A/B[position()<$Y]/text(),",")'
passing
xmlelement("A", xmlagg(xmlelement("B", activity)) over(partition by "GROUP")) as x
,row_number()over(partition by "GROUP" order by seq) as y
returning content
)
as varchar2(1000)
) hist2
FROM table_name t;
GROUP      SEQ ACTIV HIST1        HIST2
------ ------- ----- ------------ ------------
A            1 scan
A            2 visit scan,        scan
A            3 pay   scan,visit,  scan,visit
B            1 drink
B            2 rest  drink,       drink

Related

Oracle sort values into column

I have a table like this:
time     length name
00:01:00 2      a
00:11:22 2      a
01:01:00 45     a
00:23:00 3      b
and I want to retrieve data from the table in the form:
a                   b
time     length     time     length
00:01:00 2          00:23:00 3
00:11:22 2
01:01:00 45
so it is a simple task of rearranging data. At the moment I am doing this in a bash script, but I wonder if there is an easy way to do it in Oracle?
You can use the analytic function ROW_NUMBER and a full outer join as follows:
WITH CTE1 AS
(SELECT T.*, ROW_NUMBER() OVER (ORDER BY LENGTH, TIME) AS RN FROM YOUR_TABLE T WHERE NAME = 'a'),
CTE2 AS
(SELECT T.*, ROW_NUMBER() OVER (ORDER BY LENGTH, TIME) AS RN FROM YOUR_TABLE T WHERE NAME = 'b')
SELECT A.TIME, A.LENGTH, B.TIME, B.LENGTH
FROM CTE1 A FULL OUTER JOIN CTE2 B
ON A.RN = B.RN
Note: You need to use proper order by to order the records as per your requirement. I have used LENGTH, TIME
You can use a multi-column pivot, by adding an extra column that links the related A and B values; presumably by time order, something like:
select time_col, length_col, name_col,
dense_rank() over (partition by name_col order by time_col) as rnk
from your_table;
TIME_COL LENGTH_COL N RNK
-------- ---------- - ----------
00:01:00 2 a 1
00:11:22 2 a 2
01:01:00 45 a 3
00:23:00 3 b 1
and then pivot based on that:
select *
from (
select time_col, length_col, name_col,
dense_rank() over (partition by name_col order by time_col) as rnk
from your_table
)
pivot (
max(time_col) as time_col, max(length_col) as length_col
for name_col in ('a' as a, 'b' as b)
);
RNK A_TIME_C A_LENGTH_COL B_TIME_C B_LENGTH_COL
---------- -------- ------------ -------- ------------
1 00:01:00 2 00:23:00 3
2 00:11:22 2
3 01:01:00 45
I've left the rnk value in the output; if you don't want that you can list the columns in the select list:
select a_time_col, a_length_col, b_time_col, b_length_col
from ...
Or you could do the same thing with conditional aggregation (which is what pivot uses under the hood anyway):
select
max(case when name_col = 'a' then time_col end) as time_col_a,
max(case when name_col = 'a' then length_col end) as length_col_a,
max(case when name_col = 'b' then time_col end) as time_col_b,
max(case when name_col = 'b' then length_col end) as length_col_b
from (
select time_col, length_col, name_col,
dense_rank() over (partition by name_col order by time_col) as rnk
from your_table
)
group by rnk
order by rnk;
TIME_COL LENGTH_COL_A TIME_COL LENGTH_COL_B
-------- ------------ -------- ------------
00:01:00 2 00:23:00 3
00:11:22 2
01:01:00 45
db<>fiddle

Count column comma delimited values oracle

Is it possible to count and also group by comma-delimited values in an Oracle database table? This is an example of the table data:
id | user | title |
1 | foo | a,b,c |
2 | bar | a,d |
3 | tee | b |
The expected result would be:
title | count
a | 2
b | 2
c | 1
d | 1
I wanted to use concat like this:
SELECT a.title FROM Account a WHERE concat(',', a.title, ',') LIKE 'a' OR concat(',', a.title, ',') LIKE 'b' ... GROUP BY a.title?
But I'm getting an "invalid number of arguments" error on CONCAT. The title values are predefined, therefore I don't mind if I have to list all of them in the query. Any help is greatly appreciated.
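As an aside (a hedged note, not from the original thread): Oracle's CONCAT accepts exactly two arguments, which is why the multi-argument call raises "invalid number of arguments". The || operator, together with a wildcarded LIKE pattern (presumably what was intended), avoids that; a minimal sketch assuming the account table from the setup below:
-- Build the padded string with || instead of a three-argument CONCAT,
-- and wrap the search value in wildcards and delimiters.
SELECT a.title
FROM account a
WHERE ',' || a.title || ',' LIKE '%,a,%'
   OR ',' || a.title || ',' LIKE '%,b,%';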
This uses simple string functions and a recursive sub-query factoring clause, and may be faster than using regular expressions and correlated joins:
Oracle Setup:
CREATE TABLE account ( id, "user", title ) AS
SELECT 1, 'foo', 'a,b,c' FROM DUAL UNION ALL
SELECT 2, 'bar', 'a,d' FROM DUAL UNION ALL
SELECT 3, 'tee', 'b' FROM DUAL;
Query:
WITH positions ( title, start_pos, end_pos ) AS (
SELECT title,
1,
INSTR( title, ',', 1 )
FROM account
UNION ALL
SELECT title,
end_pos + 1,
INSTR( title, ',', end_pos + 1 )
FROM positions
WHERE end_pos > 0
),
items ( item ) AS (
SELECT CASE end_pos
WHEN 0
THEN SUBSTR( title, start_pos )
ELSE SUBSTR( title, start_pos, end_pos - start_pos )
END
FROM positions
)
SELECT item,
COUNT(*)
FROM items
GROUP BY item
ORDER BY item;
Output:
ITEM | COUNT(*)
:--- | -------:
a | 2
b | 2
c | 1
d | 1
db<>fiddle here
Split titles to rows and count them.
SQL> with test (id, title) as
2 (select 1, 'a,b,c' from dual union all
3 select 2, 'a,d' from dual union all
4 select 3, 'b' from dual
5 ),
6 temp as
7 (select regexp_substr(title, '[^,]', 1, column_value) val
8 from test cross join table(cast(multiset(select level from dual
9 connect by level <= regexp_count(title, ',') + 1
10 ) as sys.odcinumberlist))
11 )
12 select val as title,
13 count(*)
14 From temp
15 group by val
16 order by val;
TITLE COUNT(*)
-------------------- ----------
a 2
b 2
c 1
d 1
SQL>
If titles aren't that simple, then modify REGEXP_SUBSTR (add + sign) in line #7, e.g.
SQL> with test (id, title) as
2 (select 1, 'Robin Hood,Avatar,Star Wars Episode III' from dual union all
3 select 2, 'Mickey Mouse,Avatar' from dual union all
4 select 3, 'The Godfather' from dual
5 ),
6 temp as
7 (select regexp_substr(title, '[^,]+', 1, column_value) val
8 from test cross join table(cast(multiset(select level from dual
9 connect by level <= regexp_count(title, ',') + 1
10 ) as sys.odcinumberlist))
11 )
12 select val as title,
13 count(*)
14 From temp
15 group by val
16 order by val;
TITLE COUNT(*)
------------------------------ ----------
Avatar 2
Mickey Mouse 1
Robin Hood 1
Star Wars Episode III 1
The Godfather 1
SQL>

Need a query to separate char values and number values from a table in Oracle [closed]

I have a table TAB1 which has one column, COL1, as shown below.
TAB1
COL1
123
Xyz
CM
44
I need a single query which will give the following output:
Ccol | Ncol
Xyz  | 123
CM   | 44
From Oracle 12 you can define a function in a sub-query factoring clause and this can easily determine whether a value is numeric:
Oracle Setup:
CREATE TABLE table_name (COL1) AS
SELECT '123' FROM DUAL UNION ALL
SELECT 'Xyz' FROM DUAL UNION ALL
SELECT 'CM' FROM DUAL UNION ALL
SELECT '44' FROM DUAL UNION ALL
SELECT '1E3' FROM DUAL UNION ALL
SELECT '-1.2' FROM DUAL
Query:
WITH
FUNCTION isNumeric( value VARCHAR2 ) RETURN NUMBER
IS
n NUMBER;
BEGIN
n := TO_NUMBER( value );
RETURN 1;
EXCEPTION
WHEN OTHERS THEN
RETURN 0;
END;
SELECT Ccol,
TO_NUMBER( Ncol ) AS Ncol
FROM (
SELECT col1,
isNumeric( col1 ) AS isNumber,
ROW_NUMBER() OVER ( PARTITION BY isNumeric( col1 ) ORDER BY ROWNUM ) AS rn
FROM table_name
)
PIVOT ( MAX( Col1 ) FOR isNumber IN ( 0 AS Ccol, 1 AS Ncol ) )
ORDER BY rn
Output:
CCOL | NCOL
:--- | ---:
Xyz | 123
CM | 44
null | 1000
null | -1.2
db<>fiddle here
In earlier versions you can use CREATE FUNCTION rather than defining it in the query.
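A minimal sketch of that stand-alone function (the body mirrors the in-query isNumeric above):
-- Stand-alone helper for versions that do not support WITH FUNCTION.
CREATE OR REPLACE FUNCTION isNumeric( value VARCHAR2 ) RETURN NUMBER
IS
  n NUMBER;
BEGIN
  n := TO_NUMBER( value );
  RETURN 1;
EXCEPTION
  WHEN OTHERS THEN
    RETURN 0;
END;
/
The main query is then the same, just without the WITH FUNCTION block.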
You can try this query:
WITH TAB1(COL1) AS
(
SELECT '123' FROM DUAL UNION ALL
SELECT 'Xyz' FROM DUAL UNION ALL
SELECT 'CM' FROM DUAL UNION ALL
SELECT '44' FROM DUAL
)
-- Actual query starts from here
, CTE AS (SELECT
COL1,
NUMERIC,
ROW_NUMBER() OVER(
PARTITION BY NUMERIC
ORDER BY LENGTH(COL1) DESC -- here I considered that Xyz and 123 both have length 3 and are related and same for CM and 44
) AS RN
FROM
(
SELECT
COL1,
CASE
WHEN REGEXP_LIKE ( COL1,
'^[[:digit:]]+$' ) THEN 'NUMBER'
ELSE 'NOT NUMBER'
END AS NUMERIC
FROM
TAB1
))
SELECT
C.COL1 AS "Ccol",
N.COL1 AS "Ncol"
FROM
CTE N
FULL OUTER JOIN CTE C ON ( N.RN = C.RN )
WHERE
N.NUMERIC = 'NUMBER'
AND C.NUMERIC = 'NOT NUMBER';
Output:
Cco Nco
--- ---
Xyz 123
CM 44
db<>fiddle demo
Cheers!!

calculate the average time difference between each stage

How to calculate the average time difference between each stage?
The challenge with the actual data set is that not every id will go through all stages; some will skip stages, and the date is not continuous for all ids, as shown below.
id date status
1 1/1/18 requirement
1 1/8/18 analysis
1 ? design
1 1/30/18 closed
2 2/1/18 requirement
2 2/18/18 closed
3 1/2/18 requirement
3 1/29/18 analysis
3 ? accepted
3 2/5/18 closed
?--we have missing dates as well
Expected output
id date status time_spent
1 1/1/18 requirement 0
1 1/8/18 analysis 7
1 ? design
1 1/30/18 closed 22
2 2/1/18 requirement 0
2 2/18/18 closed 17
3 1/2/18 requirement 0
3 1/29/18 analysis 27
3 ? accepted
3 2/5/18 closed 24
status avg(timespent)
requirement 0
analysis 17
design
closed 21
You can use windowing functions LAG (or LEAD) to get the data of the previous (or next) status for each id. That will let you compute the time elapsed in each stage. Then, compute the average time elapsed for each stage.
Here is an example of how to do that:
with input_data (id, dte, status) as (
SELECT 1, TO_DATE('1/1/18','MM/DD/YY'), 'requirement' FROM DUAL UNION ALL
SELECT 1, TO_DATE('1/8/18','MM/DD/YY'), 'analysis' FROM DUAL UNION ALL
SELECT 1, NULL, 'design' FROM DUAL UNION ALL
SELECT 1, TO_DATE('1/30/18','MM/DD/YY'), 'closed' FROM DUAL UNION ALL
SELECT 2, TO_DATE('2/1/18','MM/DD/YY'), 'requirement' FROM DUAL UNION ALL
SELECT 2, TO_DATE('2/18/18','MM/DD/YY'), 'closed' FROM DUAL UNION ALL
SELECT 3, TO_DATE('1/2/18','MM/DD/YY'), 'requirement' FROM DUAL UNION ALL
SELECT 3, TO_DATE('1/29/18','MM/DD/YY'), 'analysis' FROM DUAL UNION ALL
SELECT 3, NULL, 'accepted' FROM DUAL UNION ALL
SELECT 3, TO_DATE('2/5/18','MM/DD/YY'), 'closed' FROM DUAL ),
----- Solution begins here
data_with_elapsed_days as (
SELECT id.*, dte-nvl(lag(dte ignore nulls) over ( partition by id order by dte ), dte) elapsed
from input_data id)
SELECT status, avg(elapsed)
FROM data_with_elapsed_days d
group by status
order by decode(status,'requirement',1,'analysis',2,'design',3,'accepted',4,'closed',5,99);
+-------------+-------------------------------------------+
| STATUS | AVG(ELAPSED) |
+-------------+-------------------------------------------+
| requirement | 0 |
| analysis | 17 |
| design | |
| accepted | |
| closed | 15.33333333333333333333333333333333333333 |
+-------------+-------------------------------------------+
As I said in my comment, that logic computes the elapsed days as the time to the given status from the prior status. Since "requirement" has no prior status, this logic will always show zero days spent in requirements. It would probably be better to compute the time from the given status to the next status. For "closed", there would be no next status. You could just leave that blank or use SYSDATE as the date of the next status. Here is an example of that:
with input_data (id, dte, status) as (
SELECT 1, TO_DATE('1/1/18','MM/DD/YY'), 'requirement' FROM DUAL UNION ALL
SELECT 1, TO_DATE('1/8/18','MM/DD/YY'), 'analysis' FROM DUAL UNION ALL
SELECT 1, NULL, 'design' FROM DUAL UNION ALL
SELECT 1, TO_DATE('1/30/18','MM/DD/YY'), 'closed' FROM DUAL UNION ALL
SELECT 2, TO_DATE('2/1/18','MM/DD/YY'), 'requirement' FROM DUAL UNION ALL
SELECT 2, TO_DATE('2/18/18','MM/DD/YY'), 'closed' FROM DUAL UNION ALL
SELECT 3, TO_DATE('1/2/18','MM/DD/YY'), 'requirement' FROM DUAL UNION ALL
SELECT 3, TO_DATE('1/29/18','MM/DD/YY'), 'analysis' FROM DUAL UNION ALL
SELECT 3, NULL, 'accepted' FROM DUAL UNION ALL
SELECT 3, TO_DATE('2/5/18','MM/DD/YY'), 'closed' FROM DUAL ),
----- Solution begins here
data_with_elapsed_days as (
SELECT id.*, nvl(lead(dte ignore nulls) over ( partition by id order by dte ), trunc(sysdate))-dte elapsed
from input_data id)
SELECT status, avg(elapsed)
FROM data_with_elapsed_days d
group by status
order by decode(status,'requirement',1,'analysis',2,'design',3,'accepted',4,'closed',5,99);
+-------------+------------------------------------------+
| STATUS | AVG(ELAPSED) |
+-------------+------------------------------------------+
| requirement | 17 |
| analysis | 14.5 |
| design | |
| accepted | |
| closed | 361.666666666666666666666666666666666667 |
+-------------+------------------------------------------+
I agree with @MatthewMcPeak. Your requirements seem a bit odd: you spend zero days in the requirement stage but an average of 21 days on closed? Fnord.
This solution treats the presented date as the start date of the stage and calculates the difference between it and the start_date of the next phase.
with cte as (
select status
, lead(dd ignore nulls) over (partition by id order by dd) - dd as dt_diff
from your_table)
select status, avg(dt_diff) as avg_ela
from cte
group by status
/
If you wish to include all stages for each d and estimate the time spent in each (using linear interpolation), then you can create a sub-query with all the statuses, use a PARTITION OUTER JOIN to join them, and then use LAG and LEAD to find the date range each status falls in and interpolate between them:
Oracle Setup:
CREATE TABLE data ( d, dt, status ) AS
SELECT 1, TO_DATE( '1/1/18', 'MM/DD/YY' ), 'requirement' FROM DUAL UNION ALL
SELECT 1, TO_DATE( '1/8/18', 'MM/DD/YY' ), 'analysis' FROM DUAL UNION ALL
SELECT 1, NULL, 'design' FROM DUAL UNION ALL
SELECT 1, TO_DATE( '1/30/18', 'MM/DD/YY' ), 'closed' FROM DUAL UNION ALL
SELECT 2, TO_DATE( '2/1/18', 'MM/DD/YY' ), 'requirement' FROM DUAL UNION ALL
SELECT 2, TO_DATE( '2/18/18', 'MM/DD/YY' ), 'closed' FROM DUAL UNION ALL
SELECT 3, TO_DATE( '1/2/18', 'MM/DD/YY' ), 'requirement' FROM DUAL UNION ALL
SELECT 3, TO_DATE( '1/29/18', 'MM/DD/YY' ), 'analysis' FROM DUAL UNION ALL
SELECT 3, NULL, 'accepted' FROM DUAL UNION ALL
SELECT 3, TO_DATE( '2/5/18', 'MM/DD/YY' ), 'closed' FROM DUAL;
Query:
WITH statuses ( status, id ) AS (
SELECT 'requirement', 1 FROM DUAL UNION ALL
SELECT 'analysis', 2 FROM DUAL UNION ALL
SELECT 'design', 3 FROM DUAL UNION ALL
SELECT 'accepted', 4 FROM DUAL UNION ALL
SELECT 'closed', 5 FROM DUAL
),
ranges ( d, dt, status, id, recent_dt, recent_id, next_dt, next_id ) AS (
SELECT d.d,
d.dt,
s.status,
s.id,
NVL(
d.dt,
LAG( d.dt, 1 )
IGNORE NULLS OVER ( PARTITION BY d.d ORDER BY s.id )
),
NVL2(
d.dt,
s.id,
LAG( CASE WHEN d.dt IS NOT NULL THEN s.id END, 1 )
IGNORE NULLS OVER ( PARTITION BY d.d ORDER BY s.id )
),
LEAD( d.dt, 1, d.dt )
IGNORE NULLS OVER ( PARTITION BY d.d ORDER BY s.id ),
LEAD( CASE WHEN d.dt IS NOT NULL THEN s.id END, 1, s.id + 1 )
IGNORE NULLS OVER ( PARTITION BY d.d ORDER BY s.id )
FROM data d
PARTITION BY ( d )
RIGHT OUTER JOIN statuses s
ON ( d.status = s.status )
)
SELECT d,
dt,
status,
( next_dt - recent_dt ) / (next_id - recent_id ) AS estimated_duration
FROM ranges;
Output:
D | DT | STATUS | ESTIMATED_DURATION
-: | :-------- | :---------- | ---------------------------------------:
1 | 01-JAN-18 | requirement | 7
1 | 08-JAN-18 | analysis | 7.33333333333333333333333333333333333333
1 | null | design | 7.33333333333333333333333333333333333333
1 | null | accepted | 7.33333333333333333333333333333333333333
1 | 30-JAN-18 | closed | 0
2 | 01-FEB-18 | requirement | 4.25
2 | null | analysis | 4.25
2 | null | design | 4.25
2 | null | accepted | 4.25
2 | 18-FEB-18 | closed | 0
3 | 02-JAN-18 | requirement | 27
3 | 29-JAN-18 | analysis | 2.33333333333333333333333333333333333333
3 | null | design | 2.33333333333333333333333333333333333333
3 | null | accepted | 2.33333333333333333333333333333333333333
3 | 05-FEB-18 | closed | 0
Query 2:
Then you can easily change that to take the average for each status:
WITH statuses ( status, id ) AS (
SELECT 'requirement', 1 FROM DUAL UNION ALL
SELECT 'analysis', 2 FROM DUAL UNION ALL
SELECT 'design', 3 FROM DUAL UNION ALL
SELECT 'accepted', 4 FROM DUAL UNION ALL
SELECT 'closed', 5 FROM DUAL
),
ranges ( d, dt, status, id, recent_dt, recent_id, next_dt, next_id ) AS (
SELECT d.d,
d.dt,
s.status,
s.id,
NVL(
d.dt,
LAG( d.dt, 1 )
IGNORE NULLS OVER ( PARTITION BY d.d ORDER BY s.id )
),
NVL2(
d.dt,
s.id,
LAG( CASE WHEN d.dt IS NOT NULL THEN s.id END, 1 )
IGNORE NULLS OVER ( PARTITION BY d.d ORDER BY s.id )
),
LEAD( d.dt, 1, d.dt )
IGNORE NULLS OVER ( PARTITION BY d.d ORDER BY s.id ),
LEAD( CASE WHEN d.dt IS NOT NULL THEN s.id END, 1, s.id + 1 )
IGNORE NULLS OVER ( PARTITION BY d.d ORDER BY s.id )
FROM data d
PARTITION BY ( d )
RIGHT OUTER JOIN statuses s
ON ( d.status = s.status )
)
SELECT status,
AVG( ( next_dt - recent_dt ) / (next_id - recent_id ) ) AS estimated_duration
FROM ranges
GROUP BY status, id
ORDER BY id;
Results:
STATUS | ESTIMATED_DURATION
:---------- | ---------------------------------------:
requirement | 12.75
analysis | 4.63888888888888888888888888888888888889
design | 4.63888888888888888888888888888888888889
accepted | 4.63888888888888888888888888888888888889
closed | 0
db<>fiddle here

REGEXP to capture values delimited by a set of delimiters

My column value looks something like below (just an example I created):
{BASICINFOxxxFyyy100x} {CONTACTxxx12345yyy20202x}
It can contain 0 or more blocks of data... I have created the below query to split the blocks
with x as
(select
'{BASICINFOxxxFyyy100x}{CONTACTxxx12345yyy20202x}' a from dual)
select REGEXP_SUBSTR(a,'({.*?x})',1,rownum,null,1)
from x
connect by rownum <= REGEXP_COUNT(a,'x}')
However I would like to further split the output into 3 columns like below:
ColumnA | ColumnB | ColumnC
------------------------------
BASICINFO | F |100
CONTACT | 12345 |20202
The delimiters are always standard. I failed to create a pretty query which gives me the desired output.
Thanks in advance.
SQL Fiddle
Oracle 11g R2 Schema Setup:
CREATE TABLE your_table ( str ) AS
SELECT '{BASICINFOxxxFyyy100x}{CONTACTxxx12345yyy20202x}' from dual
/
Query 1:
select REGEXP_SUBSTR(
t.str,
'\{([^}]*?)xxx([^}]*?)yyy([^}]*?)x\}',
1,
l.COLUMN_VALUE,
NULL,
1
) AS col1,
REGEXP_SUBSTR(
str,
'\{([^}]*?)xxx([^}]*?)yyy([^}]*?)x\}',
1,
l.COLUMN_VALUE,
NULL,
2
) AS col2,
REGEXP_SUBSTR(
str,
'\{([^}]*?)xxx([^}]*?)yyy([^}]*?)x\}',
1,
l.COLUMN_VALUE,
NULL,
3
) AS col3
FROM your_table t
CROSS JOIN
TABLE(
CAST(
MULTISET(
SELECT LEVEL
FROM DUAL
CONNECT BY LEVEL <= REGEXP_COUNT( t.str,'\{([^}]*?)xxx([^}]*?)yyy([^}]*?)x\}')
) AS SYS.ODCINUMBERLIST
)
) l
Results:
| COL1 | COL2 | COL3 |
|-----------|-------|-------|
| BASICINFO | F | 100 |
| CONTACT | 12345 | 20202 |
Note:
Your query:
select REGEXP_SUBSTR(a,'({.*?x})',1,rownum,null,1)
from x
connect by rownum <= REGEXP_COUNT(a,'x}')
will not work when you have multiple rows of input. In the CONNECT BY clause, the hierarchical query has nothing to restrict it to connecting Row1-Level2 to Row1-Level1 rather than to Row2-Level1, so it will connect it to both, and as the depth of the hierarchy grows it will create exponentially more duplicate copies of the output rows. There are hacks you can use to stop this (one is sketched below), but if you are going to use hierarchical queries it is much more efficient to put the row generator into a correlated sub-query, which can then be CROSS JOINed back to the original table (it is correlated, so it won't join to the wrong rows).
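A minimal sketch of one such hack (my illustration, not from the original answer), using the your_table/str names from the setup above: correlate the CONNECT BY back to the current row and add a non-deterministic PRIOR term (ideally you would correlate on a unique key rather than the string itself):
SELECT REGEXP_SUBSTR(t.str, '({.*?x})', 1, LEVEL, NULL, 1) AS block
FROM your_table t
CONNECT BY LEVEL <= REGEXP_COUNT(t.str, 'x}')
       AND PRIOR str = str                -- only connect levels belonging to the same source row
       AND PRIOR SYS_GUID() IS NOT NULL;  -- non-deterministic term so Oracle does not raise a CONNECT BY loop error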
Better yet would be to fix your data structure so you are not storing multiple values in delimited strings.
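For illustration only, a hedged sketch of what a normalised structure could look like for this data (table and column names are hypothetical):
-- One row per block, with each value in its own column, instead of
-- packing several blocks into one delimited string.
CREATE TABLE block_values (
  record_id  NUMBER        NOT NULL,
  block_name VARCHAR2(30)  NOT NULL,  -- e.g. 'BASICINFO', 'CONTACT'
  value1     VARCHAR2(100),           -- e.g. 'F', '12345'
  value2     VARCHAR2(100)            -- e.g. '100', '20202'
);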
SQL> with x as
2 (select '{BASICINFOxxxFyyy100x}{CONTACTxxx12345yyy20202x}' a from dual
3 ),
4 y as (
5 select REGEXP_SUBSTR(a,'({.*?x})',1,rownum,null,1) c1
6 from x
7 connect by rownum <= REGEXP_COUNT(a,'x}')
8 )
9 select
10 substr(c1,2,instr(c1,'xxx')-2) z1,
11 substr(c1,instr(c1,'xxx')+3,instr(c1,'yyy')-instr(c1,'xxx')-3) z2,
12 rtrim(substr(c1,instr(c1,'yyy')+3),'x}') z3
13 from y;
Z1 Z2 Z3
--------------- --------------- ---------------
BASICINFO F 100
CONTACT 12345 20202
Here is another solution, derived from where you left off. Your query had already split the single row into two rows; the query below splits each of those rows into 3 columns:
WITH x
AS (SELECT '{BASICINFOxxxFyyy100x}{CONTACTxxx12345yyy20202x}' a
FROM DUAL),
-- Your query result here
tbl
AS ( SELECT REGEXP_SUBSTR (a,
'({.*?x})',
1,
ROWNUM,
NULL,
1)
Col
FROM x
CONNECT BY ROWNUM <= REGEXP_COUNT (a, 'x}'))
--- Actual Query
SELECT col,
REGEXP_SUBSTR (col,
'(.*?{)([^x]+)',
1,
1,
'',
2)
AS COL1,
REGEXP_SUBSTR (REGEXP_SUBSTR (col,
'(.*?)([^x]+)',
1,
2,
'',
2),
'[^y]+',
1,
1)
AS COL2,
REGEXP_SUBSTR (REGEXP_SUBSTR (col,
'[^y]+x',
1,
2),
'[^x]+',
1,
1)
AS COL3
FROM tbl;
Output:
SQL> /
COL COL1 COL2 COL3
------------------------------------------------ ------------------------------------------------ ------------------------------------------------ ------------------------------------------------
{BASICINFOxxxFyyy100x} BASICINFO F 100
{CONTACTxxx12345yyy20202x} CONTACT 12345 20202
