I want to retrieve multible columns, sum of weight data from a table over a whole month. what I need help with is that I want to group the result into 2 parts sum of 1-15 of the month and second line 16-31 of the month.
Select TO_CHAR(sysdate) dummy
(SELECT(SUM(B.SCALE_WEIGHT) FROM TRACKING.DATALOG_TAB B WHERE B.MATERIALID= 1
AND B.SCALE_EVENTDATE BETWEEN TO_DATE(TRUNC(TO_DATE('2020-10-1', 'YYYY-MM-
DD'),'MONTH')) AND TO_DATE(TRUNC(TO_DATE('2020-11-1', 'YYYY-MM-DD'),
'MONTH')+16)) as MTRL1,
(SELECT(SUM(B.SCALE_WEIGHT) FROM TRACKING.DATALOG_TAB B WHERE B.MATERIALID= 2
AND B.SCALE_EVENTDATE BETWEEN TO_DATE(TRUNC(TO_DATE('2020-10-1', 'YYYY-MM-
DD'),'MONTH')) AND TO_DATE(TRUNC(TO_DATE('2020-11-1', 'YYYY-MM-DD'),
'MONTH')+16)) as MTRL2
FROM DUAL
GROUP BY(somthing like this - 1-15 and 16-31);
UPDATE
the result should look like this
To me, it looks like this:
select
sum(case when b.materialid = 1 and
to_number(to_char(b.scale_eventdate, 'dd')) between 1 and 15 then
b.scale_weight
end) mtrl1,
--
sum(case when b.materialid = 2 and
to_number(to_char(b.scale_eventdate, 'dd')) between 16 and 31 then
b.scale_weight
end) mtrl2
from datalog_tab b
where to_char(b.scale_eventdate, 'yyyymm') = '202010'
In other words, check whether day of scale_eventdate column belongs to 1st or 2nd half of the month and sum scale_weight accordingly.
If you have the sample data:
CREATE TABLE tracking.datalog_tab ( materialid, scale_eventdate, scale_weight ) AS
SELECT 1, DATE '2020-10-01', 1 FROM DUAL UNION ALL
SELECT 1, DATE '2020-10-15', 2 FROM DUAL UNION ALL
SELECT 1, DATE '2020-10-16', 3 FROM DUAL UNION ALL
SELECT 1, DATE '2020-10-31', 4 FROM DUAL UNION ALL
SELECT 2, DATE '2020-10-01', -1 FROM DUAL UNION ALL
SELECT 2, DATE '2020-10-15', -2 FROM DUAL UNION ALL
SELECT 2, DATE '2020-10-16', -3 FROM DUAL UNION ALL
SELECT 2, DATE '2020-10-31', -4 FROM DUAL;
You can use:
SELECT MATERIALID,
CASE
WHEN EXTRACT( DAY FROM SCALE_EVENTDATE ) <= 15
THEN ' 1-15'
ELSE '16-31'
END AS day_range,
SUM(SCALE_WEIGHT)
FROM TRACKING.DATALOG_TAB
WHERE MATERIALID IN ( 1, 2 )
AND SCALE_EVENTDATE >= DATE '2020-10-01'
AND SCALE_EVENTDATE < DATE '2020-11-01'
GROUP BY
MATERIALID,
CASE
WHEN EXTRACT( DAY FROM SCALE_EVENTDATE ) <= 15
THEN ' 1-15'
ELSE '16-31'
END;
Which outputs:
MATERIALID | DAY_RANGE | SUM(SCALE_WEIGHT)
---------: | :-------- | ----------------:
1 | 1-15 | 3
2 | 1-15 | -3
1 | 16-31 | 7
2 | 16-31 | -7
Or, if you want them as columns then PIVOT:
SELECT *
FROM (
SELECT MATERIALID,
CASE
WHEN EXTRACT( DAY FROM SCALE_EVENTDATE ) <= 15
THEN ' 1-15'
ELSE '16-31'
END AS day_range,
SCALE_WEIGHT
FROM TRACKING.DATALOG_TAB
WHERE MATERIALID IN ( 1, 2 )
AND SCALE_EVENTDATE >= DATE '2020-10-01'
AND SCALE_EVENTDATE < DATE '2020-11-01'
)
PIVOT (
SUM( scale_weight ) FOR ( materialid, day_range ) IN (
( 1, ' 1-15' ) AS mtrl1_01_15,
( 1, '16-31' ) AS mtrl1_16_31,
( 2, ' 1-15' ) AS mtrl2_01_15,
( 2, '16-31' ) AS mtrl2_16_31
)
);
Which outputs:
MTRL1_01_15 | MTRL1_16_31 | MTRL2_01_15 | MTRL2_16_31
----------: | ----------: | ----------: | ----------:
3 | 7 | -3 | -7
db<>fiddle here
Update
SELECT *
FROM (
SELECT MATERIALID,
CASE
WHEN EXTRACT( DAY FROM SCALE_EVENTDATE ) <= 15
THEN ' 1-15 '
ELSE '16-31 '
END
|| TO_CHAR( scale_eventdate, 'Mon' ) AS date_range,
SCALE_WEIGHT
FROM /*TRACKING.*/DATALOG_TAB
WHERE MATERIALID IN ( 1, 2, 3 )
AND SCALE_EVENTDATE >= DATE '2020-10-01'
AND SCALE_EVENTDATE < DATE '2020-11-01'
)
PIVOT (
SUM( scale_weight ) FOR materialid IN (
1 AS sum_mtrl1_weight,
2 AS sum_mtrl2_weight,
3 AS sum_mtrl3_weight
)
);
Which, for the sample data:
CREATE TABLE /*TRACKING.*/datalog_tab ( materialid, scale_eventdate, scale_weight ) AS
SELECT 1, DATE '2020-10-01', 25 FROM DUAL UNION ALL
SELECT 1, DATE '2020-10-15', 75 FROM DUAL UNION ALL
SELECT 1, DATE '2020-10-16', 125 FROM DUAL UNION ALL
SELECT 1, DATE '2020-10-31', 375 FROM DUAL UNION ALL
SELECT 2, DATE '2020-10-01', 90 FROM DUAL UNION ALL
SELECT 2, DATE '2020-10-15', 110 FROM DUAL UNION ALL
SELECT 2, DATE '2020-10-16', 90 FROM DUAL UNION ALL
SELECT 2, DATE '2020-10-31', 125 FROM DUAL UNION ALL
SELECT 3, DATE '2020-10-01', 120 FROM DUAL UNION ALL
SELECT 3, DATE '2020-10-16', 120 FROM DUAL UNION ALL
SELECT 3, DATE '2020-10-31', 240 FROM DUAL;
Outputs:
DATE_RANGE | SUM_MTRL1_WEIGHT | SUM_MTRL2_WEIGHT | SUM_MTRL3_WEIGHT
:--------- | ---------------: | ---------------: | ---------------:
1-15 Oct | 100 | 200 | 120
16-31 Oct | 500 | 215 | 360
db<>fiddle here
Related
Is it possible to count and also group by comma delimited values in the oracle database table? This is a table data example:
id | user | title |
1 | foo | a,b,c |
2 | bar | a,d |
3 | tee | b |
The expected result would be:
title | count
a | 2
b | 2
c | 1
d | 1
I wanted to use concat like this:
SELECT a.title FROM Account a WHERE concat(',', a.title, ',') LIKE 'a' OR concat(',', a.title, ',') LIKE 'b' ... GROUP BY a.title?
But I'm getting invalid number of arguments on concat. The title values are predefined, therefore I don't mind if I have to list all of them in the query. Any help is greatly appreciated.
This uses simple string functions and a recursive sub-query factoring and may be faster than using regular expressions and correlated joins:
Oracle Setup:
CREATE TABLE account ( id, "user", title ) AS
SELECT 1, 'foo', 'a,b,c' FROM DUAL UNION ALL
SELECT 2, 'bar', 'a,d' FROM DUAL UNION ALL
SELECT 3, 'tee', 'b' FROM DUAL;
Query:
WITH positions ( title, start_pos, end_pos ) AS (
SELECT title,
1,
INSTR( title, ',', 1 )
FROM account
UNION ALL
SELECT title,
end_pos + 1,
INSTR( title, ',', end_pos + 1 )
FROM positions
WHERE end_pos > 0
),
items ( item ) AS (
SELECT CASE end_pos
WHEN 0
THEN SUBSTR( title, start_pos )
ELSE SUBSTR( title, start_pos, end_pos - start_pos )
END
FROM positions
)
SELECT item,
COUNT(*)
FROM items
GROUP BY item
ORDER BY item;
Output:
ITEM | COUNT(*)
:--- | -------:
a | 2
b | 2
c | 1
d | 1
db<>fiddle here
Split titles to rows and count them.
SQL> with test (id, title) as
2 (select 1, 'a,b,c' from dual union all
3 select 2, 'a,d' from dual union all
4 select 3, 'b' from dual
5 ),
6 temp as
7 (select regexp_substr(title, '[^,]', 1, column_value) val
8 from test cross join table(cast(multiset(select level from dual
9 connect by level <= regexp_count(title, ',') + 1
10 ) as sys.odcinumberlist))
11 )
12 select val as title,
13 count(*)
14 From temp
15 group by val
16 order by val;
TITLE COUNT(*)
-------------------- ----------
a 2
b 2
c 1
d 1
SQL>
If titles aren't that simple, then modify REGEXP_SUBSTR (add + sign) in line #7, e.g.
SQL> with test (id, title) as
2 (select 1, 'Robin Hood,Avatar,Star Wars Episode III' from dual union all
3 select 2, 'Mickey Mouse,Avatar' from dual union all
4 select 3, 'The Godfather' from dual
5 ),
6 temp as
7 (select regexp_substr(title, '[^,]+', 1, column_value) val
8 from test cross join table(cast(multiset(select level from dual
9 connect by level <= regexp_count(title, ',') + 1
10 ) as sys.odcinumberlist))
11 )
12 select val as title,
13 count(*)
14 From temp
15 group by val
16 order by val;
TITLE COUNT(*)
------------------------------ ----------
Avatar 2
Mickey Mouse 1
Robin Hood 1
Star Wars Episode III 1
The Godfather 1
SQL>
How to calculate the average time difference between each stage.
The challenge with the actual data set is not every id will go through all stages.. some will skip stages and the date is not continuous for all Id's like below.
id date status
1 1/1/18 requirement
1 1/8/18 analysis
1 ? design
1 1/30/18 closed
2 2/1/18 requirement
2 2/18/18 closed
3 1/2/18 requirement
3 1/29/18 analysis
3 ? accepted
3 2/5/18 closed
?--we have missing dates as well
Expected output
id date status time_spent
1 1/1/18 requirement 0
1 1/8/18 analysis 7
1 ? design
1 1/30/18 closed 22
2 2/1/18 requirement 0
2 2/18/18 closed 17
3 1/2/18 requirement 0
3 1/29/18 analysis 27
3 ? accepted
3 2/5/18 closed 24
status avg(timespent)
requirement 0
analysis 17
design
closed 21
You can use windowing functions LAG (or LEAD) to get the data of the previous (or next) status for each id. That will let you compute the time elapsed in each stage. Then, compute the average time elapsed for each stage.
Here is an example of how to do that:
with input_data (id, dte, status) as (
SELECT 1, TO_DATE('1/1/18','MM/DD/YY'), 'requirement' FROM DUAL UNION ALL
SELECT 1, TO_DATE('1/8/18','MM/DD/YY'), 'analysis' FROM DUAL UNION ALL
SELECT 1, NULL, 'design' FROM DUAL UNION ALL
SELECT 1, TO_DATE('1/30/18','MM/DD/YY'), 'closed' FROM DUAL UNION ALL
SELECT 2, TO_DATE('2/1/18','MM/DD/YY'), 'requirement' FROM DUAL UNION ALL
SELECT 2, TO_DATE('2/18/18','MM/DD/YY'), 'closed' FROM DUAL UNION ALL
SELECT 3, TO_DATE('1/2/18','MM/DD/YY'), 'requirement' FROM DUAL UNION ALL
SELECT 3, TO_DATE('1/29/18','MM/DD/YY'), 'analysis' FROM DUAL UNION ALL
SELECT 3, NULL, 'accepted' FROM DUAL UNION ALL
SELECT 3, TO_DATE('2/5/18','MM/DD/YY'), 'closed' FROM DUAL ),
----- Solution begins here
data_with_elapsed_days as (
SELECT id.*, dte-nvl(lag(dte ignore nulls) over ( partition by id order by dte ), dte) elapsed
from input_data id)
SELECT status, avg(elapsed)
FROM data_with_elapsed_days d
group by status
order by decode(status,'requirement',1,'analysis',2,'design',3,'accepted',4,'closed',5,99);
+-------------+-------------------------------------------+
| STATUS | AVG(ELAPSED) |
+-------------+-------------------------------------------+
| requirement | 0 |
| analysis | 17 |
| design | |
| accepted | |
| closed | 15.33333333333333333333333333333333333333 |
+-------------+-------------------------------------------+
As I said in my comment, that logic computes the elapsed days as the time to the given status from the prior status. Since, "requirement" has no prior status, this logic will always show zero days spent in requirements. It would probably be better to compute the time from the given status to the next status. For "closed", there would be no next status. You could just leave that blank or use SYSDATE as the data of the next status. Here is an example of that:
with input_data (id, dte, status) as (
SELECT 1, TO_DATE('1/1/18','MM/DD/YY'), 'requirement' FROM DUAL UNION ALL
SELECT 1, TO_DATE('1/8/18','MM/DD/YY'), 'analysis' FROM DUAL UNION ALL
SELECT 1, NULL, 'design' FROM DUAL UNION ALL
SELECT 1, TO_DATE('1/30/18','MM/DD/YY'), 'closed' FROM DUAL UNION ALL
SELECT 2, TO_DATE('2/1/18','MM/DD/YY'), 'requirement' FROM DUAL UNION ALL
SELECT 2, TO_DATE('2/18/18','MM/DD/YY'), 'closed' FROM DUAL UNION ALL
SELECT 3, TO_DATE('1/2/18','MM/DD/YY'), 'requirement' FROM DUAL UNION ALL
SELECT 3, TO_DATE('1/29/18','MM/DD/YY'), 'analysis' FROM DUAL UNION ALL
SELECT 3, NULL, 'accepted' FROM DUAL UNION ALL
SELECT 3, TO_DATE('2/5/18','MM/DD/YY'), 'closed' FROM DUAL ),
----- Solution begins here
data_with_elapsed_days as (
SELECT id.*, nvl(lead(dte ignore nulls) over ( partition by id order by dte ), trunc(sysdate))-dte elapsed
from input_data id)
SELECT status, avg(elapsed)
FROM data_with_elapsed_days d
group by status
order by decode(status,'requirement',1,'analysis',2,'design',3,'accepted',4,'closed',5,99);
+-------------+------------------------------------------+
| STATUS | AVG(ELAPSED) |
+-------------+------------------------------------------+
| requirement | 17 |
| analysis | 14.5 |
| design | |
| accepted | |
| closed | 361.666666666666666666666666666666666667 |
+-------------+------------------------------------------+
I agree with #MatthewMcPeak. Your requirements seem a bit odd: you spend zero days of requirement stage but spend an average of 21 days on closed? Fnord.
This solution treats the presented date as the start date of the stage and calculates the difference between it and the start_date of the next phase.
with cte as (
select status
, lead(dd ignore nulls) over (partition by id order by dd) - dd as dt_diff
from your_table)
select status, avg(dt_diff) as avg_ela
from cte
group by status
/
If you wish to include all stages for each d and estimate the time spent in each (using linear interpolation) then you can create a sub-query with all the statuses and use a PARTITION OUTER JOIN to join them and then use LAG and LEAD to find the date range the status is in and interpolate between:
Oracle Setup:
CREATE TABLE data ( d, dt, status ) AS
SELECT 1, TO_DATE( '1/1/18', 'MM/DD/YY' ), 'requirement' FROM DUAL UNION ALL
SELECT 1, TO_DATE( '1/8/18', 'MM/DD/YY' ), 'analysis' FROM DUAL UNION ALL
SELECT 1, NULL, 'design' FROM DUAL UNION ALL
SELECT 1, TO_DATE( '1/30/18', 'MM/DD/YY' ), 'closed' FROM DUAL UNION ALL
SELECT 2, TO_DATE( '2/1/18', 'MM/DD/YY' ), 'requirement' FROM DUAL UNION ALL
SELECT 2, TO_DATE( '2/18/18', 'MM/DD/YY' ), 'closed' FROM DUAL UNION ALL
SELECT 3, TO_DATE( '1/2/18', 'MM/DD/YY' ), 'requirement' FROM DUAL UNION ALL
SELECT 3, TO_DATE( '1/29/18', 'MM/DD/YY' ), 'analysis' FROM DUAL UNION ALL
SELECT 3, NULL, 'accepted' FROM DUAL UNION ALL
SELECT 3, TO_DATE( '2/5/18', 'MM/DD/YY' ), 'closed' FROM DUAL;
Query:
WITH statuses ( status, id ) AS (
SELECT 'requirement', 1 FROM DUAL UNION ALL
SELECT 'analysis', 2 FROM DUAL UNION ALL
SELECT 'design', 3 FROM DUAL UNION ALL
SELECT 'accepted', 4 FROM DUAL UNION ALL
SELECT 'closed', 5 FROM DUAL
),
ranges ( d, dt, status, id, recent_dt, recent_id, next_dt, next_id ) AS (
SELECT d.d,
d.dt,
s.status,
s.id,
NVL(
d.dt,
LAG( d.dt, 1 )
IGNORE NULLS OVER ( PARTITION BY d.d ORDER BY s.id )
),
NVL2(
d.dt,
s.id,
LAG( CASE WHEN d.dt IS NOT NULL THEN s.id END, 1 )
IGNORE NULLS OVER ( PARTITION BY d.d ORDER BY s.id )
),
LEAD( d.dt, 1, d.dt )
IGNORE NULLS OVER ( PARTITION BY d.d ORDER BY s.id ),
LEAD( CASE WHEN d.dt IS NOT NULL THEN s.id END, 1, s.id + 1 )
IGNORE NULLS OVER ( PARTITION BY d.d ORDER BY s.id )
FROM data d
PARTITION BY ( d )
RIGHT OUTER JOIN statuses s
ON ( d.status = s.status )
)
SELECT d,
dt,
status,
( next_dt - recent_dt ) / (next_id - recent_id ) AS estimated_duration
FROM ranges;
Output:
D | DT | STATUS | ESTIMATED_DURATION
-: | :-------- | :---------- | ---------------------------------------:
1 | 01-JAN-18 | requirement | 7
1 | 08-JAN-18 | analysis | 7.33333333333333333333333333333333333333
1 | null | design | 7.33333333333333333333333333333333333333
1 | null | accepted | 7.33333333333333333333333333333333333333
1 | 30-JAN-18 | closed | 0
2 | 01-FEB-18 | requirement | 4.25
2 | null | analysis | 4.25
2 | null | design | 4.25
2 | null | accepted | 4.25
2 | 18-FEB-18 | closed | 0
3 | 02-JAN-18 | requirement | 27
3 | 29-JAN-18 | analysis | 2.33333333333333333333333333333333333333
3 | null | design | 2.33333333333333333333333333333333333333
3 | null | accepted | 2.33333333333333333333333333333333333333
3 | 05-FEB-18 | closed | 0
Query 2:
Then of you can easily change that to take the average for each status:
WITH statuses ( status, id ) AS (
SELECT 'requirement', 1 FROM DUAL UNION ALL
SELECT 'analysis', 2 FROM DUAL UNION ALL
SELECT 'design', 3 FROM DUAL UNION ALL
SELECT 'accepted', 4 FROM DUAL UNION ALL
SELECT 'closed', 5 FROM DUAL
),
ranges ( d, dt, status, id, recent_dt, recent_id, next_dt, next_id ) AS (
SELECT d.d,
d.dt,
s.status,
s.id,
NVL(
d.dt,
LAG( d.dt, 1 )
IGNORE NULLS OVER ( PARTITION BY d.d ORDER BY s.id )
),
NVL2(
d.dt,
s.id,
LAG( CASE WHEN d.dt IS NOT NULL THEN s.id END, 1 )
IGNORE NULLS OVER ( PARTITION BY d.d ORDER BY s.id )
),
LEAD( d.dt, 1, d.dt )
IGNORE NULLS OVER ( PARTITION BY d.d ORDER BY s.id ),
LEAD( CASE WHEN d.dt IS NOT NULL THEN s.id END, 1, s.id + 1 )
IGNORE NULLS OVER ( PARTITION BY d.d ORDER BY s.id )
FROM data d
PARTITION BY ( d )
RIGHT OUTER JOIN statuses s
ON ( d.status = s.status )
)
SELECT status,
AVG( ( next_dt - recent_dt ) / (next_id - recent_id ) ) AS estimated_duration
FROM ranges
GROUP BY status, id
ORDER BY id;
Results:
STATUS | ESTIMATED_DURATION
:---------- | ---------------------------------------:
requirement | 12.75
analysis | 4.63888888888888888888888888888888888889
design | 4.63888888888888888888888888888888888889
accepted | 4.63888888888888888888888888888888888889
closed | 0
db<>fiddle here
Query :
select
TO_CHAR((to_date(IP_START_DATE,'DD-MM-YYYY HH24:MI:SS')+ (level-1)),'DD-MM-YYYY'),
TO_CHAR(to_date(IP_START_DATE,'DD-MM-YYYY HH24:MI:SS') + level,'DD-MM-YYYY') ,
to_number(regexp_substr(IP_PLAN_CONSUMPTION, '^\d+'))/(TO_DATE(IP_END_DATE, 'DD-MM-YYYY HH24:MI:SS') - TO_DATE(IP_START_DATE, 'DD-MM-YYYY HH24:MI:SS')) || regexp_substr(IP_PLAN_CONSUMPTION, '[A-Z]') as IP_PLAN_CONSUMPTION
FROM
dual
CONNECT BY
level <= to_date(IP_END_DATE,'DD-MM-YYYY HH24:MI:SS')-to_date(IP_START_DATE,'DD-MM-YYYY HH24:MI:SS')+1;
-> Data in Query :
select
TO_CHAR((to_date('16-07-2018 11:02','DD-MM-YYYY HH24:MI:SS')+ (level-1)),'DD-MM-YYYY'),
TO_CHAR(to_date('16-07-2018 11:02','DD-MM-YYYY HH24:MI:SS') + level,'DD-MM-YYYY'),
to_number(regexp_substr('4000 T', '^\d+'))/(TO_DATE('18-07-2018 00:00', 'DD-MM-YYYY HH24:MI:SS') - TO_DATE('16-07-2018 11:02', 'DD-MM-YYYY HH24:MI:SS')) || regexp_substr('4000 T', '[A-Z]') as IP_PLAN_CONSUMPTION
FROM
dual
CONNECT BY
level <= to_date('18-07-2018 00:00','DD-MM-YYYY HH24:MI:SS')-to_date('16-07-2018 11:02','DD-MM-YYYY HH24:MI:SS')+1;
Output will Be :
But its should be 2000 T
Not : If Start Date: 16-07-2018 00:00 & End Date : 19-07-2018 00:00 then Day Difference is 3 Days & Consumption is 4000 T then Inserted Consumption Should be 1333.333333333333 T ~ 1334 T in each date.
If you are storing dates, you should store them in your table as the DATE data type (and not as strings).
SQL Fiddle
Oracle 11g R2 Schema Setup:
CREATE TABLE your_table( id, ip_start_date, ip_end_date, ip_plan_consumption ) AS
SELECT 1,
DATE '2018-07-16' + INTERVAL '11:02' HOUR TO MINUTE,
DATE '2018-07-18' + INTERVAL '00:00' HOUR TO MINUTE,
'4000 T'
FROM DUAL
UNION ALL
SELECT 2,
DATE '2018-07-16' + INTERVAL '11:02' HOUR TO MINUTE,
DATE '2018-07-16' + INTERVAL '23:08' HOUR TO MINUTE,
'3000 T'
FROM DUAL
UNION ALL
SELECT 3,
DATE '2018-07-10' + INTERVAL '00:00' HOUR TO MINUTE,
DATE '2018-07-13' + INTERVAL '23:59' HOUR TO MINUTE,
'15000 U'
FROM DUAL
;
Query 1:
WITH data ( id, start_dt, end_dt, consumption, units ) AS (
SELECT id,
TRUNC( IP_START_DATE ),
GREATEST( TRUNC( IP_START_DATE ) + 1, TRUNC( IP_END_DATE ) ),
TO_NUMBER( REGEXP_SUBSTR( IP_PLAN_CONSUMPTION, '^\d+' ) ),
REGEXP_SUBSTR( IP_PLAN_CONSUMPTION, '\S+$' )
FROM your_table
)
SELECT id,
t.column_value AS start_dt,
t.column_value + 1 AS end_dt,
consumption / ( end_dt - start_dt ) || units AS IP_PLAN_CONSUMPTION
FROM data d
CROSS JOIN
TABLE(
CAST(
MULTISET(
SELECT d.start_dt + LEVEL - 1
FROM DUAL
CONNECT BY d.start_dt + LEVEL - 1 < d.end_dt
)
AS SYS.ODCIDATELIST
)
) t
Results:
| ID | START_DT | END_DT | IP_PLAN_CONSUMPTION |
|----|----------------------|----------------------|---------------------|
| 1 | 2018-07-16T00:00:00Z | 2018-07-17T00:00:00Z | 2000T |
| 1 | 2018-07-17T00:00:00Z | 2018-07-18T00:00:00Z | 2000T |
| 2 | 2018-07-16T00:00:00Z | 2018-07-17T00:00:00Z | 3000T |
| 3 | 2018-07-10T00:00:00Z | 2018-07-11T00:00:00Z | 5000U |
| 3 | 2018-07-11T00:00:00Z | 2018-07-12T00:00:00Z | 5000U |
| 3 | 2018-07-12T00:00:00Z | 2018-07-13T00:00:00Z | 5000U |
I need to postprocess a Oracle dataset in order to find the number of heat waves.
By definition, a heat waves occurs when the data value is greater than a threshold at least two consecutive times.
For example, given the threshold=20 and the sequence
23 31 32 17 16 23 16 21 22 18
the heat waves are 2:
{23,31,32} and {21,22}
and the lenght of the longest one is 3 (size of bigger subset)
My input dataset consists of several sequences; a sample input result set is:
-----------------------------
| ID | DAY | VALUE |
-----------------------------
| 100 | 1/1/17 | 20 |
| 100 | 2/1/17 | 21 |
| 200 | 1/1/17 | 12 |
| 200 | 2/1/17 | 24 |
| ... ... ...
In other words, I have a sequence per each ID and I need to output something like that:
-----------------------
| ID | #heat waves |
-----------------------
| 100 | 3 |
| 200 | 1 |
Here the current version of my stored procedure:
create or replace PROCEDURE sp (
p_query IN VARCHAR2,
cursor_ out sys_refcursor
) AS
processed processed_data_table := processed_data_table();
c sys_refcursor;
BEGIN
OPEN c FOR p_query;
processed.EXTEND;
processed(processed.count) := processed_data_obj();
fetch c INTO processed(processed.count).ID,
processed(processed.count).DAY, processed(processed.count).VALUE;
while c%found
processed.EXTEND;
processed(processed.count) := processed_data_obj();
fetch c INTO processed(processed.count).ID,
processed(processed.count).DAY, processed(processed.count).VALUE;
END loop;
CLOSE c;
processed.TRIM;
// HERE I NEED TO PROCESS processed TABLE AND STORE RESULT IN output
TABLE
OPEN cursor_ FOR
SELECT *
FROM TABLE( output);
END sp;
Anyone could help me providing a solution?
Thanks
In Oracle 12c, use MATCH_RECOGNIZE:
select id, count(*) "# of heatwaves" from series_data
match_recognize ( partition by id
order by day
one row per match
after match skip past last row
pattern ( over_threshold{2,} )
define
over_threshold as value > 20 )
group by id
UPDATE: Also show longest heat wave for each series
To get the longest heatwave in each series, we have to introduce a MEASURES clause to the MATCH_RECOGNIZE, as below:
select id,
max(heatwave_length) "longest heatwave",
count(distinct heatwave_number) "# of heatwaves"
from series_data
match_recognize ( partition by id
order by day
measures
FINAL COUNT(*) as heatwave_length,
MATCH_NUMBER() heatwave_number
all rows per match
after match skip past last row
pattern ( over_threshold{2,} )
define
over_threshold as value > 20 )
group by id
order by id;
Full example with data:
with series_data ( id, day, value ) as
( SELECT 100, date '2017-01-01', 23 from dual union all
SELECT 100, date '2017-01-02', 31 from dual union all
SELECT 100, date '2017-01-03', 32 from dual union all
SELECT 100, date '2017-01-04', 44 from dual union all
SELECT 100, date '2017-01-05', 16 from dual union all
SELECT 100, date '2017-01-06', 23 from dual union all
SELECT 100, date '2017-01-07', 16 from dual union all
SELECT 100, date '2017-01-08', 21 from dual union all
SELECT 100, date '2017-01-09', 22 from dual union all
SELECT 100, date '2017-01-10', 18 from dual union all
SELECT 200, date '2017-01-01', 23 from dual union all
SELECT 200, date '2017-01-02', 31 from dual union all
SELECT 200, date '2017-01-03', 32 from dual union all
SELECT 200, date '2017-01-04', 17 from dual union all
SELECT 200, date '2017-01-05', 16 from dual union all
SELECT 200, date '2017-01-06', 23 from dual union all
SELECT 200, date '2017-01-07', 16 from dual union all
SELECT 200, date '2017-01-08', 21 from dual union all
SELECT 200, date '2017-01-09', 22 from dual union all
SELECT 200, date '2017-01-10', 22 from dual union all
SELECT 200, date '2017-01-11', 6 from dual union all
SELECT 200, date '2017-01-12', 22 from dual union all
SELECT 200, date '2017-01-13', 22 from dual )
select id,
max(heatwave_length) "longest heatwave",
count(distinct heatwave_number) "# of heatwaves"
from series_data
match_recognize ( partition by id
order by day
measures
FINAL COUNT(*) as heatwave_length,
MATCH_NUMBER() heatwave_number
all rows per match
after match skip past last row
pattern ( over_threshold{2,} )
define
over_threshold as value > 20 )
group by id
order by id;
Results:
ID longest heatwave # of heatwaves
----- -------------- --------------
100 4 2
200 3 3
I have the following table
+-----------+-------+-------+
| Date | Type | Value |
+-----------+-------+-------+
| 1/1/2013 | A | 1 |
| 1/2/2013 | A | 3 |
| 1/3/2013 | A | 5 |
| 1/4/2013 | A | 6 |
| 1/6/2013 | A | 8 |
| 1/7/2013 | A | 1 |
| 1/8/2013 | A | 2 |
+-----------+-------+-------+
I want to sum the value for the previous 3 dates for a certain day so i used this query.
ie: sel_date = 1/3/2013.
select type, sum(value)
from table_name
where date <= seldate
and date > seldate - 3
group by type
Now the problem is, I want to output a table with a given date range computing for the previous 3 days for each date.
ie: sel_date range 1/3/2013 - 1/8/2013
+-----------+-------+------------+
| Date | Type | Sum(Value) |
+-----------+-------+------------+
| 1/3/2013 | A | 9 | // 5 + 3 + 1
| 1/4/2013 | A | 14 | // 6 + 5 + 3
| 1/5/2013 | A | 11 | // 0 + 6 + 5
| 1/6/2013 | A | 14 | // 8 + 0 + 6
| 1/7/2013 | A | 9 | // 1 + 8 + 0
| 1/8/2013 | A | 11 | // 2 + 1 + 8
+-----------+-------+------------+
Is there a way to do this in a single query. I tried reading on partitioning but it is leading me no where.
Use range between in windowing clause:
select dt, type, value,
sum(value) over (order by dt range between 2 preceding and current row) as sv
from t
Test data and output:
create table t (dt date, type varchar2(1), value number(5));
insert into t values (date '2013-01-01', 'A', 1);
insert into t values (date '2013-01-02', 'A', 3);
insert into t values (date '2013-01-03', 'A', 5);
insert into t values (date '2013-01-04', 'A', 6);
insert into t values (date '2013-01-05', 'A', 8);
insert into t values (date '2013-01-06', 'A', 1);
insert into t values (date '2013-01-07', 'A', 2);
insert into t values (date '2013-01-12', 'A', 2);
DT TYPE VALUE SV
----------- ---- ------ ----------
2013-01-01 A 1 1
2013-01-02 A 3 4
2013-01-03 A 5 9
2013-01-04 A 6 14
2013-01-05 A 8 19
2013-01-06 A 1 15
2013-01-07 A 2 11
2013-01-12 A 2 2
You can try with something like this:
with test(Date_, Type, Value ) as
(
select to_date('01/01/2013', 'mm/dd/yyyy'), 'A', 1 from dual union all
select to_date('01/02/2013', 'mm/dd/yyyy'), 'A', 3 from dual union all
select to_date('01/03/2013', 'mm/dd/yyyy'), 'A', 5 from dual union all
select to_date('01/04/2013', 'mm/dd/yyyy'), 'A', 6 from dual union all
select to_date('01/05/2013', 'mm/dd/yyyy'), 'A', 8 from dual union all
select to_date('01/06/2013', 'mm/dd/yyyy'), 'A', 1 from dual union all
select to_date('01/07/2013', 'mm/dd/yyyy'), 'A', 2 from dual
)
select *
from (
select date_, type,
value + nvl(lag(value, 1) over (partition by type order by date_), 0)
+ nvl(lag(value, 2) over (partition by type order by date_), 0) as value
from test
)
where date_ between to_date('01/03/2013', 'mm/dd/yyyy') and to_date('01/07/2013', 'mm/dd/yyyy')
This sums, for each row, the values of the two preceding ones, based on date; the external query is simply used to apply the filter, given that applying it in the internal query would lead to a wrong sum.
The LAG is used to read values from the rows that precede the current row by 1 or 2 positions.
You can use this:
select date1 ,type,
(select sum(t1.value) sumvalue from table_name t1 where t1.date1 between (t2.date1 - 2) and t2.date1 )
from table_name t2
where date1 between startDate and endDate
select t.date, sum(t.value) OVER(ORDER BY t.date ROWS BETWEEN 2 PRECEDING AND 0 FOLLOWING) as Pre_3row_sum
from table_name t