I have a data set with overlapping dates in a slowly changing dimension table. I am trying to come up with a query to find the persons with overlapping date ranges, subject to a few conditions.
for example
PERSON_ID,RELATION_ID,RELATION_NAME,START_DT, END_DT, ACTIVE_IND,LATEST_ROW
STEVE -1 NONE 7/26/2018 10/24/2018 Y N
STEVE 111 STUDENT 8/8/2018 10/22/2018 Y N
STEVE 111 STUDENT 10/23/2018 12/31/4712 Y Y
STEVE -1 NONE 10/25/2018 2/7/2019 Y N
STEVE 222 Manager 8/9/2018 12/31/4712 Y Y
BOB 333 ASSOCIATE 8/9/2018 12/31/4712 Y Y
BOB 666 TEACHER 8/10/2018 12/31/4712 Y Y
Dave 555 Director 8/9/2018 12/31/4712 Y Y
I want to find the person_ids with overlapping date ranges only when at least one of the RELATION_IDs is -1. For example, BOB is both associate and teacher for overlapping periods; it's OK for a person to have 2 roles. However, I want to find all persons with overlapping dates that involve the "NONE" relationship and some other role. Example: STEVE.
Could you please give me some suggestions as to how to come up with a query
Took a shot using the pattern recognition on it, and it shows all rows that overlap one another. In this case one row overlapped twice in RELATION_IDs = -1, so it displays the same row on both overlapping situations, and the query marks it as overlap_num 1 and 2.
-- Sample data: one row per person/relation validity period.
with t (PERSON_ID,RELATION_ID,RELATION_NAME,START_DT, END_DT, ACTIVE_IND,LATEST_ROW) as (
select 'STEVE', -1, 'NONE', to_date('7/26/2018', 'MM/DD/YYYY'), to_date('10/24/2018', 'MM/DD/YYYY'), 'Y', 'N' from dual union all
select 'STEVE', 111, 'STUDENT', to_date('8/8/2018', 'MM/DD/YYYY'), to_date('10/22/2018', 'MM/DD/YYYY'), 'Y', 'N' from dual union all
select 'STEVE', 111, 'STUDENT', to_date('10/23/2018', 'MM/DD/YYYY'), to_date('12/31/4712', 'MM/DD/YYYY'), 'Y', 'Y' from dual union all
select 'STEVE', -1, 'NONE', to_date('10/25/2018', 'MM/DD/YYYY'), to_date('2/7/2019', 'MM/DD/YYYY'), 'Y', 'N' from dual union all
select 'STEVE', 222, 'Manager', to_date('8/9/2018', 'MM/DD/YYYY'), to_date('12/31/4712', 'MM/DD/YYYY'), 'Y', 'Y' from dual union all
select 'BOB', 333, 'ASSOCIATE', to_date('8/9/2018', 'MM/DD/YYYY'), to_date('12/31/4712', 'MM/DD/YYYY'), 'Y', 'Y' from dual union all
select 'BOB', 666, 'TEACHER', to_date('8/10/2018', 'MM/DD/YYYY'), to_date('12/31/4712', 'MM/DD/YYYY'), 'Y', 'Y' from dual union all
select 'Dave', 555, 'Director', to_date('8/9/2018', 'MM/DD/YYYY'), to_date('12/31/4712', 'MM/DD/YYYY'), 'Y', 'Y' from dual)
-- For each person, walk the rows in START_DT order and report every run of
-- rows that overlap an anchor row (STRT) where at least one side of the pair
-- is the "NONE" relation (RELATION_ID = -1).
select person_id, var_match, overlap_num, relation_id, start_dt, end_dt, start_dt_overlap, end_dt_overlap
from t
match_recognize (
  partition by person_id order by start_dt
  measures
    classifier() as var_match,          -- STRT for the anchor row, OVLAP for rows overlapping it
    match_number() as overlap_num,      -- sequential number of the match within the person
    last(start_dt) as start_dt_overlap,
    first(end_dt) as end_dt_overlap     -- END_DT of the anchor row (first row of the match)
  all rows per match
  -- Resume at the last overlapping row so that it can anchor the next match.
  after match skip to last ovlap
  pattern (strt ovlap+)
  define ovlap as
    -- Rows arrive ordered by START_DT, so the current row overlaps the anchor
    -- exactly when it starts on or before the anchor's END_DT. The same date
    -- test applies whichever side of the pair carries RELATION_ID = -1.
    start_dt <= strt.end_dt
    and (strt.relation_id = -1 or ovlap.relation_id = -1)
)
order by person_id, overlap_num, start_dt;
Sample execution:
FSITJA#db01 2019-07-11 14:55:17> with t (PERSON_ID,RELATION_ID,RELATION_NAME,START_DT, END_DT, ACTIVE_IND,LATEST_ROW) as (
2 select 'STEVE', -1, 'NONE', to_date('7/26/2018', 'MM/DD/YYYY'), to_date('10/24/2018', 'MM/DD/YYYY'), 'Y', 'N' from dual union all
3 select 'STEVE', 111, 'STUDENT', to_date('8/8/2018', 'MM/DD/YYYY'), to_date('10/22/2018', 'MM/DD/YYYY'), 'Y', 'N' from dual union all
4 select 'STEVE', 111, 'STUDENT', to_date('10/23/2018', 'MM/DD/YYYY'), to_date('12/31/4712', 'MM/DD/YYYY'), 'Y', 'Y' from dual union all
5 select 'STEVE', -1, 'NONE', to_date('10/25/2018', 'MM/DD/YYYY'), to_date('2/7/2019', 'MM/DD/YYYY'), 'Y', 'N' from dual union all
6 select 'STEVE', 222, 'Manager', to_date('8/9/2018', 'MM/DD/YYYY'), to_date('12/31/4712', 'MM/DD/YYYY'), 'Y', 'Y' from dual union all
7 select 'BOB', 333, 'ASSOCIATE', to_date('8/9/2018', 'MM/DD/YYYY'), to_date('12/31/4712', 'MM/DD/YYYY'), 'Y', 'Y' from dual union all
8 select 'BOB', 666, 'TEACHER', to_date('8/10/2018', 'MM/DD/YYYY'), to_date('12/31/4712', 'MM/DD/YYYY'), 'Y', 'Y' from dual union all
9 select 'Dave', 555, 'Director', to_date('8/9/2018', 'MM/DD/YYYY'), to_date('12/31/4712', 'MM/DD/YYYY'), 'Y', 'Y' from dual)
10 select person_id, var_match, overlap_num, relation_id, start_dt, end_dt, start_dt_overlap, end_dt_overlap
11 from t
12 match_recognize (
13 partition by person_id order by start_dt
14 measures
15 classifier() as var_match,
16 match_number() as overlap_num,
17 last(start_dt) as start_dt_overlap,
18 first(end_dt) as end_dt_overlap
19 all rows per match
20 after match skip to last ovlap
21 pattern (strt ovlap+)
22 define ovlap as (strt.relation_id = -1 and start_dt <= strt.end_dt)
23 or (ovlap.relation_id = -1 and strt.start_dt <= prev(strt.end_dt))
24 )
25 order by person_id, overlap_num, start_dt;
PERSO VAR_MATCH OVERLAP_NUM RELATION_ID START_DT END_DT START_DT_OVERLAP END_DT_OVERLAP
----- ---------- ----------- ----------- ------------------- ------------------- ------------------- -------------------
STEVE STRT 1 -1 2018-07-26 00:00:00 2018-10-24 00:00:00 2018-07-26 00:00:00 2018-10-24 00:00:00
STEVE OVLAP 1 111 2018-08-08 00:00:00 2018-10-22 00:00:00 2018-08-08 00:00:00 2018-10-24 00:00:00
STEVE OVLAP 1 222 2018-08-09 00:00:00 4712-12-31 00:00:00 2018-08-09 00:00:00 2018-10-24 00:00:00
STEVE OVLAP 1 111 2018-10-23 00:00:00 4712-12-31 00:00:00 2018-10-23 00:00:00 2018-10-24 00:00:00
STEVE STRT 2 111 2018-10-23 00:00:00 4712-12-31 00:00:00 2018-10-23 00:00:00 4712-12-31 00:00:00
STEVE OVLAP 2 -1 2018-10-25 00:00:00 2019-02-07 00:00:00 2018-10-25 00:00:00 4712-12-31 00:00:00
6 rows selected.
Oracle Setup:
CREATE TABLE table_name ( PERSON_ID, RELATION_ID, RELATION_NAME, START_DT, END_DT, ACTIVE_IND,LATEST_ROW ) AS
SELECT 'STEVE', -1, 'NONE', DATE '2018-07-26', DATE '2018-10-24', 'Y', 'N' FROM DUAL UNION ALL
SELECT 'STEVE', 111, 'STUDENT', DATE '2018-08-08', DATE '2018-10-22', 'Y', 'N' FROM DUAL UNION ALL
SELECT 'STEVE', 111, 'STUDENT', DATE '2018-10-23', DATE '4712-12-31', 'Y', 'Y' FROM DUAL UNION ALL
SELECT 'STEVE', -1, 'NONE', DATE '2018-10-25', DATE '2019-02-07', 'Y', 'N' FROM DUAL UNION ALL
SELECT 'STEVE', 222, 'Manager', DATE '2018-08-09', DATE '4712-12-31', 'Y', 'Y' FROM DUAL UNION ALL
SELECT 'BOB', 333, 'ASSOCIATE', DATE '2018-08-09', DATE '4712-12-31', 'Y', 'Y' FROM DUAL UNION ALL
SELECT 'BOB', 666, 'TEACHER', DATE '2018-08-10', DATE '4712-12-31', 'Y', 'Y' FROM DUAL UNION ALL
SELECT 'Dave', 555, 'Director', DATE '2018-08-09', DATE '4712-12-31', 'Y', 'Y' FROM DUAL;
Query:
-- Persons having a "NONE" period (RELATION_ID = -1) that overlaps a period of
-- any other relation. DISTINCT collapses persons with several such periods.
SELECT DISTINCT
       t.person_id
FROM   table_name t
WHERE  t.relation_id = -1
AND    EXISTS (
         SELECT 1
         FROM   table_name o
         WHERE  o.person_id   = t.person_id
         AND    o.relation_id <> -1
         -- The sample periods use inclusive end dates (a period ends the day
         -- before its successor starts), so two periods that merely share a
         -- boundary day still overlap: compare with <= / >=, not < / >.
         AND    o.start_dt <= t.end_dt
         AND    o.end_dt   >= t.start_dt
       );
Output:
| PERSON_ID |
| :-------- |
| STEVE |
db<>fiddle here
Related
I have data like this in my table:
2020-01-01 H
2020-01-02 B
2020-01-03 B
2020-01-04 B
.
2020-01-29 B
2020-01-30 H
2020-01-31 H
2020-02-01 H
2020-02-02 H
2020-02-03 B
2020-02-04 B
2020-02-05 B
.
Now my problem is: in the current month I need to check the third business day (in this case 2020-02-05), and I also need to get the last business day of the previous month (i.e. 2020-01-29).
By adding 2 columns:
row_number() over(partition by trunc(date_value,'MM'), day_type order by date_value) as rn_month_asc,
row_number() over(partition by trunc(date_value,'MM'), day_type order by date_value desc) as rn_month_desc
in a month the 3rd business day will have rn_month_asc=3 and day_type ='B' and the latest business day will have rn_month_desc=1 and day_type ='B', and easy to query other situations if you need to.
in the current month I need to check third business day
From Oracle 12, you can use:
-- Third business day of the current month: keep only business days ('B')
-- inside [first day of this month, first day of next month), sort them
-- ascending, skip the first two and return the next one.
SELECT date_value
FROM   table_name
WHERE  day_type = 'B'
AND    date_value >= TRUNC(SYSDATE, 'MM')
AND    date_value <  ADD_MONTHS(TRUNC(SYSDATE, 'MM'), 1)
ORDER BY date_value
OFFSET 2 ROWS
FETCH NEXT 1 ROWS ONLY;
Which, for the sample data:
-- Calendar fixture: one row per date, day_type 'B' = business day, 'H' = holiday.
CREATE TABLE table_name (date_value, day_type) AS
SELECT DATE '2020-01-01', 'H' FROM DUAL UNION ALL
SELECT DATE '2020-01-02', 'B' FROM DUAL UNION ALL
SELECT DATE '2020-01-03', 'B' FROM DUAL UNION ALL
SELECT DATE '2020-01-04', 'B' FROM DUAL UNION ALL
SELECT DATE '2020-01-05', 'B' FROM DUAL UNION ALL
SELECT DATE '2020-01-28', 'B' FROM DUAL UNION ALL
SELECT DATE '2020-01-29', 'B' FROM DUAL UNION ALL
SELECT DATE '2020-01-30', 'H' FROM DUAL UNION ALL
SELECT DATE '2020-01-31', 'H' FROM DUAL UNION ALL
-- 1 February: the original row repeated 2020-01-02 with a conflicting day_type.
SELECT DATE '2020-02-01', 'H' FROM DUAL UNION ALL
SELECT DATE '2020-02-02', 'H' FROM DUAL UNION ALL
SELECT DATE '2020-02-03', 'B' FROM DUAL UNION ALL
SELECT DATE '2020-02-04', 'B' FROM DUAL UNION ALL
SELECT DATE '2020-02-05', 'B' FROM DUAL;
If the current month was 2020-01 then the output is:
DATE_VALUE
04-JAN-20
I need to get last business day of last month
-- Last business day of the previous month: keep only business days ('B')
-- inside [first day of last month, first day of this month), sort them
-- newest first and return the top row.
SELECT date_value
FROM   table_name
WHERE  day_type = 'B'
AND    date_value >= ADD_MONTHS(TRUNC(SYSDATE, 'MM'), -1)
AND    date_value <  TRUNC(SYSDATE, 'MM')
ORDER BY date_value DESC
FETCH FIRST 1 ROW ONLY;
If the current month is 2020-02 then the output is:
DATE_VALUE
29-JAN-20
fiddle
I have a query and test case that shows the number of full-time and part-time students. A full-time student is enrolled in at least 4 courses. A part-time student is enrolled in at least 1 course, but no more than 3.
Although the query appears to work, it seems a bit verbose. I was wondering if there is a more succinct way to rewrite the query. In addition, I would like to display the students' first/last names with each row that meets the criteria.
Perhaps with something like this?
, LISTAGG(
NVL2(s.student_id, s.last_name || ', ' || s.first_name, NULL),
'; '
) WITHIN GROUP (ORDER BY s.last_name, s.first_name) AS students
Below are my tables, data and query I would like to shorten if possible. Thanks to all who answer and for your expertise.
CREATE TABLE students(student_id, first_name, last_name) AS
SELECT 1, 'Faith', 'Aaron' FROM dual UNION ALL
SELECT 2, 'Lisa', 'Saladino' FROM dual UNION ALL
SELECT 3, 'Leslee', 'Altman' FROM dual UNION ALL
SELECT 4, 'Patty', 'Kern' FROM dual UNION ALL
SELECT 5, 'Beth', 'Cooper' FROM dual UNION ALL
SELECT 95, 'Zak', 'Despart' FROM dual UNION ALL
SELECT 96, 'Owen', 'Balbert' FROM dual UNION ALL
SELECT 97, 'Jack', 'Aprile' FROM dual UNION ALL
SELECT 98, 'Nicole', 'Kramer' FROM dual UNION ALL
SELECT 99, 'Jill', 'Coralnick' FROM dual;
CREATE TABLE student_courses (student_id,course_id) AS
SELECT 1, 1 FROM dual UNION ALL
SELECT 2, 1 FROM dual UNION ALL
SELECT 3, 1 FROM dual UNION ALL
SELECT 4, 1 FROM dual UNION ALL
SELECT 5, 1 FROM dual UNION ALL
SELECT 1, 2 FROM dual UNION ALL
SELECT 2, 2 FROM dual UNION ALL
SELECT 3, 2 FROM dual UNION ALL
SELECT 4, 2 FROM dual UNION ALL
SELECT 5, 2 FROM dual UNION ALL
SELECT 1, 3 FROM dual UNION ALL
SELECT 2, 3 FROM dual UNION ALL
SELECT 3, 3 FROM dual UNION ALL
SELECT 4, 3 FROM dual UNION ALL
SELECT 5, 3 FROM dual UNION ALL
SELECT 97, 1 FROM dual UNION ALL
SELECT 97, 3 FROM dual UNION ALL
SELECT 97, 5 FROM dual UNION ALL
SELECT 97, 6 FROM dual UNION ALL
SELECT 98, 3 FROM dual UNION ALL
SELECT 98, 4 FROM dual UNION ALL
SELECT 98, 5 FROM dual UNION ALL
SELECT 99, 2 FROM dual UNION ALL
SELECT 99, 4 FROM dual UNION ALL
SELECT 99, 5 FROM dual UNION ALL
SELECT 99, 6 FROM dual;
-- Number of students per enrollment status.
-- student_statuses yields one row per student who has at least one course,
-- labelled 'full-time' (4 or more courses) or 'part-time' (1 to 3 courses).
WITH student_statuses AS (
    SELECT
        CASE
            WHEN COUNT(sc.course_id) >= 4 THEN 'full-time'
            WHEN COUNT(sc.course_id) BETWEEN 1 AND 3 THEN 'part-time'
        END AS student_enrollment_status
    FROM students s
    INNER JOIN student_courses sc
        ON sc.student_id = s.student_id
    GROUP BY
        s.student_id,
        s.first_name,
        s.last_name
    -- drop students whose only enrollment rows carry no course_id
    HAVING COUNT(sc.course_id) > 0
)
SELECT
    UPPER(student_enrollment_status) AS student_enrollment_status,
    COUNT(student_enrollment_status) AS student_enrollment_status_count
FROM student_statuses
GROUP BY student_enrollment_status;
As you only need the numbers (and not any other data), shorten the query so that it searches only the student_courses table:
SQL> with temp as
2 (select student_id,
3 count(course_id) cnt
4 from student_courses
5 group by student_id
6 )
7 select
8 sum(case when cnt < 4 then 1 else 0 end) part_time,
9 sum(case when cnt >= 4 then 1 else 0 end) full_time
10 from temp;
PART_TIME FULL_TIME
---------- ----------
6 2
SQL>
I have sample data like this
CREATE TABLE table_name (aktif, "START", "END", NO_BOX, QTY) AS
SELECT 1, 'A0001', 'A0020', 2016, 100 FROM DUAL UNION ALL
SELECT 1, 'A0021', 'A0040', 2016, 100 FROM DUAL UNION ALL
SELECT 1, 'A0041', 'A0060', 2016, 100 FROM DUAL UNION ALL
SELECT 0, 'A0061', 'A0080', NULL, 100 FROM DUAL UNION ALL
SELECT 0, 'A0081', 'A0100', NULL, 100 FROM DUAL UNION ALL
SELECT 1, 'A0101', 'A0120', 2016, 100 FROM DUAL UNION ALL
SELECT 1, 'A0121', 'A0140', 2016, 100 FROM DUAL UNION ALL
SELECT 1, 'A0141', 'A0160', 2016, 100 FROM DUAL UNION ALL
SELECT 0, 'A0161', 'A0180', NULL, 100 FROM DUAL UNION ALL
SELECT 0, 'A0181', 'A0200', NULL, 100 FROM DUAL;
I want to group by the columns where AKTIF and NO_BOX remain the same based on the order of the rows and then select SUM(QTY), MIN(START), MAX(END).
The output should be:
AKTIF
START
END
NO_BOX
QTY
1
A0001
A0060
2016
300
0
A0061
A0100
NULL
200
1
A0101
A0160
2016
300
0
A0161
A0200
NULL
200
In the end, that's a gaps-and-islands problem.
Sample data:
SQL> with test (aktif, cstart, end, no_box, qty) as
2 (select 1, 'A0001', 'A0020', 2016, 100 from dual union all
3 select 1, 'A0021', 'A0040', 2016, 100 from dual union all
4 select 1, 'A0041', 'A0060', 2016, 100 from dual union all
5 --
6 select 0, 'A0061', 'A0080', null, 100 from dual union all
7 select 0, 'A0081', 'A0100', null, 100 from dual union all
8 --
9 select 1, 'A0101', 'A0120', 2016, 100 from dual union all
10 select 1, 'A0121', 'A0140', 2016, 100 from dual union all
11 select 1, 'A0141', 'A0160', 2016, 100 from dual union all
12 --
13 select 0, 'A0161', 'A0180', null, 100 from dual union all
14 select 0, 'A0181', 'A0200', null, 100 from dual
15 ),
Query begins here:
16 temp as
17 (select t.*,
18 row_number() over (order by cstart) -
19 row_Number() over (partition by aktif order by cstart) grp
20 from test t
21 )
22 select aktif,
23 min(cstart) cstart,
24 max(end) end,
25 no_box,
26 sum(qty) qty
27 from temp
28 group by aktif, no_box, grp
29 order by cstart;
AKTIF CSTAR END NO_BOX QTY
---------- ----- ----- ---------- ----------
1 A0001 A0060 2016 300
0 A0061 A0100 200
1 A0101 A0160 2016 300
0 A0161 A0200 200
SQL>
I have a dataset that looks something like:
asset_id,date_logged
1234,2018-02-01
1234,2018-02-02
1234,2018-02-03
1234,2018-02-04
1234,2018-02-05
1234,2018-02-06
1234,2018-02-07
1234,2018-02-08
1234,2018-02-09
1234,2018-02-10
9876,2018-02-01
9876,2018-02-02
9876,2018-02-03
9876,2018-02-07
9876,2018-02-08
9876,2018-02-09
9876,2018-02-10
For the purpose of this exercise, imagine today's date is 2018-02-10 (10 Feb 2018). For all the asset_ids in the table, I am trying to identify the start of the most recent unbroken streak for date_logged.
For asset_id = 1234, this would be 2018-02-01. The asset_id was logged all 10 days in an unbroken streak. For asset_id = 9876, this would be 2018-02-07. Because the asset_id was not logged on 2018-02-04, 2018-02-05, and 2018-02-06, the most recent unbroken streak starts on 2018-02-07.
So, my result set would hopefully look something like:
asset_id,Number_of_days_in_most_recent_logging_streak
1234,10
9876,4
Or, alternatively:
asset_id,Date_Begin_Most_Recent_Streak
1234,2018-02-01
9876,2018-02-07
I haven't been able to work out anything that gets me close -- my best effort so far is to get the number of days since the first log date and today, and the number of days the asset_id appears in the dataset, and compare these to identify situations where the streak is more recent than the first day they appear. For my real dataset this isn't particularly problematic, but it's an ugly solution and I would like to understand a better way of getting to the outcome.
Perhaps something like this. Break the query after each inline view in the WITH clause and SELECT * FROM the most recent inline view, to see what each step does.
with
  -- Sample data: one row per asset per day on which it was logged.
  inputs ( asset_id, date_logged ) as (
    select 1234, date '2018-02-01' from dual union all
    select 1234, date '2018-02-02' from dual union all
    select 1234, date '2018-02-03' from dual union all
    select 1234, date '2018-02-04' from dual union all
    select 1234, date '2018-02-05' from dual union all
    select 1234, date '2018-02-06' from dual union all
    select 1234, date '2018-02-07' from dual union all
    select 1234, date '2018-02-08' from dual union all
    select 1234, date '2018-02-09' from dual union all
    select 1234, date '2018-02-10' from dual union all
    select 9876, date '2018-02-01' from dual union all
    select 9876, date '2018-02-02' from dual union all
    select 9876, date '2018-02-03' from dual union all
    select 9876, date '2018-02-07' from dual union all
    select 9876, date '2018-02-08' from dual union all
    select 9876, date '2018-02-09' from dual union all
    select 9876, date '2018-02-10' from dual
  ),
  -- Consecutive dates minus consecutive row numbers give the same anchor date,
  -- so every unbroken streak of an asset shares one island_anchor value.
  islands as (
    select asset_id,
           date_logged,
           date_logged - row_number()
                           over (partition by asset_id order by date_logged) as island_anchor
    from   inputs
  ),
  -- One row per streak: its first day and its length in days.
  streaks as (
    select asset_id,
           min(date_logged) as streak_start,
           count(*)         as streak_days
    from   islands
    group  by asset_id, island_anchor
  )
-- Per asset, keep the streak with the latest start date and its length.
select asset_id,
       max(streak_start) as date_start_recent_streak,
       max(streak_days) keep (dense_rank last order by streak_start) as cnt
from   streaks
group  by asset_id
order  by asset_id   -- If needed
;
ASSET_ID DATE_START_RECENT_STREAK CNT
---------- ------------------------ ----------
1234 2018-02-01 10
9876 2018-02-07 4
you can try this,
-- Sample data: one row per asset per day on which it was logged.
with test (asset_id, date_logged) as
(select 1234, date '2018-02-01' from dual union all
select 1234, date '2018-02-02' from dual union all
select 1234, date '2018-02-03' from dual union all
select 1234, date '2018-02-04' from dual union all
select 1234, date '2018-02-05' from dual union all
select 1234, date '2018-02-06' from dual union all
select 1234, date '2018-02-07' from dual union all
select 1234, date '2018-02-08' from dual union all
select 1234, date '2018-02-09' from dual union all
select 1234, date '2018-02-10' from dual union all
select 9876, date '2018-02-01' from dual union all
select 9876, date '2018-02-02' from dual union all
select 9876, date '2018-02-03' from dual union all
select 9876, date '2018-02-07' from dual union all
select 9876, date '2018-02-08' from dual union all
select 9876, date '2018-02-09' from dual union all
select 9876, date '2018-02-10' from dual union all
select 9876, date '2018-02-11' from dual union all
select 9876, date '2018-02-12' from dual
)
-- Start date and length of each asset's most recent unbroken streak.
-- A row belongs to that streak exactly when its distance in days from
-- (latest logged date + 1) equals its rank counted back from the latest date;
-- the first gap breaks the equality for every earlier row.
-- NOTE(review): assumes date_logged has no time-of-day component - confirm.
SELECT asset_id, MIN(date_logged), COUNT(1)
FROM (SELECT asset_id, date_logged,
             MAX(date_logged) OVER (PARTITION BY asset_id) + 1 max_date_logged_plus_one,
             DENSE_RANK() OVER (PARTITION BY asset_id ORDER BY date_logged DESC) rown
      FROM test)
WHERE max_date_logged_plus_one - date_logged = rown
GROUP BY asset_id
-- Sort the final result; an ORDER BY inside the inline view does not order it.
ORDER BY asset_id;
ASSET_ID MIN(DATE_LOGGED) COUNT(1)
---------- ---------------- ----------
1234 01-FEB-18 10
9876 07-FEB-18 6
If the row below is commented out, the output is:
select 9876, date '2018-02-10' from dual union all
ASSET_ID MIN(DATE_LOGGED) COUNT(1)
---------- ---------------- ----------
1234 01-FEB-18 10
9876 11-FEB-18 2
Would this make any sense?
SQL> with test (asset_id, date_logged) as
2 (select 1234, date '2018-02-01' from dual union all
3 select 1234, date '2018-02-02' from dual union all
4 select 1234, date '2018-02-03' from dual union all
5 select 1234, date '2018-02-04' from dual union all
6 select 1234, date '2018-02-05' from dual union all
7 select 1234, date '2018-02-06' from dual union all
8 select 1234, date '2018-02-07' from dual union all
9 select 1234, date '2018-02-08' from dual union all
10 select 1234, date '2018-02-09' from dual union all
11 select 1234, date '2018-02-10' from dual union all
12 select 9876, date '2018-02-01' from dual union all
13 select 9876, date '2018-02-02' from dual union all
14 select 9876, date '2018-02-03' from dual union all
15 select 9876, date '2018-02-07' from dual union all
16 select 9876, date '2018-02-08' from dual union all
17 select 9876, date '2018-02-09' from dual union all
18 select 9876, date '2018-02-10' from dual
19 ),
20 inter as
21 -- difference between DATE_LOGGED and its previous DATE_LOGGED
22 (select asset_id,
23 date_logged,
24 date_logged - lag(date_logged) over (partition by asset_id order by date_logged) diff
25 from test
26 )
27 select i.asset_id, min(i.date_logged) date_logged
28 from inter i
29 where nvl(i.diff, 1) = (select max(i1.diff) from inter i1
30 where i1.asset_id = i.asset_id
31 )
32 group by i.asset_id
33 order by i.asset_id;
ASSET_ID DATE_LOGGE
---------- ----------
1234 2018-02-01
9876 2018-02-07
SQL>
This question is very much like my previous question, but a bit more complicated. Rob van Wijk's answer worked perfectly for my other question, and I've been using that as a starting point. My problem now is that I am pivoting dates for different fields. Whereas before I cared about getting all open_in and open_out values for a given id, now I want new_in, new_out, open_in, open_out, fixed_in, and fixed_out for each id. I have the following:
SELECT id,
state,
state_time,
MAX(new_row_num) OVER (PARTITION BY id ORDER BY state_time) AS new_row_group,
MAX(open_row_num) OVER (PARTITION BY id ORDER BY state_time) AS open_row_group,
MAX(fixed_row_num) OVER (PARTITION BY id ORDER BY state_time) AS fixed_row_group
FROM (
SELECT id,
state,
state_time,
CASE state
WHEN 'New'
THEN ROW_NUMBER() OVER (PARTITION BY id ORDER BY state_time)
END AS new_row_num,
CASE state
WHEN 'Open'
THEN ROW_NUMBER() OVER (PARTITION BY id ORDER BY state_time)
END AS open_row_num,
CASE state
WHEN 'Fixed'
THEN ROW_NUMBER() OVER (PARTITION BY id ORDER BY state_time)
END AS fixed_row_num
FROM ...
)
This gives me data like the following:
id state state_time new_row_group open_row_group fixed_row_group
1 New 2009-03-03 00:03:31 1
1 Closed 2009-03-04 04:15:27 1
2 New 2010-05-22 14:38:49 1
2 Open 2010-05-22 14:39:14 1 2
2 Fixed 2010-05-22 17:15:27 1 2 3
I would like data like the following:
id new_in new_out open_in open_out fixed_in fixed_out
1 2009-03-03 00:03:31 2009-03-04 04:15:27
2 2010-05-22 14:38:49 2010-05-22 14:39:14 2010-05-22 14:39:14 2010-05-22 17:15:27 2010-05-22 17:15:27
How can I pivot the data to get this date-pairing for each id?
Edit: to clarify, an id can enter and leave a state multiple times. For example, an id might go from New to Open to Fixed to Open to Fixed to Closed. In that case, there would need to be as many rows as is necessary to hold all the state times, e.g.:
id new_in new_out open_in open_out fixed_in fixed_out
4 2009-01-01 00:00:00 2009-01-02 00:00:00 2009-01-02 00:00:00 2009-01-03 00:00:00 2009-01-03 00:00:00 2009-01-04 00:00:00
4 2009-01-04 00:00:00 2009-01-05 00:00:00 2009-01-05 00:00:00 2009-01-06 00:00:00
Sarah,
Here is an example with your sample data:
SQL> create table yourtable (id,state,state_time)
2 as
3 select 1, 'New', to_date('2009-03-03 00:03:31','yyyy-mm-dd hh24:mi:ss') from dual union all
4 select 1, 'Closed', to_date('2009-03-04 04:15:27','yyyy-mm-dd hh24:mi:ss') from dual union all
5 select 2, 'New', to_date('2010-05-22 14:38:49','yyyy-mm-dd hh24:mi:ss') from dual union all
6 select 2, 'Open', to_date('2010-05-22 14:39:14','yyyy-mm-dd hh24:mi:ss') from dual union all
7 select 2, 'Fixed', to_date('2010-05-22 17:15:27','yyyy-mm-dd hh24:mi:ss') from dual union all
8 select 3, 'New', date '2009-01-01' from dual union all
9 select 3, 'Open', date '2009-01-02' from dual union all
10 select 3, 'Fixed', date '2009-01-03' from dual union all
11 select 3, 'Open', date '2009-01-04' from dual union all
12 select 3, 'Fixed', date '2009-01-05' from dual union all
13 select 3, 'Closed', date '2009-01-06' from dual
14 /
Table created.
The query:
SQL> select id
2 , max(decode(state,'New',state_time)) new_in
3 , max(decode(state,'New',out_time)) new_out
4 , max(decode(state,'Open',state_time)) open_in
5 , max(decode(state,'Open',out_time)) open_out
6 , max(decode(state,'Fixed',state_time)) fixed_in
7 , max(decode(state,'Fixed',out_time)) fixed_out
8 from ( select id
9 , state
10 , state_time
11 , max(cnt) over (partition by id order by state_time) the_row
12 , lead(state_time) over (partition by id order by state_time) out_time
13 from ( select id
14 , state
15 , state_time
16 , count(*) over (partition by id,state order by state_time) cnt
17 from yourtable
18 )
19 )
20 group by id
21 , the_row
22 order by id
23 , the_row
24 /
ID NEW_IN NEW_OUT OPEN_IN OPEN_OUT FIXED_IN FIXED_OUT
---------- ------------------- ------------------- ------------------- ------------------- ------------------- -------------------
1 03-03-2009 00:03:31 04-03-2009 04:15:27
2 22-05-2010 14:38:49 22-05-2010 14:39:14 22-05-2010 14:39:14 22-05-2010 17:15:27 22-05-2010 17:15:27
3 01-01-2009 00:00:00 02-01-2009 00:00:00 02-01-2009 00:00:00 03-01-2009 00:00:00 03-01-2009 00:00:00 04-01-2009 00:00:00
3 04-01-2009 00:00:00 05-01-2009 00:00:00 05-01-2009 00:00:00 06-01-2009 00:00:00
4 rows selected.
To understand how it works, execute the query from the inside out and check the intermediate result sets. Please let me know if you need some additional explanation.
Regards,
Rob.
I'm not sure how you'd prefer to handle the situation where the same state is repeated more than once for an ID. The following answer takes the easy route, assuming that you would want the first time the state was set and the last time the state was replaced.
select id,
min(case state when 'New' then state_time else null end) as new_in,
max(case state when 'New' then out_state_time else null end) as new_out,
min(case state when 'Open' then state_time else null end) as open_in,
max(case state when 'Open' then out_state_time else null end) as open_out,
min(case state when 'Fixed' then state_time else null end) as fixed_in,
max(case state when 'Fixed' then out_state_time else null end) as fixed_out
from
(select id,
state,
state_time,
lead(state_time) over (partition by id
order by state_time) as out_state_time
from ...
)
group by id
The lead analytic function gets the value from the next row, as defined by the partition/order clause, so that's the easiest way to find out when the state changed. The outer query is a basic pivot query (transforming rows to columns).
-- One output row per 'New' event of an id, holding the earliest later time at
-- which each state was entered (*_in) and left (*_out).
-- Every LEFT JOIN collects the candidate events for one output column and
-- MIN() keeps the earliest of them; an unmatched join leaves the column NULL.
select news.id
     , news.state_time            as new_in
     , min(not_news.state_time)   as new_out
     , min(opens.state_time)      as open_in
     , min(not_opens.state_time)  as open_out
     , min(closes.state_time)     as close_in
     , min(not_closed.state_time) as close_out
from
  (select id, state_time
   from mytable
   where state = 'New') news
left join
  (select id, state_time
   from mytable
   where state <> 'New') not_news
    on  news.id = not_news.id
    and news.state_time <= not_news.state_time
left join
  (select id, state_time
   from mytable
   where state = 'Open') opens
    on  news.id = opens.id
    and news.state_time <= opens.state_time
left join
  (select id, state_time
   from mytable
   where state not in ('New', 'Open')) not_opens
    -- Correlate on not_opens.id; without it, events of other ids leak into open_out.
    on  news.id = not_opens.id
    and news.state_time <= opens.state_time
    and opens.state_time <= not_opens.state_time
left join
  (select id, state_time
   from mytable
   where state = 'Closed') closes
    on  news.id = closes.id
    and news.state_time <= closes.state_time
left join
  (select id, state_time
   from mytable
   where state not in ('Closed')) not_closed
    on  news.id = not_closed.id
    and news.state_time <= closes.state_time
    and closes.state_time <= not_closed.state_time
group by news.id, news.state_time
order by news.id, news.state_time;
My test data (borrowed from Rob):
create table mytable (id,state,state_time)
as
select 1, 'New', to_date('2009-03-03 00:03:31','yyyy-mm-dd hh24:mi:ss') from dual union all
select 1, 'Closed', to_date('2009-03-04 04:15:27','yyyy-mm-dd hh24:mi:ss') from dual union all
select 2, 'New', to_date('2010-05-22 14:38:49','yyyy-mm-dd hh24:mi:ss') from dual union all
select 2, 'Open', to_date('2010-05-22 14:39:14','yyyy-mm-dd hh24:mi:ss') from dual union all
select 2, 'Fixed', to_date('2010-05-22 17:15:27','yyyy-mm-dd hh24:mi:ss') from dual union all
select 3, 'New', date '2009-01-01' from dual union all
select 3, 'Open', date '2009-01-02' from dual union all
select 3, 'Fixed', date '2009-01-03' from dual union all
select 3, 'Open', date '2009-01-04' from dual union all
select 3, 'Fixed', date '2009-01-05' from dual union all
select 3, 'Closed', date '2009-01-06' from dual
query results:
ID NEW_IN NEW_OUT OPEN_IN OPEN_OUT CLOSE_IN CLOSE_OUT
1 3/3/2009 12:03:31 3/4/2009 4:15:27 3/4/2009 4:15:27
2 5/22/2010 2:38:49 5/22/2010 2:39:14 5/22/2010 2:39:14 5/22/2010 5:15:27
3 1/1/2009 1/2/2009 1/2/2009 1/3/2009 1/6/2009
I hope you can read the above, I'm having trouble formatting it.