oracle sql hierarchical queries - oracle

I have two tables:
-- Category tree: cat_ust_id references the cat_id of the parent category
-- (the sample root categories carry NULL there).
CREATE TABLE category (
    cat_id     NUMBER PRIMARY KEY,
    cat_ust_id NUMBER REFERENCES category (cat_id)
);

-- Products: cat_id is the product's own category and primary key;
-- cat_1 .. cat_4 each reference a category as well.
CREATE TABLE product (
    cat_1  NUMBER REFERENCES category (cat_id),
    cat_2  NUMBER REFERENCES category (cat_id),
    cat_3  NUMBER REFERENCES category (cat_id),
    cat_4  NUMBER REFERENCES category (cat_id),
    cat_id NUMBER PRIMARY KEY REFERENCES category (cat_id)
);
-- Seed the category tree. The column list is explicit so the load does not
-- depend on the physical column order of the table.
INSERT INTO Category ( cat_id, cat_ust_id )
SELECT 1, NULL FROM DUAL UNION ALL
SELECT 2, NULL FROM DUAL UNION ALL
SELECT 11, 1 FROM DUAL UNION ALL
SELECT 112, 11 FROM DUAL UNION ALL
SELECT 202, 24 FROM DUAL UNION ALL
SELECT 24, 2 FROM DUAL UNION ALL
SELECT 2035, 203 FROM DUAL UNION ALL
SELECT 203, 20 FROM DUAL UNION ALL
SELECT 20, 2 FROM DUAL;

-- Seed the products: only cat_1 and cat_id are known up front;
-- cat_2 .. cat_4 start out NULL.
INSERT INTO Product ( cat_1, cat_2, cat_3, cat_4, cat_id )
SELECT 1, NULL, NULL, NULL, 11 FROM DUAL UNION ALL
SELECT 2, NULL, NULL, NULL, 202 FROM DUAL UNION ALL
SELECT 1, NULL, NULL, NULL, 112 FROM DUAL UNION ALL
SELECT 2, NULL, NULL, NULL, 2035 FROM DUAL;
In the PRODUCT table, I have to update some columns according to the CATEGORY table hierarchy to get this result:
cat_1 | cat_2 | cat_3 | cat_4 | cat_id
---------------------------------------
1 | 11 | NULL | NULL | 11
2 | 24 | 202 | NULL | 202
1 | 11 | 112 | NULL | 112
2 | 20 | 203 | 2035 | 2035
Should I create a procedure or function for this?

Oracle Setup:
-- Sample category tree created with CREATE TABLE ... AS SELECT; no key or
-- foreign-key constraints are declared in this setup.
CREATE TABLE Category ( CAT_ID, CAT_UST_ID ) AS
SELECT 1, NULL FROM DUAL UNION ALL
SELECT 2, NULL FROM DUAL UNION ALL
SELECT 11, 1 FROM DUAL UNION ALL
SELECT 112, 11 FROM DUAL UNION ALL
SELECT 202, 24 FROM DUAL UNION ALL
SELECT 24, 2 FROM DUAL UNION ALL
SELECT 2035, 203 FROM DUAL UNION ALL
SELECT 203, 20 FROM DUAL UNION ALL
SELECT 20, 2 FROM DUAL;
-- Sample products. Cat2..Cat4 are NULL in every row, so the first branch
-- casts them to NUMBER explicitly.
CREATE TABLE Product ( Cat1, Cat2, Cat3, Cat4, Cat_ID ) AS
SELECT 1, CAST( NULL AS NUMBER ), CAST( NULL AS NUMBER ), CAST( NULL AS NUMBER ), 11 FROM DUAL UNION ALL
SELECT 2, NULL, NULL, NULL, 202 FROM DUAL UNION ALL
SELECT 1, NULL, NULL, NULL, 112 FROM DUAL UNION ALL
SELECT 2, NULL, NULL, NULL, 2035 FROM DUAL;
Update:
-- Fill cat1..cat4 of each product with the root-to-node category path of its
-- cat_id (first path element -> cat1, second -> cat2, ...).
-- A MERGE is used instead of an unfiltered correlated UPDATE so that only
-- products whose cat_id is reached by the hierarchy are touched; products
-- without a match keep their current values instead of being set to NULL.
MERGE INTO Product p
USING (
    SELECT cat_id,
           TO_NUMBER( REGEXP_SUBSTR( cat_path, '\d+', 1, 1 ) ) AS new_cat1,
           TO_NUMBER( REGEXP_SUBSTR( cat_path, '\d+', 1, 2 ) ) AS new_cat2,
           TO_NUMBER( REGEXP_SUBSTR( cat_path, '\d+', 1, 3 ) ) AS new_cat3,
           TO_NUMBER( REGEXP_SUBSTR( cat_path, '\d+', 1, 4 ) ) AS new_cat4
    FROM (
        -- Walk the tree top-down from the root categories; the path is built
        -- once per node, e.g. ',2,20,203,2035'.
        SELECT cat_id,
               SYS_CONNECT_BY_PATH( cat_id, ',' ) AS cat_path
        FROM Category
        START WITH cat_ust_id IS NULL
        CONNECT BY cat_ust_id = PRIOR cat_id
    )
) c
ON ( p.cat_id = c.cat_id )
WHEN MATCHED THEN UPDATE
    SET p.cat1 = c.new_cat1,
        p.cat2 = c.new_cat2,
        p.cat3 = c.new_cat3,
        p.cat4 = c.new_cat4;
Results:
-- Show the updated rows (all five columns, in table order).
SELECT cat1, cat2, cat3, cat4, cat_id
FROM Product;
gives:
CAT1 CAT2 CAT3 CAT4 CAT_ID
---------- ---------- ---------- ---------- ----------
1 11 11
2 24 202 202
1 11 112 112
2 20 203 2035 2035

Related

Use listagg to group all students for teacher, course, semester

I have a relationship where a teacher -> courses -> enrollment -> students. I am trying to use listagg to get all the students' first/last names on the same line as the teacher_id and course_id. In addition, I want to add the student count for each teacher_id, course_id, semester.
Below is my test CASE, which has the tables, data and a part of the query. I would appreciate any help completing the query. Thanks in advance to all who answer.
CREATE TABLE teachers(teacher_id, first_name, last_name) AS
SELECT 101, 'Keith', 'Stein' FROM dual UNION ALL
SELECT 102, 'Roger', 'Wood' FROM dual UNION ALL
SELECT 103, 'Douglas', 'Kern' FROM dual UNION ALL
SELECT 104, 'Paul', 'Weber' FROM dual UNION ALL
SELECT 105, 'Jeffrey', 'Lebowitz' FROM dual UNION ALL
SELECT 106, 'Carol', 'Seltzer' FROM dual;
CREATE TABLE students(student_id, first_name, last_name) AS
SELECT 1, 'Faith', 'Aaron' FROM dual UNION ALL
SELECT 2, 'Lisa', 'Saladino' FROM dual UNION ALL
SELECT 3, 'Leslee', 'Altman' FROM dual UNION ALL
SELECT 4, 'Patty', 'Kern' FROM dual UNION ALL
SELECT 5, 'Beth', 'Cooper' FROM dual UNION ALL
SELECT 99, 'Jill', 'Coralnick' FROM dual;
CREATE TABLE courses(course_id, course_name, teacher_id, semester) AS
SELECT 1, 'Geometry', 101, '2022-2' FROM DUAL UNION ALL
SELECT 2, 'Trigonometry', 102, '2022-2' FROM DUAL UNION ALL
SELECT 3, 'Calculus', 103, '2022-2' FROM DUAL UNION ALL
SELECT 4, 'Chemistry', 104, '2022-2' FROM DUAL UNION ALL
SELECT 5, 'Biology', 105, '2022-2' FROM DUAL UNION ALL
SELECT 6, 'Physcology', 106, '2022-2' FROM DUAL;
CREATE TABLE enrollment(student_id,course_id) AS
SELECT 1, 1 FROM dual UNION ALL
SELECT 2, 1 FROM dual UNION ALL
SELECT 3, 1 FROM dual UNION ALL
SELECT 4, 1 FROM dual UNION ALL
SELECT 5, 1 FROM dual UNION ALL
SELECT 1, 2 FROM dual UNION ALL
SELECT 2, 2 FROM dual UNION ALL
SELECT 3, 2 FROM dual UNION ALL
SELECT 4, 2 FROM dual UNION ALL
SELECT 5, 2 FROM dual UNION ALL
SELECT 1, 3 FROM dual UNION ALL
SELECT 2, 3 FROM dual UNION ALL
SELECT 3, 3 FROM dual UNION ALL
SELECT 4, 3 FROM dual UNION ALL
SELECT 5, 3 FROM dual UNION ALL
SELECT 99, 3 FROM dual;
-- Goal: list all teachers, courses, student count and all students per
-- teacher_id, course_id, semester.
-- This part only pairs each teacher with the courses they teach; the outer
-- join keeps teachers that have no course.
SELECT t.teacher_id,
       t.first_name,
       t.last_name,
       c.course_id,
       c.course_name,
       c.semester
FROM teachers t
LEFT OUTER JOIN courses c
    ON ( c.teacher_id = t.teacher_id )
ORDER BY teacher_id;
TEACHER_ID FIRST_NAME LAST_NAME COURSE_ID COURSE_NAME SEMESTER
101 Keith Stein 1 Geometry 2022-2
102 Roger Wood 2 Trigonometry 2022-2
103 Douglas Kern 3 Calculus 2022-2
104 Paul Weber 4 Chemistry 2022-2
105 Jeffrey Lebowitz 5 Biology 2022-2
106 Carol Seltzer 6 Physcology 2022-2
You can use a correlated sub-query:
-- One row per teacher/course. The scalar sub-query gathers the students
-- enrolled on that row's course into one "Last, First; Last, First" string
-- (NULL when nobody is enrolled).
SELECT t.teacher_id,
       t.first_name,
       t.last_name,
       c.course_id,
       c.course_name,
       c.semester,
       (
           SELECT LISTAGG( stu.last_name || ', ' || stu.first_name, '; ' )
                      WITHIN GROUP ( ORDER BY stu.last_name, stu.first_name )
           FROM students stu
           INNER JOIN enrollment enr
               ON ( enr.student_id = stu.student_id )
           WHERE enr.course_id = c.course_id
       ) AS students
FROM teachers t
LEFT OUTER JOIN courses c
    ON ( c.teacher_id = t.teacher_id )
ORDER BY teacher_id;
Which, for the sample data, outputs:
TEACHER_ID
FIRST_NAME
LAST_NAME
COURSE_ID
COURSE_NAME
SEMESTER
STUDENTS
101
Keith
Stein
1
Geometry
2022-2
Aaron, Faith; Altman, Leslee; Cooper, Beth; Kern, Patty; Saladino, Lisa
102
Roger
Wood
2
Trigonometry
2022-2
Aaron, Faith; Altman, Leslee; Cooper, Beth; Kern, Patty; Saladino, Lisa
103
Douglas
Kern
3
Calculus
2022-2
Aaron, Faith; Altman, Leslee; Cooper, Beth; Coralnick, Jill; Kern, Patty; Saladino, Lisa
104
Paul
Weber
4
Chemistry
2022-2
null
105
Jeffrey
Lebowitz
5
Biology
2022-2
null
106
Carol
Seltzer
6
Physcology
2022-2
null
Or you can use JOINs and aggregate:
-- Same report built with joins and one GROUP BY per teacher/course.
-- MAX() merely carries the descriptive columns through the grouping.
SELECT t.teacher_id,
       MAX( t.first_name )  AS first_name,
       MAX( t.last_name )   AS last_name,
       c.course_id,
       MAX( c.course_name ) AS course_name,
       MAX( c.semester )    AS semester,
       LISTAGG(
           -- Courses without students yield a NULL element, not a bare ', '.
           CASE
               WHEN s.student_id IS NOT NULL THEN s.last_name || ', ' || s.first_name
           END,
           '; '
       ) WITHIN GROUP ( ORDER BY s.last_name, s.first_name ) AS students,
       COUNT( s.student_id ) AS num_students
FROM teachers t
LEFT JOIN courses c
    ON ( c.teacher_id = t.teacher_id )
-- enrollment and students are inner-joined first, then outer-joined as a unit.
LEFT JOIN (
        enrollment e
        INNER JOIN students s
            ON ( s.student_id = e.student_id )
    )
    ON ( e.course_id = c.course_id )
GROUP BY t.teacher_id, c.course_id
ORDER BY t.teacher_id, c.course_id;
db<>fiddle here

ORACLE Recursive query

I'm trying to build a recursive query and I'm facing a problem.
please find below my dataset
-- Asker's attempt: flatten table1 (parents) and table2 (children) with a
-- recursive WITH clause. Kept as posted; it fails with the error quoted below.
WITH table1 ( ID, Code, Label ) as(
SELECT 123, 'C1', 'LABEL_1' from dual UNION ALL
SELECT 1, 'C2', 'LABEL_2' from dual UNION ALL
SELECT 30, 'C3', 'LABEL_3' from dual UNION ALL
SELECT 44, 'C4', 'LABEL_4' from dual UNION ALL
SELECT 5, 'C5', 'LABEL_5' from dual
),
-- Child rows; id_table1 holds the ID of the owning table1 row.
table2 ( ID, id_table1, code_child, label_child ) as (
SELECT 1, 123, 'C1_1','LABEL_1_1' from dual UNION ALL
SELECT 2, 123, 'C1_2','LABEL_1_2' from dual UNION ALL
SELECT 3, 123, 'C1_3','LABEL_1_3' from dual UNION ALL
SELECT 4, 123, 'C1_4','LABEL_1_4' from dual UNION ALL
SELECT 6, 30, 'C3_1','LABEL_3_1' from dual UNION ALL
SELECT 7, 30, 'C3_2','LABEL_3_2' from dual UNION ALL
SELECT 8, 30, 'C3_3','LABEL_3_3' from dual UNION ALL
SELECT 9, 30, 'C3_4','LABEL_3_4' from dual UNION ALL
SELECT 10, 5, 'C5_1','LABEL_5_1' from dual
),
-- One row per parent/child pair (parents without children are kept).
-- Because of the join condition, id_table1 is either NULL or equal to a.id.
hierarchy as (
Select
a.id, code, label, CODE_CHILD,id_table1
from table1 a
left join table2 b on b.id_table1 = a.ID
)
,recursive (base, id, code, label, CODE_CHILD,id_table1) as (
-- Anchor member: every hierarchy row, unfiltered.
SELECT
id as base,
id,
code,
label,
CODE_CHILD,
id_table1
FROM hierarchy
UNION ALL
-- Recursive member.
SELECT
previous_level.base,
current_level.id,
current_level.code,
current_level.label,
current_level.CODE_CHILD,
current_level.id_table1
FROM recursive previous_level,
hierarchy current_level
WHERE 1=1
-- NOTE(review): a previous row's id_table1 equals its own id, so this matches
-- hierarchy rows with that same id again and the same rows are regenerated at
-- every level; this looks like the source of the reported cycle error.
and current_level.id = previous_level.id_table1
)
SELECT * FROM recursive order by base;
And I'm getting this error:
32044. 00000 - "cycle detected while executing recursive WITH query"
*Cause: A recursive WITH clause query produced a cycle and was stopped
in order to avoid an infinite loop.
*Action: Rewrite the recursive WITH query to stop the recursion or use
the CYCLE clause.
Where am I going wrong?
I need to merge these two tables into one.
here's what I'd like to get as a result.
id code label id_parent
1 C1 LABEL_1
2 C2 LABEL_2
3 C3 LABEL_3
4 C4 LABEL_4
5 C5 LABEL_5
6 C1_1 LABEL_1_1 1
7 C1_2 LABEL_1_2 1
8 C1_3 LABEL_1_3 1
9 C1_4 LABEL_1_4 1
10 C3_1 LABEL_3_1 3
11 C3_2 LABEL_3_2 3
12 C3_3 LABEL_3_3 3
13 C3_4 LABEL_3_4 3
14 C5_1 LABEL_5_1 5
Thank you
Not sure why you want a recursive query? It appears that you could just use UNION ALL and join the two tables:
WITH table1 ( ID, Code, Label ) AS (
    SELECT 1, 'C1', 'LABEL_1' FROM DUAL UNION ALL
    SELECT 2, 'C2', 'LABEL_2' FROM DUAL UNION ALL
    SELECT 3, 'C3', 'LABEL_3' FROM DUAL UNION ALL
    SELECT 4, 'C4', 'LABEL_4' FROM DUAL UNION ALL
    SELECT 5, 'C5', 'LABEL_5' FROM DUAL
),
table2 ( ID, id_table1, code_child, label_child ) AS (
    SELECT 1, 1, 'C1_1', 'LABEL_1_1' FROM DUAL UNION ALL
    SELECT 2, 1, 'C1_2', 'LABEL_1_2' FROM DUAL UNION ALL
    SELECT 3, 1, 'C1_3', 'LABEL_1_3' FROM DUAL UNION ALL
    SELECT 4, 1, 'C1_4', 'LABEL_1_4' FROM DUAL UNION ALL
    SELECT 6, 3, 'C3_1', 'LABEL_3_1' FROM DUAL UNION ALL
    SELECT 7, 3, 'C3_2', 'LABEL_3_2' FROM DUAL UNION ALL
    SELECT 8, 3, 'C3_3', 'LABEL_3_3' FROM DUAL UNION ALL
    SELECT 9, 3, 'C3_4', 'LABEL_3_4' FROM DUAL UNION ALL
    SELECT 10, 5, 'C5_1', 'LABEL_5_1' FROM DUAL
),
-- Number the parent rows the same way the final result numbers them (parents
-- come first, ordered by code), so children can point at the parent's id in
-- the merged output instead of the raw table1.ID. The two only coincide when
-- table1.ID already happens to be 1..n in code order. Assumes codes are unique.
parents ( old_id, code, label, new_id ) AS (
    SELECT ID,
           Code,
           Label,
           ROW_NUMBER() OVER ( ORDER BY Code )
    FROM table1
)
SELECT ROW_NUMBER() OVER ( ORDER BY table_no, code ) AS id,
       code,
       label,
       id_parent
FROM (
    -- Parent rows: no parent of their own.
    SELECT code,
           label,
           1 AS table_no,
           CAST( NULL AS NUMBER ) AS id_parent
    FROM parents
    UNION ALL
    -- Child rows: translate id_table1 into the parent's new id. The outer
    -- join keeps a child even if its parent row is missing.
    SELECT c.code_child,
           c.label_child,
           2 AS table_no,
           p.new_id
    FROM table2 c
    LEFT JOIN parents p
        ON ( p.old_id = c.id_table1 )
)
ORDER BY table_no, code;
Which outputs:
ID | CODE | LABEL | ID_PARENT
-: | :--- | :-------- | --------:
1 | C1 | LABEL_1 | null
2 | C2 | LABEL_2 | null
3 | C3 | LABEL_3 | null
4 | C4 | LABEL_4 | null
5 | C5 | LABEL_5 | null
6 | C1_1 | LABEL_1_1 | 1
7 | C1_2 | LABEL_1_2 | 1
8 | C1_3 | LABEL_1_3 | 1
9 | C1_4 | LABEL_1_4 | 1
10 | C3_1 | LABEL_3_1 | 3
11 | C3_2 | LABEL_3_2 | 3
12 | C3_3 | LABEL_3_3 | 3
13 | C3_4 | LABEL_3_4 | 3
14 | C5_1 | LABEL_5_1 | 5
db<>fiddle here
A recursive WITH clause query produced a cycle and was stopped in order to avoid an infinite loop.
This error is usually caused by bad data in the DB: some records have a circular relationship among them, which sends the recursion into an infinite loop.
For example: P is the parent of C, and C is in turn the parent of P.
You can get the above output simply by using UNION ALL and a join of the tables.

How to create incrementing columns?

I have table with these column names.
Province/State
Country/Region
Lat
Long
1/22/20
1/23/20
1/24/20
1/25/20
...
...
3/21/20
I know how to create the first 4 columns, but I don't know how to create a date column and increment it.
How can I create that many columns at once?
Thank you!
Infected
Dead
Recovered
Object relational data model created by me
Question -: Submit working Oracle script for your database schema.
Don't try to create a column-per-day; just create a table with columns for location, date and for each statistic (i.e. infected, recovered, dead, etc.) and then if you need to pivot them do that in a query (or in whatever middle-tier application [i.e. PHP, Java, .net] you're using to access the database).
Something like:
-- Countries and their provinces; a province row references its country
-- through parent_id. Key constraints are declared out-of-line under the same
-- names; NOT NULL constraints stay inline.
CREATE TABLE Regions (
    id         VARCHAR2(6),
    parent_id  VARCHAR2(6),
    name       VARCHAR2(50) CONSTRAINT regions__name__nn NOT NULL,
    latitude   NUMBER       CONSTRAINT regions__lat__nn  NOT NULL,
    longitude  NUMBER       CONSTRAINT regions__long__nn NOT NULL,
    CONSTRAINT regions__id__pk    PRIMARY KEY ( id ),
    CONSTRAINT regions_parent__fk FOREIGN KEY ( parent_id ) REFERENCES Regions ( id ),
    CONSTRAINT regions__name__u   UNIQUE ( name ),
    -- Rows without a parent need a two-letter id; rows with a parent need
    -- two letters, a hyphen and one to three letters/digits.
    CONSTRAINT regions__id__chk   CHECK (
           ( parent_id IS NULL     AND REGEXP_LIKE( id, '^[A-Z]{2}$' ) )
        OR ( parent_id IS NOT NULL AND REGEXP_LIKE( id, '^[A-Z]{2}-[A-Z0-9]{1,3}$' ) )
    )
);

COMMENT ON COLUMN Regions.id        IS 'ISO 3166-2 Alpha-2 Country Code or ISO 3166-2 Province Code';
COMMENT ON COLUMN Regions.name      IS 'ISO 3166-2 English Short Name.';
COMMENT ON COLUMN Regions.latitude  IS 'Latitude of the region''s main city.';
COMMENT ON COLUMN Regions.longitude IS 'Longitude of the region''s main city.';
-- One row per location and calendar day carrying that day's statistics.
CREATE TABLE Virus_Statistics (
    id         NUMBER(20,0) GENERATED ALWAYS AS IDENTITY,
    location   VARCHAR2(6)  CONSTRAINT virus_statistics__loc__nn NOT NULL,
    datetime   DATE         CONSTRAINT virus_statistics__dt__nn  NOT NULL,
    infected   NUMBER(10,0),
    recovered  NUMBER(10,0),
    dead       NUMBER(10,0),
    CONSTRAINT virus_statistics__id__pk  PRIMARY KEY ( id ),
    CONSTRAINT virus_statistics__loc__fk FOREIGN KEY ( location ) REFERENCES Regions ( id ),
    -- Only midnight values are accepted, i.e. the column holds a pure date.
    CONSTRAINT virus_statistics__dt__chk CHECK ( datetime = TRUNC( datetime ) ),
    -- At most one row per location and day.
    CONSTRAINT virus_statistics__loc__dt__u UNIQUE ( location, datetime )
);
Then you can input your data. For example, the regions would be:
INSERT INTO Regions ( id, parent_id, name, latitude, longitude )
SELECT 'TH', NULL, 'Thailand', 15.00000, 101.00000 FROM DUAL UNION ALL
SELECT 'JP', NULL, 'Japan', 36.00000, 138.00000 FROM DUAL UNION ALL
SELECT 'SG', NULL, 'Singapore', 1.28333, 103.83333 FROM DUAL UNION ALL
SELECT 'NP', NULL, 'Nepal', 28.16667, 84.25000 FROM DUAL UNION ALL
SELECT 'MY', NULL, 'Malaysia', 2.50000, 112.50000 FROM DUAL UNION ALL
SELECT 'CA', NULL, 'Canada', 45.42472, - 75.69500 FROM DUAL UNION ALL
SELECT 'CA-BC', 'CA', 'British Columbia', 48.40733, -123.32977 FROM DUAL;
And the first 3 columns of data would be:
INSERT INTO Virus_Statistics ( location, datetime, infected, recovered, dead )
SELECT 'TH', DATE '2020-01-22', 2 AS i, 0 AS r, 0 AS d FROM DUAL UNION ALL
SELECT 'TH', DATE '2020-01-23', 3, 0, 0 FROM DUAL UNION ALL
SELECT 'TH', DATE '2020-01-24', 5, 0, 0 FROM DUAL UNION ALL
SELECT 'JP', DATE '2020-01-22', 2, 0, 0 FROM DUAL UNION ALL
SELECT 'JP', DATE '2020-01-23', 1, 0, 0 FROM DUAL UNION ALL
SELECT 'JP', DATE '2020-01-24', 2, 0, 0 FROM DUAL UNION ALL
SELECT 'SG', DATE '2020-01-22', 0, 0, 0 FROM DUAL UNION ALL
SELECT 'SG', DATE '2020-01-23', 1, 0, 0 FROM DUAL UNION ALL
SELECT 'SG', DATE '2020-01-24', 3, 0, 0 FROM DUAL UNION ALL
SELECT 'NP', DATE '2020-01-22', 0, 0, 0 FROM DUAL UNION ALL
SELECT 'NP', DATE '2020-01-23', 0, 0, 0 FROM DUAL UNION ALL
SELECT 'NP', DATE '2020-01-24', 0, 0, 0 FROM DUAL UNION ALL
SELECT 'MY', DATE '2020-01-22', 0, 0, 0 FROM DUAL UNION ALL
SELECT 'MY', DATE '2020-01-23', 0, 0, 0 FROM DUAL UNION ALL
SELECT 'MY', DATE '2020-01-24', 0, 0, 0 FROM DUAL UNION ALL
SELECT 'CA-BC', DATE '2020-01-22', 0, 0, 0 FROM DUAL UNION ALL
SELECT 'CA-BC', DATE '2020-01-23', 0, 0, 0 FROM DUAL UNION ALL
SELECT 'CA-BC', DATE '2020-01-24', 0, 0, 0 FROM DUAL;
Then if you want to output it as columns-per-day then use a PIVOT:
-- Turn the per-day rows into one column per day: one output row per region
-- (name, latitude, longitude) with the infected figure under each listed date.
WITH daily_infected AS (
    SELECT r.name,
           r.latitude,
           r.longitude,
           v.datetime,
           v.infected
    FROM Regions r
    INNER JOIN Virus_Statistics v
        ON ( v.location = r.id )
)
SELECT *
FROM daily_infected
PIVOT (
    MAX( infected )
    FOR datetime IN (
        DATE '2020-01-22' AS "2020-01-22",
        DATE '2020-01-23' AS "2020-01-23",
        DATE '2020-01-24' AS "2020-01-24"
    )
);
Which outputs:
NAME | LATITUDE | LONGITUDE | 2020-01-22 | 2020-01-23 | 2020-01-24
:--------------- | -------: | ---------: | ---------: | ---------: | ---------:
Japan | 36 | 138 | 2 | 1 | 2
Malaysia | 2.5 | 112.5 | 0 | 0 | 0
Singapore | 1.28333 | 103.83333 | 0 | 1 | 3
Nepal | 28.16667 | 84.25 | 0 | 0 | 0
British Columbia | 48.40733 | -123.32977 | 0 | 0 | 0
Thailand | 15 | 101 | 2 | 3 | 5
db<>fiddle here

calculate the average time difference between each stage

How to calculate the average time difference between each stage.
The challenge with the actual data set is not every id will go through all stages.. some will skip stages and the date is not continuous for all Id's like below.
id date status
1 1/1/18 requirement
1 1/8/18 analysis
1 ? design
1 1/30/18 closed
2 2/1/18 requirement
2 2/18/18 closed
3 1/2/18 requirement
3 1/29/18 analysis
3 ? accepted
3 2/5/18 closed
?--we have missing dates as well
Expected output
id date status time_spent
1 1/1/18 requirement 0
1 1/8/18 analysis 7
1 ? design
1 1/30/18 closed 22
2 2/1/18 requirement 0
2 2/18/18 closed 17
3 1/2/18 requirement 0
3 1/29/18 analysis 27
3 ? accepted
3 2/5/18 closed 24
status avg(timespent)
requirement 0
analysis 17
design
closed 21
You can use windowing functions LAG (or LEAD) to get the data of the previous (or next) status for each id. That will let you compute the time elapsed in each stage. Then, compute the average time elapsed for each stage.
Here is an example of how to do that:
with input_data (id, dte, status) as (
    SELECT 1, TO_DATE('1/1/18','MM/DD/YY'), 'requirement' FROM DUAL UNION ALL
    SELECT 1, TO_DATE('1/8/18','MM/DD/YY'), 'analysis' FROM DUAL UNION ALL
    SELECT 1, NULL, 'design' FROM DUAL UNION ALL
    SELECT 1, TO_DATE('1/30/18','MM/DD/YY'), 'closed' FROM DUAL UNION ALL
    SELECT 2, TO_DATE('2/1/18','MM/DD/YY'), 'requirement' FROM DUAL UNION ALL
    SELECT 2, TO_DATE('2/18/18','MM/DD/YY'), 'closed' FROM DUAL UNION ALL
    SELECT 3, TO_DATE('1/2/18','MM/DD/YY'), 'requirement' FROM DUAL UNION ALL
    SELECT 3, TO_DATE('1/29/18','MM/DD/YY'), 'analysis' FROM DUAL UNION ALL
    SELECT 3, NULL, 'accepted' FROM DUAL UNION ALL
    SELECT 3, TO_DATE('2/5/18','MM/DD/YY'), 'closed' FROM DUAL
),
----- Solution begins here
-- elapsed = days from the previous dated status of the same id up to this
-- status: 0 for the first dated status, NULL when this status has no date.
data_with_elapsed_days as (
    SELECT src.*,
           src.dte - coalesce(
                         lag(src.dte ignore nulls) over ( partition by src.id order by src.dte ),
                         src.dte
                     ) elapsed
    from input_data src
)
SELECT status, avg(elapsed)
FROM data_with_elapsed_days d
group by status
-- Present the statuses in workflow order rather than alphabetically.
order by case status
             when 'requirement' then 1
             when 'analysis'    then 2
             when 'design'      then 3
             when 'accepted'    then 4
             when 'closed'      then 5
             else 99
         end;
+-------------+-------------------------------------------+
| STATUS | AVG(ELAPSED) |
+-------------+-------------------------------------------+
| requirement | 0 |
| analysis | 17 |
| design | |
| accepted | |
| closed | 15.33333333333333333333333333333333333333 |
+-------------+-------------------------------------------+
As I said in my comment, that logic computes the elapsed days as the time to the given status from the prior status. Since "requirement" has no prior status, this logic will always show zero days spent in requirements. It would probably be better to compute the time from the given status to the next status. For "closed", there would be no next status. You could just leave that blank or use SYSDATE as the date of the next status. Here is an example of that:
with input_data (id, dte, status) as (
    SELECT 1, TO_DATE('1/1/18','MM/DD/YY'), 'requirement' FROM DUAL UNION ALL
    SELECT 1, TO_DATE('1/8/18','MM/DD/YY'), 'analysis' FROM DUAL UNION ALL
    SELECT 1, NULL, 'design' FROM DUAL UNION ALL
    SELECT 1, TO_DATE('1/30/18','MM/DD/YY'), 'closed' FROM DUAL UNION ALL
    SELECT 2, TO_DATE('2/1/18','MM/DD/YY'), 'requirement' FROM DUAL UNION ALL
    SELECT 2, TO_DATE('2/18/18','MM/DD/YY'), 'closed' FROM DUAL UNION ALL
    SELECT 3, TO_DATE('1/2/18','MM/DD/YY'), 'requirement' FROM DUAL UNION ALL
    SELECT 3, TO_DATE('1/29/18','MM/DD/YY'), 'analysis' FROM DUAL UNION ALL
    SELECT 3, NULL, 'accepted' FROM DUAL UNION ALL
    SELECT 3, TO_DATE('2/5/18','MM/DD/YY'), 'closed' FROM DUAL
),
----- Solution begins here
-- elapsed = days from this status to the next dated status of the same id;
-- when there is no later dated status, today's date stands in for it.
data_with_elapsed_days as (
    SELECT src.*,
           coalesce(
               lead(src.dte ignore nulls) over ( partition by src.id order by src.dte ),
               trunc(sysdate)
           ) - src.dte elapsed
    from input_data src
)
SELECT status, avg(elapsed)
FROM data_with_elapsed_days d
group by status
-- Present the statuses in workflow order rather than alphabetically.
order by case status
             when 'requirement' then 1
             when 'analysis'    then 2
             when 'design'      then 3
             when 'accepted'    then 4
             when 'closed'      then 5
             else 99
         end;
+-------------+------------------------------------------+
| STATUS | AVG(ELAPSED) |
+-------------+------------------------------------------+
| requirement | 17 |
| analysis | 14.5 |
| design | |
| accepted | |
| closed | 361.666666666666666666666666666666666667 |
+-------------+------------------------------------------+
I agree with @MatthewMcPeak. Your requirements seem a bit odd: you spend zero days in the requirement stage but spend an average of 21 days on closed? Fnord.
This solution treats the presented date as the start date of the stage and calculates the difference between it and the start_date of the next phase.
-- dd is taken as the start date of a stage; dt_diff is the gap to the next
-- dated stage of the same id. Replace your_table with the real table name.
select status,
       avg(dt_diff) as avg_ela
from (
    select status,
           lead(dd ignore nulls) over (partition by id order by dd) - dd as dt_diff
    from your_table
)
group by status
/
If you wish to include all stages for each d and estimate the time spent in each (using linear interpolation) then you can create a sub-query with all the statuses and use a PARTITION OUTER JOIN to join them and then use LAG and LEAD to find the date range the status is in and interpolate between:
Oracle Setup:
CREATE TABLE data ( d, dt, status ) AS
SELECT 1, TO_DATE( '1/1/18', 'MM/DD/YY' ), 'requirement' FROM DUAL UNION ALL
SELECT 1, TO_DATE( '1/8/18', 'MM/DD/YY' ), 'analysis' FROM DUAL UNION ALL
SELECT 1, NULL, 'design' FROM DUAL UNION ALL
SELECT 1, TO_DATE( '1/30/18', 'MM/DD/YY' ), 'closed' FROM DUAL UNION ALL
SELECT 2, TO_DATE( '2/1/18', 'MM/DD/YY' ), 'requirement' FROM DUAL UNION ALL
SELECT 2, TO_DATE( '2/18/18', 'MM/DD/YY' ), 'closed' FROM DUAL UNION ALL
SELECT 3, TO_DATE( '1/2/18', 'MM/DD/YY' ), 'requirement' FROM DUAL UNION ALL
SELECT 3, TO_DATE( '1/29/18', 'MM/DD/YY' ), 'analysis' FROM DUAL UNION ALL
SELECT 3, NULL, 'accepted' FROM DUAL UNION ALL
SELECT 3, TO_DATE( '2/5/18', 'MM/DD/YY' ), 'closed' FROM DUAL;
Query:
-- Estimate how long each d spent in every status, including statuses that
-- have no row or no date, by spreading the time between the surrounding
-- known dates evenly over the statuses in between.
-- statuses: the full list of stages; id gives their order.
WITH statuses ( status, id ) AS (
SELECT 'requirement', 1 FROM DUAL UNION ALL
SELECT 'analysis', 2 FROM DUAL UNION ALL
SELECT 'design', 3 FROM DUAL UNION ALL
SELECT 'accepted', 4 FROM DUAL UNION ALL
SELECT 'closed', 5 FROM DUAL
),
-- ranges: one row per d and status, with the known dates bracketing it.
ranges ( d, dt, status, id, recent_dt, recent_id, next_dt, next_id ) AS (
SELECT d.d,
d.dt,
s.status,
s.id,
-- recent_dt: this row's own date, else the latest known date of an earlier status.
NVL(
d.dt,
LAG( d.dt, 1 )
IGNORE NULLS OVER ( PARTITION BY d.d ORDER BY s.id )
),
-- recent_id: the status id that recent_dt belongs to.
NVL2(
d.dt,
s.id,
LAG( CASE WHEN d.dt IS NOT NULL THEN s.id END, 1 )
IGNORE NULLS OVER ( PARTITION BY d.d ORDER BY s.id )
),
-- next_dt: the next known date of a later status; falls back to this row's
-- own date (the posted output shows a duration of 0 for the last status).
LEAD( d.dt, 1, d.dt )
IGNORE NULLS OVER ( PARTITION BY d.d ORDER BY s.id ),
-- next_id: the status id that next_dt belongs to; falls back to s.id + 1.
LEAD( CASE WHEN d.dt IS NOT NULL THEN s.id END, 1, s.id + 1 )
IGNORE NULLS OVER ( PARTITION BY d.d ORDER BY s.id )
FROM data d
-- Partitioned outer join: every d is joined to the complete status list.
PARTITION BY ( d )
RIGHT OUTER JOIN statuses s
ON ( d.status = s.status )
)
SELECT d,
dt,
status,
-- Days between the bracketing dates divided by the number of status steps they span.
( next_dt - recent_dt ) / (next_id - recent_id ) AS estimated_duration
FROM ranges;
Output:
D | DT | STATUS | ESTIMATED_DURATION
-: | :-------- | :---------- | ---------------------------------------:
1 | 01-JAN-18 | requirement | 7
1 | 08-JAN-18 | analysis | 7.33333333333333333333333333333333333333
1 | null | design | 7.33333333333333333333333333333333333333
1 | null | accepted | 7.33333333333333333333333333333333333333
1 | 30-JAN-18 | closed | 0
2 | 01-FEB-18 | requirement | 4.25
2 | null | analysis | 4.25
2 | null | design | 4.25
2 | null | accepted | 4.25
2 | 18-FEB-18 | closed | 0
3 | 02-JAN-18 | requirement | 27
3 | 29-JAN-18 | analysis | 2.33333333333333333333333333333333333333
3 | null | design | 2.33333333333333333333333333333333333333
3 | null | accepted | 2.33333333333333333333333333333333333333
3 | 05-FEB-18 | closed | 0
Query 2:
Then you can easily change that to take the average for each status:
-- Average, per status, of the interpolated time each d spent in that status.
-- statuses: the full list of stages; id gives their order.
WITH statuses ( status, id ) AS (
SELECT 'requirement', 1 FROM DUAL UNION ALL
SELECT 'analysis', 2 FROM DUAL UNION ALL
SELECT 'design', 3 FROM DUAL UNION ALL
SELECT 'accepted', 4 FROM DUAL UNION ALL
SELECT 'closed', 5 FROM DUAL
),
-- ranges: one row per d and status, with the known dates bracketing it.
ranges ( d, dt, status, id, recent_dt, recent_id, next_dt, next_id ) AS (
SELECT d.d,
d.dt,
s.status,
s.id,
-- recent_dt: this row's own date, else the latest known date of an earlier status.
NVL(
d.dt,
LAG( d.dt, 1 )
IGNORE NULLS OVER ( PARTITION BY d.d ORDER BY s.id )
),
-- recent_id: the status id that recent_dt belongs to.
NVL2(
d.dt,
s.id,
LAG( CASE WHEN d.dt IS NOT NULL THEN s.id END, 1 )
IGNORE NULLS OVER ( PARTITION BY d.d ORDER BY s.id )
),
-- next_dt: the next known date of a later status; falls back to this row's own date.
LEAD( d.dt, 1, d.dt )
IGNORE NULLS OVER ( PARTITION BY d.d ORDER BY s.id ),
-- next_id: the status id that next_dt belongs to; falls back to s.id + 1.
LEAD( CASE WHEN d.dt IS NOT NULL THEN s.id END, 1, s.id + 1 )
IGNORE NULLS OVER ( PARTITION BY d.d ORDER BY s.id )
FROM data d
-- Partitioned outer join: every d is joined to the complete status list.
PARTITION BY ( d )
RIGHT OUTER JOIN statuses s
ON ( d.status = s.status )
)
SELECT status,
AVG( ( next_dt - recent_dt ) / (next_id - recent_id ) ) AS estimated_duration
FROM ranges
-- id is grouped alongside status so the result can be sorted in stage order.
GROUP BY status, id
ORDER BY id;
Results:
STATUS | ESTIMATED_DURATION
:---------- | ---------------------------------------:
requirement | 12.75
analysis | 4.63888888888888888888888888888888888889
design | 4.63888888888888888888888888888888888889
accepted | 4.63888888888888888888888888888888888889
closed | 0
db<>fiddle here

Find highest and lowest selling item in a table

I have two tables as follows--
ORDERS
-- Order header. customers and employees are defined elsewhere; the foreign
-- keys reference their primary keys.
create table orders (
    ono       number(5) not null,
    cno       number(5),
    eno       number(4),
    received  date,
    shipped   date,
    primary key (ono),
    foreign key (cno) references customers,
    foreign key (eno) references employees
);
ODETAILS
-- Order lines: one row per order (ono) and part (pno) with a positive quantity.
-- parts is defined elsewhere.
create table odetails (
    ono  number(5) not null,
    pno  number(5) not null,
    qty  integer,
    check (qty > 0),
    primary key (ono, pno),
    foreign key (ono) references orders,
    foreign key (pno) references parts
);
ODETAILS Table
Now I'm trying to figure out the highest and lowest selling product. Logically PNO 10601 which has the highest QTY of 4 is the highest selling product. the following query returns the highest selling product.
-- Highest-selling part: total the quantity per part, sort the totals in
-- descending order and keep the first row of that sorted set.
--Thanks to Bob Jarvis
SELECT PNO
FROM (
    SELECT od.PNO,
           SUM(od.QTY) AS TOTAL_QTY
    FROM ODETAILS od
    GROUP BY od.PNO
    ORDER BY TOTAL_QTY DESC
)
WHERE ROWNUM = 1
How do I add a DATE WHERE clause to the SQL above so that I can find the highest-selling product for a given month (let's say December)? The DATE that I'm referring to is the RECEIVED attribute of the ORDERS table. I guess I need to join the tables first as well.
SQL Fiddle
Oracle 11g R2 Schema Setup:
create table orders (
ono number(5) not null primary key,
cno number(5),
eno number(4),
received date,
shipped date
);
INSERT INTO orders
SELECT 1020, 1, 1, DATE '2015-12-21', NULL FROM DUAL UNION ALL
SELECT 1021, 1, 1, DATE '2015-12-20', DATE '2015-12-20' FROM DUAL UNION ALL
SELECT 1022, 1, 1, DATE '2015-12-18', DATE '2015-12-20' FROM DUAL UNION ALL
SELECT 1023, 1, 1, DATE '2015-12-21', NULL FROM DUAL UNION ALL
SELECT 1024, 1, 1, DATE '2015-12-20', DATE '2015-12-20' FROM DUAL;
create table odetails (
ono number(5) not null references orders(ono),
pno number(5) not null,
qty integer check(qty > 0),
primary key (ono,pno)
);
INSERT INTO odetails
SELECT 1020, 10506, 1 FROM DUAL UNION ALL
SELECT 1020, 10507, 1 FROM DUAL UNION ALL
SELECT 1020, 10508, 2 FROM DUAL UNION ALL
SELECT 1020, 10509, 3 FROM DUAL UNION ALL
SELECT 1021, 10601, 4 FROM DUAL UNION ALL
SELECT 1022, 10601, 1 FROM DUAL UNION ALL
SELECT 1022, 10701, 1 FROM DUAL UNION ALL
SELECT 1023, 10800, 1 FROM DUAL UNION ALL
SELECT 1024, 10900, 1 FROM DUAL;
Query 1 - The ono and pnos for the pno which has sold the maximum total quantity in December 2015:
-- Total the December-2015 quantity per part, rank the parts by that total and
-- return every order line of the top-ranked part(s). RANK() keeps ties.
SELECT ono,
       pno,
       TOTAL_QTY
FROM (
    SELECT q.*,
           RANK() OVER ( ORDER BY TOTAL_QTY DESC ) AS rnk
    FROM (
        SELECT od.ono,
               od.PNO,
               SUM( od.QTY ) OVER ( PARTITION BY od.PNO ) AS TOTAL_QTY
        FROM ODETAILS od
        INNER JOIN orders o
            ON ( o.ono = od.ono )
        -- Half-open range on the bare column: selects the same rows as
        -- TRUNC( o.received, 'MM' ) = DATE '2015-12-01' but leaves the column
        -- unwrapped, so an index on received can be used.
        WHERE o.received >= DATE '2015-12-01'
          AND o.received <  DATE '2016-01-01'
        -- For any December, regardless of year, use this filter instead:
        -- WHERE EXTRACT( MONTH FROM o.received ) = 12
    ) q
)
WHERE rnk = 1
Change the WHERE clause to get the results for any December rather than just December 2015.
Results:
| ONO | PNO | TOTAL_QTY |
|------|-------|-----------|
| 1021 | 10601 | 5 |
| 1022 | 10601 | 5 |
Query 2 - The ono and pnos for the items which have sold the maximum quantity in a single order in December 2015:
-- Order line(s) with the largest single-order quantity among orders received
-- in December 2015. RANK() keeps ties.
SELECT ono,
       pno,
       qty
FROM (
    SELECT od.*,
           RANK() OVER ( ORDER BY od.qty DESC ) AS qty_rank
    FROM ODETAILS od
    INNER JOIN orders o
        ON ( o.ono = od.ono )
    -- Half-open range on the bare column: selects the same rows as
    -- TRUNC( o.received, 'MM' ) = DATE '2015-12-01' but leaves the column
    -- unwrapped, so an index on received can be used.
    WHERE o.received >= DATE '2015-12-01'
      AND o.received <  DATE '2016-01-01'
    -- For any December, regardless of year, use this filter instead:
    -- WHERE EXTRACT( MONTH FROM o.received ) = 12
)
WHERE qty_rank = 1
Change the WHERE clause to get the results for any December rather than just December 2015.
Results:
| ONO | PNO | QTY |
|------|-------|-----|
| 1021 | 10601 | 4 |
... where received >= DATE '2015-12-01' and received < DATE '2016-01-01' -- half-open range: also keeps rows received on 31 Dec after midnight, which BETWEEN ... 12/31/2015 would drop
I believe I have solved it!
-- Highest-selling part for the month number supplied through the
-- &MONTH_NUMBER substitution variable (orders of any year are counted).
SELECT PNO
FROM (
    SELECT d.PNO,
           SUM(d.QTY) AS TOTAL_QTY
    FROM ORDERS o
    INNER JOIN ODETAILS d
        ON d.ONO = o.ONO
    WHERE EXTRACT(MONTH FROM o.RECEIVED) = &MONTH_NUMBER
    GROUP BY d.PNO
    ORDER BY TOTAL_QTY DESC
)
WHERE ROWNUM = 1;
You can add some to_char calls to your query on the date columns to parse out year and month, or just month if you want all years divided by month (month and year seems more useful), then add that to your where clause. See my self-contained example:
-- Self-contained example: highest-selling part for one year-month.
with odetails as
(
    select 1 as ono, 1 as pno, 4 as qty from dual
    union all
    select 1 as ono, 2 as pno, 1 as qty from dual
    union all
    select 1 as ono, 3 as pno, 2 as qty from dual
    union all
    select 1 as ono, 4 as pno, 1 as qty from dual
    union all
    select 2 as ono, 2 as pno, 1 as qty from dual
    union all
    select 2 as ono, 3 as pno, 2 as qty from dual
),
orders as
(
    select 1 as ono, 1 as cno, 1 as eno, to_date('2015-10-12', 'YYYY-MM-DD') as received, to_date('2015-10-15', 'YYYY-MM-DD') as shipped from dual
    union all
    select 2 as ono, 1 as cno, 1 as eno, to_date('2015-11-12', 'YYYY-MM-DD') as received, to_date('2015-11-15', 'YYYY-MM-DD') as shipped from dual
)
select pno
from
(
    select od.pno, sum(od.qty) as total_qty
    from odetails od
    join orders o
        on o.ono = od.ono
    -- Restrict to the wanted year-month BEFORE aggregating, so that only that
    -- month's rows are summed and the outer ROWNUM filter simply takes the
    -- top row of an already month-specific ranking.
    where to_char(o.received, 'YYYY-MM') = '2015-11'
    group by od.pno
    order by sum(od.qty) desc
)
where rownum = 1
;
This gives you a PNO of 3, since it has the highest quantity in November 2015.

Resources