Oracle Analytics Get Summary Counts - oracle

Given this query, how can I get summary counts for each Tree_ID?
-- Question query: analytic counts over seven inline sample rows.
-- pulp_count and chip_count are computed over the same window
-- (tree_id, tree_product), so both columns always carry the same value, and
-- DISTINCT only collapses rows that are identical in every selected column.
WITH tree_row (tree_id, tree_product) AS (
    SELECT '1111', 'PULP' FROM dual UNION ALL
    SELECT '1111', 'PULP' FROM dual UNION ALL
    SELECT '2222', 'PULP' FROM dual UNION ALL
    SELECT '2222', 'CHIP' FROM dual UNION ALL
    SELECT '3333', 'PULP' FROM dual UNION ALL
    SELECT '3333', 'CHIP' FROM dual UNION ALL
    SELECT '3333', 'CHIP' FROM dual
)
SELECT DISTINCT
    tree_id,
    COUNT(*) OVER (PARTITION BY tree_id, tree_product) AS pulp_count,
    COUNT(*) OVER (PARTITION BY tree_id, tree_product) AS chip_count,
    COUNT(*) OVER (PARTITION BY tree_id) AS tree_total
FROM tree_row;
Desired Result
TREE_ID PULP_COUNT CHIP_COUNT TREE_TOTAL
1111 2 0 2
2222 1 1 2
3333 1 2 3

You don't need analytic functions, you can use conditional aggregation - normal aggregation but with a case expression (the conditional part) to determine which rows to include in the count:
-- One summary row per tree_id, built with conditional aggregation.
WITH tree_row (tree_id, tree_product) AS (
    SELECT '1111', 'PULP' FROM dual UNION ALL
    SELECT '1111', 'PULP' FROM dual UNION ALL
    SELECT '2222', 'PULP' FROM dual UNION ALL
    SELECT '2222', 'CHIP' FROM dual UNION ALL
    SELECT '3333', 'PULP' FROM dual UNION ALL
    SELECT '3333', 'CHIP' FROM dual UNION ALL
    SELECT '3333', 'CHIP' FROM dual
)
SELECT
    tree_id,
    -- The CASE yields NULL for non-matching rows and COUNT skips NULLs,
    -- so each column counts only the rows of its own product.
    COUNT(CASE WHEN tree_product = 'PULP' THEN tree_id END) AS pulp_count,
    COUNT(CASE WHEN tree_product = 'CHIP' THEN tree_id END) AS chip_count,
    COUNT(*) AS tree_total
FROM tree_row
GROUP BY tree_id
-- GROUP BY does not guarantee output order; sort explicitly so the rows
-- come back by tree_id.
ORDER BY tree_id;
TREE_ID
PULP_COUNT
CHIP_COUNT
TREE_TOTAL
1111
2
0
2
2222
1
1
2
3333
1
2
3
fiddle

Related

IDENTITY full-time and part-time students

have a query and test CASE that shows the number of full-time and part-time students. A full-time student is enrolled in at least 4 courses. A part-time student is enrolled in at least 1 course, but no more than 3.
Although the query appears to work it seems a bit verbose. I was wondering if there is a more succinct way to rewrite the query. In addition, I can would like to display the students first/last names with each row that meets the criteria
Perhaps with something like this?
, LISTAGG(
NVL2(s.student_id, s.last_name || ', ' || s.first_name, NULL),
'; '
) WITHIN GROUP (ORDER BY s.last_name, s.first_name) AS students
Below are my tables, data and query I would like to shorten if possible. Thanks to all who answer and for your expertise.
-- Sample data: ten students. Column types are inferred from the literals
-- of the SELECT list (CREATE TABLE ... AS SELECT).
CREATE TABLE students(student_id, first_name, last_name) AS
SELECT 1, 'Faith', 'Aaron' FROM dual UNION ALL
SELECT 2, 'Lisa', 'Saladino' FROM dual UNION ALL
SELECT 3, 'Leslee', 'Altman' FROM dual UNION ALL
SELECT 4, 'Patty', 'Kern' FROM dual UNION ALL
SELECT 5, 'Beth', 'Cooper' FROM dual UNION ALL
SELECT 95, 'Zak', 'Despart' FROM dual UNION ALL
SELECT 96, 'Owen', 'Balbert' FROM dual UNION ALL
SELECT 97, 'Jack', 'Aprile' FROM dual UNION ALL
SELECT 98, 'Nicole', 'Kramer' FROM dual UNION ALL
SELECT 99, 'Jill', 'Coralnick' FROM dual;
-- Sample data: one row per (student_id, course_id) enrollment.
-- Students 95 and 96 have no rows here, i.e. they are not enrolled in anything.
CREATE TABLE student_courses (student_id,course_id) AS
-- students 1-5: courses 1, 2 and 3 (three courses each)
SELECT 1, 1 FROM dual UNION ALL
SELECT 2, 1 FROM dual UNION ALL
SELECT 3, 1 FROM dual UNION ALL
SELECT 4, 1 FROM dual UNION ALL
SELECT 5, 1 FROM dual UNION ALL
SELECT 1, 2 FROM dual UNION ALL
SELECT 2, 2 FROM dual UNION ALL
SELECT 3, 2 FROM dual UNION ALL
SELECT 4, 2 FROM dual UNION ALL
SELECT 5, 2 FROM dual UNION ALL
SELECT 1, 3 FROM dual UNION ALL
SELECT 2, 3 FROM dual UNION ALL
SELECT 3, 3 FROM dual UNION ALL
SELECT 4, 3 FROM dual UNION ALL
SELECT 5, 3 FROM dual UNION ALL
-- student 97: four courses
SELECT 97, 1 FROM dual UNION ALL
SELECT 97, 3 FROM dual UNION ALL
SELECT 97, 5 FROM dual UNION ALL
SELECT 97, 6 FROM dual UNION ALL
-- student 98: three courses
SELECT 98, 3 FROM dual UNION ALL
SELECT 98, 4 FROM dual UNION ALL
SELECT 98, 5 FROM dual UNION ALL
-- student 99: four courses
SELECT 99, 2 FROM dual UNION ALL
SELECT 99, 4 FROM dual UNION ALL
SELECT 99, 5 FROM dual UNION ALL
SELECT 99, 6 FROM dual;
-- Number of enrolled students per enrollment status.
WITH course_counts AS (
    -- One row per student with a course count. The HAVING filter removes
    -- students for whom the LEFT JOIN found no student_courses rows.
    SELECT
        s.student_id,
        s.first_name,
        s.last_name,
        COUNT(sc.course_id) AS course_count
    FROM students s
    LEFT JOIN student_courses sc
        ON sc.student_id = s.student_id
    GROUP BY
        s.student_id,
        s.first_name,
        s.last_name
    HAVING COUNT(sc.course_id) > 0
),
statuses AS (
    -- 4 or more courses -> 'full-time'; 1 to 3 courses -> 'part-time'.
    SELECT
        student_id,
        first_name,
        last_name,
        CASE
            WHEN course_count >= 4 THEN 'full-time'
            WHEN course_count >= 1 AND course_count <= 3 THEN 'part-time'
        END AS student_enrollment_status
    FROM course_counts
)
SELECT
    UPPER(student_enrollment_status) AS student_enrollment_status,
    COUNT(student_enrollment_status) AS student_enrollment_status_count
FROM statuses
GROUP BY student_enrollment_status;
As you only need the numbers (and not any other data), shorten the query so that it searches only the student_courses table:
SQL> with temp as
2 (select student_id,
3 count(course_id) cnt
4 from student_courses
5 group by student_id
6 )
7 select
8 sum(case when cnt < 4 then 1 else 0 end) part_time,
9 sum(case when cnt >= 4 then 1 else 0 end) full_time
10 from temp;
PART_TIME FULL_TIME
---------- ----------
6 2
SQL>

Duplicated rows numbering

I need to number the rows so that the row number with the same ID is the same. For example:
Oracle database. Any ideas?
Use the DENSE_RANK analytic function:
-- Rows that share an id get the same number; DENSE_RANK leaves no gaps
-- between the numbers of consecutive distinct ids.
SELECT
    DENSE_RANK() OVER (ORDER BY id) AS row_number,
    id
FROM your_table
-- The ORDER BY inside OVER() only drives the ranking; sort the result
-- explicitly so the rows are returned in id order.
ORDER BY id;
Which, for the sample data:
-- Sample data: eight rows, five distinct ids (86325, 86326 and 86354 occur twice).
CREATE TABLE your_table ( id ) AS
SELECT 86325 FROM DUAL UNION ALL
SELECT 86325 FROM DUAL UNION ALL
SELECT 86326 FROM DUAL UNION ALL
SELECT 86326 FROM DUAL UNION ALL
SELECT 86352 FROM DUAL UNION ALL
SELECT 86353 FROM DUAL UNION ALL
SELECT 86354 FROM DUAL UNION ALL
SELECT 86354 FROM DUAL;
Outputs:
ROW_NUMBER
ID
1
86325
1
86325
2
86326
2
86326
3
86352
4
86353
5
86354
5
86354
db<>fiddle here

Oracle Ranking query

Need help to achieve below result:
source data:
Output Expected:
Query to generate source data:
-- Sample source rows: six (MSISDN, LOC, DATA_DAY) rows, one per day counting
-- back from today. Every row has a different DATA_DAY, so there is nothing to
-- de-duplicate: UNION ALL returns the same rows as UNION without the extra
-- duplicate-elimination step. Add an ORDER BY if a specific row order is needed.
SELECT '43443' AS MSISDN, 'Turkey' AS LOC, TRUNC(SYSDATE) AS DATA_DAY FROM DUAL
UNION ALL
SELECT '43443' AS MSISDN, 'Turkey' AS LOC, TRUNC(SYSDATE-1) AS DATA_DAY FROM DUAL
UNION ALL
SELECT '43443' AS MSISDN, 'India' AS LOC, TRUNC(SYSDATE-2) AS DATA_DAY FROM DUAL
UNION ALL
SELECT '43443' AS MSISDN, 'Eng' AS LOC, TRUNC(SYSDATE-3) AS DATA_DAY FROM DUAL
UNION ALL
SELECT '43446' AS MSISDN, 'Eng' AS LOC, TRUNC(SYSDATE-4) AS DATA_DAY FROM DUAL
UNION ALL
SELECT '43446' AS MSISDN, 'India' AS LOC, TRUNC(SYSDATE-5) AS DATA_DAY FROM DUAL;

Determine start of data's most recent uninterrupted 'streak' by date

I have a dataset that looks something like:
asset_id,date_logged
1234,2018-02-01
1234,2018-02-02
1234,2018-02-03
1234,2018-02-04
1234,2018-02-05
1234,2018-02-06
1234,2018-02-07
1234,2018-02-08
1234,2018-02-09
1234,2018-02-10
9876,2018-02-01
9876,2018-02-02
9876,2018-02-03
9876,2018-02-07
9876,2018-02-08
9876,2018-02-09
9876,2018-02-10
For the purpose of this exercise, imagine today's date is 2018-02-10 (10 Feb 2018). For all the asset_ids in the table, I am trying to identify the start of the most recent unbroken streak for date_logged.
For asset_id = 1234, this would be 2018-02-01. The asset_id was logged all 10 days in an unbroken streak. For asset_id = 9876, this would be 2018-02-07. Because the asset_id was not logged on 2018-02-04, 2018-02-05, and 2018-02-06, the most recent unbroken streak starts on 2018-02-07.
So, my result set would hopefully look something like:
asset_id,Number_of_days_in_most_recent_logging_streak
1234,10
9876,4
Or, alternatively:
asset_id,Date_Begin_Most_Recent_Streak
1234,2018-02-01
9876,2018-02-07
I haven't been able to work out anything that gets me close -- my best effort so far is to get the number of days since the first log date and today, and the number of days the asset_id appears in the dataset, and compare these to identify situations where the streak is more recent than the first day they appear. For my real dataset this isn't particularly problematic, but it's an ugly solution and I would like to understand a better way of getting to the outcome.
Perhaps something like this. Break the query after each inline view in the WITH clause and SELECT * FROM the most recent inline view, to see what each step does.
-- Start date and length of the most recent unbroken run of daily logs per asset.
WITH
    -- Inline sample data.
    inputs (asset_id, date_logged) AS (
        SELECT 1234, TO_DATE('2018-02-01', 'yyyy-mm-dd') FROM dual UNION ALL
        SELECT 1234, TO_DATE('2018-02-02', 'yyyy-mm-dd') FROM dual UNION ALL
        SELECT 1234, TO_DATE('2018-02-03', 'yyyy-mm-dd') FROM dual UNION ALL
        SELECT 1234, TO_DATE('2018-02-04', 'yyyy-mm-dd') FROM dual UNION ALL
        SELECT 1234, TO_DATE('2018-02-05', 'yyyy-mm-dd') FROM dual UNION ALL
        SELECT 1234, TO_DATE('2018-02-06', 'yyyy-mm-dd') FROM dual UNION ALL
        SELECT 1234, TO_DATE('2018-02-07', 'yyyy-mm-dd') FROM dual UNION ALL
        SELECT 1234, TO_DATE('2018-02-08', 'yyyy-mm-dd') FROM dual UNION ALL
        SELECT 1234, TO_DATE('2018-02-09', 'yyyy-mm-dd') FROM dual UNION ALL
        SELECT 1234, TO_DATE('2018-02-10', 'yyyy-mm-dd') FROM dual UNION ALL
        SELECT 9876, TO_DATE('2018-02-01', 'yyyy-mm-dd') FROM dual UNION ALL
        SELECT 9876, TO_DATE('2018-02-02', 'yyyy-mm-dd') FROM dual UNION ALL
        SELECT 9876, TO_DATE('2018-02-03', 'yyyy-mm-dd') FROM dual UNION ALL
        SELECT 9876, TO_DATE('2018-02-07', 'yyyy-mm-dd') FROM dual UNION ALL
        SELECT 9876, TO_DATE('2018-02-08', 'yyyy-mm-dd') FROM dual UNION ALL
        SELECT 9876, TO_DATE('2018-02-09', 'yyyy-mm-dd') FROM dual UNION ALL
        SELECT 9876, TO_DATE('2018-02-10', 'yyyy-mm-dd') FROM dual
    ),
    -- Within a run of consecutive dates, the date and the row number both
    -- advance by one per row, so their difference (streak_key) is the same
    -- for every row of the run and changes after a gap.
    -- NOTE(review): assumes at most one row per asset per day - confirm.
    keyed (asset_id, date_logged, streak_key) AS (
        SELECT
            asset_id,
            date_logged,
            date_logged - ROW_NUMBER() OVER (
                PARTITION BY asset_id
                ORDER BY date_logged
            )
        FROM inputs
    ),
    -- One row per run: its first date and its number of rows.
    streaks (asset_id, streak_start, cnt) AS (
        SELECT asset_id, MIN(date_logged), COUNT(*)
        FROM keyed
        GROUP BY asset_id, streak_key
    )
SELECT
    asset_id,
    -- The latest run start is the start of the most recent run ...
    MAX(streak_start) AS date_start_recent_streak,
    -- ... and KEEP (DENSE_RANK LAST ...) picks the count of that same run.
    MAX(cnt) KEEP (DENSE_RANK LAST ORDER BY streak_start) AS cnt
FROM streaks
GROUP BY asset_id
ORDER BY asset_id -- If needed
;
ASSET_ID DATE_START_RECENT_STREAK CNT
---------- ------------------------ ----------
1234 2018-02-01 10
9876 2018-02-07 4
you can try this,
-- Start date and length of each asset's most recent run of consecutive dates.
with test (asset_id, date_logged) as
  (select 1234, date '2018-02-01' from dual union all
   select 1234, date '2018-02-02' from dual union all
   select 1234, date '2018-02-03' from dual union all
   select 1234, date '2018-02-04' from dual union all
   select 1234, date '2018-02-05' from dual union all
   select 1234, date '2018-02-06' from dual union all
   select 1234, date '2018-02-07' from dual union all
   select 1234, date '2018-02-08' from dual union all
   select 1234, date '2018-02-09' from dual union all
   select 1234, date '2018-02-10' from dual union all
   select 9876, date '2018-02-01' from dual union all
   select 9876, date '2018-02-02' from dual union all
   select 9876, date '2018-02-03' from dual union all
   select 9876, date '2018-02-07' from dual union all
   select 9876, date '2018-02-08' from dual union all
   select 9876, date '2018-02-09' from dual union all
   select 9876, date '2018-02-10' from dual union all
   select 9876, date '2018-02-11' from dual union all
   select 9876, date '2018-02-12' from dual
  )
SELECT asset_id, MIN(date_logged), COUNT(1)
FROM (
    -- No ORDER BY in this inline view: the outer query aggregates, so the
    -- order of these rows has no effect on the result.
    SELECT
        asset_id,
        date_logged,
        MAX(date_logged) OVER (PARTITION BY asset_id) + 1 AS max_date_logged_plus_one,
        DENSE_RANK() OVER (PARTITION BY asset_id ORDER BY date_logged DESC) AS rown
    FROM test
)
-- A row is kept when its distance in days from (latest date + 1) equals its
-- descending rank; after a gap the distance exceeds the rank, so only the
-- most recent run passes the filter.
WHERE max_date_logged_plus_one - date_logged = rown
GROUP BY asset_id
-- Return the assets in a defined order.
ORDER BY asset_id;
ASSET_ID MIN(DATE_LOGGED) COUNT(1)
---------- ---------------- ----------
1234 01-FEB-18 10
9876 07-FEB-18 6
if below data is commented, output is
select 9876, date '2018-02-10' from dual union all
ASSET_ID MIN(DATE_LOGGED) COUNT(1)
---------- ---------------- ----------
1234 01-FEB-18 10
9876 11-FEB-18 2
Would this make any sense?
SQL> with test (asset_id, date_logged) as
2 (select 1234, date '2018-02-01' from dual union all
3 select 1234, date '2018-02-02' from dual union all
4 select 1234, date '2018-02-03' from dual union all
5 select 1234, date '2018-02-04' from dual union all
6 select 1234, date '2018-02-05' from dual union all
7 select 1234, date '2018-02-06' from dual union all
8 select 1234, date '2018-02-07' from dual union all
9 select 1234, date '2018-02-08' from dual union all
10 select 1234, date '2018-02-09' from dual union all
11 select 1234, date '2018-02-10' from dual union all
12 select 9876, date '2018-02-01' from dual union all
13 select 9876, date '2018-02-02' from dual union all
14 select 9876, date '2018-02-03' from dual union all
15 select 9876, date '2018-02-07' from dual union all
16 select 9876, date '2018-02-08' from dual union all
17 select 9876, date '2018-02-09' from dual union all
18 select 9876, date '2018-02-10' from dual
19 ),
20 inter as
21 -- difference between DATE_LOGGED and its previous DATE_LOGGED
22 (select asset_id,
23 date_logged,
24 date_logged - lag(date_logged) over (partition by asset_id order by date_logged) diff
25 from test
26 )
27 select i.asset_id, min(i.date_logged) date_logged
28 from inter i
29 where nvl(i.diff, 1) = (select max(i1.diff) from inter i1
30 where i1.asset_id = i.asset_id
31 )
32 group by i.asset_id
33 order by i.asset_id;
ASSET_ID DATE_LOGGE
---------- ----------
1234 2018-02-01
9876 2018-02-07
SQL>

Duplicate rows with spaces:

I have a dataset as below:
-- Sample rows; rows 1 and 3 differ only in the spaces around ID.
-- (The unmatched closing parenthesis after the last SELECT was removed so
-- the statement parses on its own.)
SELECT ' 1234 ' ID,NULL TAG,' AB' CODE FROM DUAL
UNION ALL
SELECT '453' ID,'GEN' TAG,'AB' CODE FROM DUAL
UNION ALL
SELECT '1234' ID,NULL TAG,' AB' CODE FROM DUAL
I am trying to get duplicates with the below query. The output should be row 1 and 3 but i get only one row as trimmed output of 1 or 3.
-- Question query: select the rows whose trimmed (ID, TAG, CODE) tuple is IN
-- the set of trimmed tuples that occur more than once in the sample data.
-- NOTE: a tuple with a NULL member never satisfies IN, because a comparison
-- with NULL is never true.
WITH src AS (
    SELECT ' 1234 ' ID, NULL TAG, ' AB' CODE FROM DUAL
    UNION ALL
    SELECT '453' ID, 'GEN' TAG, 'AB' CODE FROM DUAL
    UNION ALL
    SELECT '1234' ID, NULL TAG, ' AB' CODE FROM DUAL
)
SELECT TRIM(ID), TRIM(TAG), TRIM(CODE)
FROM src
WHERE (TRIM(ID), TRIM(TAG), TRIM(CODE)) IN (
    SELECT TRIM(ID), TRIM(TAG), TRIM(CODE)
    FROM src
    GROUP BY TRIM(ID), TRIM(TAG), TRIM(CODE)
    HAVING COUNT(*) > 1
)
I just ran this and it returned rows 1 and 3:
-- Select the rows whose trimmed ID belongs to a trimmed (ID, TAG, CODE)
-- group that occurs more than once. Only TRIM(ID) is compared in the IN
-- predicate; GROUP BY puts NULL TAG values into the same group.
WITH src AS (
    SELECT ' 1234 ' ID, NULL TAG, ' AB' CODE FROM DUAL
    UNION ALL
    SELECT '453' ID, 'GEN' TAG, 'AB' CODE FROM DUAL
    UNION ALL
    SELECT '1234' ID, NULL TAG, ' AB' CODE FROM DUAL
)
SELECT TRIM(ID), TRIM(TAG), TRIM(CODE)
FROM src
WHERE TRIM(ID) IN (
    SELECT TRIM(ID)
    FROM src
    GROUP BY TRIM(ID), TRIM(TAG), TRIM(CODE)
    HAVING COUNT(*) > 1
)
I changed your WHERE to reference only the TRIM(ID) instead of all 3 values.
Edit #1, part of the problem is you are comparing null to null which you cannot do. So you can do a null check on the columns and if it is null then replace it. I wrapped the null columns with nvl(null, 'na') so then it had a value to compare:
-- Select the rows whose trimmed (ID, TAG, CODE) tuple occurs more than once.
-- The sample rows use nvl(null, 'na') for TAG, so the tuple comparison in
-- the IN predicate has a non-NULL value to match on.
WITH src AS (
    SELECT ' 1234 ' ID, nvl(null, 'na') TAG, ' AB' CODE FROM DUAL
    UNION ALL
    SELECT '453' ID, 'GEN' TAG, 'AB' CODE FROM DUAL
    UNION ALL
    SELECT ' 1234 ' ID, nvl(null, 'na') TAG, ' AB' CODE FROM DUAL
    UNION ALL
    SELECT ' 1234 ' ID, nvl(null, 'na') TAG, ' AC' CODE FROM DUAL
)
SELECT
    TRIM(ID) id,
    TRIM(TAG) tag,
    TRIM(CODE) code
FROM src
WHERE (TRIM(ID), TRIM(TAG), TRIM(CODE)) IN (
    SELECT TRIM(ID), TRIM(TAG), TRIM(CODE)
    FROM src
    GROUP BY TRIM(ID), TRIM(TAG), TRIM(CODE)
    HAVING COUNT(*) > 1
)
See SQL Fiddle with Demo

Resources