How to get data with removed duplicates? - oracle

The code below is meant to return distinct data in terms of the columns eq and mdl, but it does not return any result.
I added "AND ROWNUM IN (SELECT MAX(ROWNUM) FROM T1.." to remove duplicated rows.
If I remove "AND ROWNUM IN (SELECT MAX(ROWNUM) FROM T1..", then of course all the data in table T1 is selected.
<Table and data>
CREATE TABLE T1 (
dte VARCHAR2(15),
gu1 VARCHAR2(15),
gu2 VARCHAR2(15),
eq VARCHAR2(15),
mdl VARCHAR2(15),
val VARCHAR2(15)
);
INSERT INTO T1 VALUES('20190801','30','30','E1','M1','1.5');
INSERT INTO T1 VALUES('20190801','30','30','E1','M1',NULL);
INSERT INTO T1 VALUES('20190801','30','30','E1','M1','0');
INSERT INTO T1 VALUES('20190802','30','30','E1','M1','1.5');
INSERT INTO T1 VALUES('20190803','30','30','E3','M1','3.0');
<Code>
SELECT gu1,gu2,eq,mdl
FROM T1
WHERE val <> '0' AND val IS NOT NULL
AND dte >= '20190801' AND dte <= '20190803'
AND gu1 = '30'
AND ROWNUM IN (SELECT MAX(ROWNUM) FROM T1 --to get only one among dulplicated rows in terms of column e1, mdl,
WHERE val <> '0' AND val IS NOT NULL
AND dte >= '20190801' AND dte <= '20190803'
AND gu1 = '30'
GROUP BY eq,mdl)
;
<Expected result is>
GU1 GU2 EQ MDL
---- ---- ---- ----
30 30 E1 M1
30 30 E3 M1

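ROWNUM is assigned to a row only as that row passes the WHERE clause, so a predicate such as ROWNUM IN (SELECT MAX(ROWNUM) ...) cannot pick out one particular row. What you can do instead is use
the ROW_NUMBER analytic function, as follows: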
-- Number the qualifying rows inside each (eq, mdl) group, largest dte first,
-- and keep only the first row of every group. The outer query lists the four
-- requested columns explicitly so the helper column rnk is not returned.
SELECT x.gu1, x.gu2, x.eq, x.mdl
FROM (
SELECT gu1,gu2,eq,mdl,row_number() over(partition by eq,mdl order by dte desc) as rnk
FROM T1
WHERE val <> '0' AND val IS NOT NULL
AND dte >= '20190801' AND dte <= '20190803'
AND gu1 = '30'
)x
WHERE x.rnk=1

Try to use SELECT DISTINCT Statement.
SELECT DISTINCT
gu1,gu2,eq,mdl
FROM T1
WHERE val <> '0' AND val IS NOT NULL
AND dte >= '20190801' AND dte <= '20190803'
AND gu1 = '30'
;
More info on DISTINCT use here

As far as I understand from the sample data and expected output, you can use one of the following methods:
DISTINCT - as described in VSMent's answer
Using ROW_NUMBER - as described in George Joseph's answer
Using EXISTS - as described below
-- in following example, you can also use WITH AS to remove excess duplicate coding
-- Return one row per (eq, mdl): a qualifying row is kept only when no other
-- qualifying row of the same (eq, mdl) group has a smaller ROWID.
SELECT T1.gu1,T1.gu2,T1.eq,T1.mdl
FROM T1
WHERE T1.val <> '0'
AND T1.val IS NOT NULL
AND T1.dte >= '20190801'
AND T1.dte <= '20190803'
AND T1.gu1 = '30'
AND NOT EXISTS (SELECT 1
FROM T1 T2 -- self-reference: T2 is a second alias of T1, not another table
WHERE T2.val <> '0'
AND T2.val IS NOT NULL
AND T2.dte >= '20190801'
AND T2.dte <= '20190803'
AND T2.gu1 = '30' -- gu1 is VARCHAR2, so compare it with a string literal
-- THE FOLLOWING 3 CONDITIONS RESTRICT DUPLICATE ROWS
AND T1.EQ = T2.EQ
AND T1.MDL = T2.MDL
AND T1.ROWID > T2.ROWID
);
Cheers!!

Related

Pl / SQL Oracle helps to run a Date in Subquery

How could I get the date of the Maximum Value, by means of a subquery
I can't put the Date in the Main query because I would have to add it to the group by it would bring me a lot of data
Here is the Code:
SELECT MAX (A1.VALOR) AS VALOR,
(SELECT sq1.FECHA
FROM VARIABLE_VALORES_SMEC sq1
WHERE sq1.ID_AGENTE = A1.ID_AGENTE)
MES, -- {<-- Here is the Problem}
(SELECT CODIGO_AGENTE
FROM AGENTES
WHERE ID_AGENTE = A1.ID_AGENTE)
Agentess,
(SELECT NOMBRE_AGENTE
FROM AGENTES
WHERE ID_AGENTE = A1.ID_AGENTE)
Nombre_Agente
FROM VARIABLE_VALORES_SMEC A1
WHERE A1.VALOR < '1'
AND A1.VALOR != '0'
AND A1.ID_AGENTE IN (SELECT C1.ID_AGENTE
FROM VARIABLE_VALORES_SMEC C1
WHERE A1.FECHA = C1.FECHA)
AND A1.ID_AGENTE IN (SELECT B1.ID_AGENTE
FROM AGENTES B1
WHERE ID_CATEGORIA_AGENTE = 'AC006')
AND (A1.FECHA BETWEEN (ADD_MONTHS (TO_DATE ( :FECHAIN, 'MM/DD/YYYY'),
-1))
AND (LAST_DAY (
ADD_MONTHS (
TO_DATE ( :FECHAIN, 'MM/DD/YYYY'),
-1))))
AND A1.ID_VARIABLE LIKE '%_calc_total_pot#%'
GROUP BY ID_AGENTE
Am I correct that you need the date (FECHA) of the maximum A1.VALOR?
If yes, you can use the following query; if not, just replace A1.VALOR with the required column in the KEEP() clause:
SELECT MAX (A1.VALOR) AS VALOR,
max(A1.FECHA)keep(dense_rank first order by A1.VALOR desc) MES, -- A1.VALOR is used here as sort key, replace it with what you want
(SELECT CODIGO_AGENTE
FROM AGENTES
WHERE ID_AGENTE = A1.ID_AGENTE)
Agentess,
(SELECT NOMBRE_AGENTE
FROM AGENTES
WHERE ID_AGENTE = A1.ID_AGENTE)
Nombre_Agente
FROM VARIABLE_VALORES_SMEC A1
WHERE A1.VALOR < '1'
AND A1.VALOR != '0'
AND A1.ID_AGENTE IN (SELECT C1.ID_AGENTE
FROM VARIABLE_VALORES_SMEC C1
WHERE A1.FECHA = C1.FECHA)
AND A1.ID_AGENTE IN (SELECT B1.ID_AGENTE
FROM AGENTES B1
WHERE ID_CATEGORIA_AGENTE = 'AC006')
AND (A1.FECHA BETWEEN (ADD_MONTHS (TO_DATE ( :FECHAIN, 'MM/DD/YYYY'),
-1))
AND (LAST_DAY (
ADD_MONTHS (
TO_DATE ( :FECHAIN, 'MM/DD/YYYY'),
-1))))
AND A1.ID_VARIABLE LIKE '%_calc_total_pot#%'
GROUP BY ID_AGENTE
You can use row_number analytical function to fetch one record for which value is highest and use the fecha of that record. Use following sub query:
-- Scalar subquery for the MES column: FECHA of the row with the highest VALOR
-- for the agent of the current outer row (NULL VALORs are ranked last).
-- NOTE(review): A1 is referenced two query levels down; some Oracle releases
-- only resolve correlation names one level deep - confirm on the target version.
(Select fecha from
(SELECT sq1.FECHA, row_number() over (order by sq1.VALOR desc nulls last) as rn
FROM VARIABLE_VALORES_SMEC sq1
WHERE sq1.ID_AGENTE = A1.ID_AGENTE)
Where rn = 1) MES

How to use 'EXIST' in a simple oracle query

I have a table called ‘MainTable’ with following data
Another table called ‘ChildTable’ with following data (foreign key Number)
Now I want to fetch those records from ‘ChildTable’ for which at least one record with the same number has status ‘S’.
But if any other record for this number is ‘R’ then I don’t want to fetch it
Something like this-
I tried following
Select m.Number, c.Status from MainTable m, ChildTable c
where EXISTS (SELECT NULL
FROM ChildTable c2
WHERE c2.status =’S’ and c2.status <> ‘R’
AND c2.number = m.number)
But here I am getting record having ‘R’ status also, what I am doing wrong?
You can try something like this
-- For every num, count its 'R' rows and its 'S' rows with window sums, then
-- keep only the rows whose num has no 'R' row and at least one 'S' row.
select
    t.num,
    t.status
from (
    select
        id,
        num,
        status,
        sum(case when status = 'R' then 1 else 0 end) over (partition by num) Rs,
        sum(case when status = 'S' then 1 else 0 end) over (partition by num) Ss
    from child_table
) t
where t.Ss >= 1
  and t.Rs = 0
-- and status = 'S'
Here is a sqlfiddle demo
The child records with 'R' might be associated with a maintable record that also has another child record with status 'S' -- that is what your query is asking for.
-- Child rows of every MainTable number that has at least one child with
-- status 'S' and no child with status 'R'. String literals use plain ASCII
-- single quotes; typographic quotes are not valid SQL delimiters.
Select
m.Number,
c.Status
from MainTable m
join ChildTable c on c.number = m.number
where EXISTS (
SELECT NULL
FROM ChildTable c2
WHERE c2.status = 'S'
AND c2.number = m.number) and
NOT EXISTS (
SELECT NULL
FROM ChildTable c2
WHERE c2.status = 'R'
AND c2.number = m.number)
-- Numbers that have at least one child row with status 'S'.
WITH ChildrenWithS AS (
SELECT Number
FROM ChildTable
WHERE Status = 'S'
)
-- Numbers that have at least one child row with status 'R'.
-- NULL numbers are filtered out: a single NULL in a NOT IN list makes the
-- predicate unknown for every row, which would return no rows at all.
,ChildrenWithR AS (
SELECT Number
FROM ChildTable
WHERE Status = 'R'
AND Number IS NOT NULL
)
SELECT MainTable.Number
,ChildTable.Status
FROM MainTable
INNER JOIN ChildTable
ON MainTable.Number = ChildTable.Number
WHERE MainTable.Number IN (SELECT Number FROM ChildrenWithS)
AND MainTable.Number NOT IN (SELECT Number FROM ChildrenWithR)

CROSS APPLY too slow for running total - TSQL

Please see my code below as it is running too slowly with the CROSS APPLY.
How can I remove the CROSS APPLY and add something else that will run faster?
Please note I am using SQL Server 2008 R2.
;WITH MyCTE AS
(
SELECT
R.NetWinCURRENCYValue AS NetWin
,dD.[Date] AS TheDay
FROM
dimPlayer AS P
JOIN
dbo.factRevenue AS R ON P.playerKey = R.playerKey
JOIN
dbo.vw_Date AS dD ON Dd.dateKey = R.dateKey
WHERE
P.CustomerID = 12345)
SELECT
A.TheDay AS [Date]
,ISNULL(A.NetWin, 0) AS NetWin
,rt.runningTotal AS CumulativeNetWin
FROM MyCTE AS A
CROSS APPLY (SELECT SUM(NetWin) AS runningTotal
FROM MyCTE WHERE TheDay <= A.TheDay) AS rt
ORDER BY A.TheDay
CREATE TABLE #temp (NetWin money, TheDay datetime)
insert into #temp
SELECT
R.NetWinCURRENCYValue AS NetWin
,dD.[Date] AS TheDay
FROM
dimPlayer AS P
JOIN
dbo.factRevenue AS R ON P.playerKey = R.playerKey
JOIN
dbo.vw_Date AS dD ON Dd.dateKey = R.dateKey
WHERE
P.CustomerID = 12345;
SELECT
A.TheDay AS [Date]
,ISNULL(A.NetWin, 0) AS NetWin
,SUM(B.NetWin) AS CumulativeNetWin
FROM #temp AS A
JOIN #temp AS B
ON A.TheDay >= B.TheDay
GROUP BY A.TheDay, ISNULL(A.NetWin, 0);
Here https://stackoverflow.com/a/13744550/613130 it's suggested to use recursive CTE.
;WITH MyCTE AS
(
SELECT
R.NetWinCURRENCYValue AS NetWin
,dD.[Date] AS TheDay
,ROW_NUMBER() OVER (ORDER BY dD.[Date]) AS RN
FROM dimPlayer AS P
JOIN dbo.factRevenue AS R ON P.playerKey = R.playerKey
JOIN dbo.vw_Date AS dD ON Dd.dateKey = R.dateKey
WHERE P.CustomerID = 12345
)
, MyCTERec AS
(
SELECT C.TheDay AS [Date]
,ISNULL(C.NetWin, 0) AS NetWin
,ISNULL(C.NetWin, 0) AS CumulativeNetWin
,C.RN
FROM MyCTE AS C
WHERE C.RN = 1
UNION ALL
SELECT C.TheDay AS [Date]
,ISNULL(C.NetWin, 0) AS NetWin
,P.CumulativeNetWin + ISNULL(C.NetWin, 0) AS CumulativeNetWin
,C.RN
FROM MyCTERec P
INNER JOIN MyCTE AS C ON C.RN = P.RN + 1
)
SELECT *
FROM MyCTERec
ORDER BY RN
OPTION (MAXRECURSION 0)
Note that this query will work if you have 1 record == 1 day! If you have multiple records in a day, the results will be different from the other query.
As I said here, if you want really fast calculation, put it into temporary table with sequential primary key and then calculate rolling total:
create table #Temp (
ID bigint identity(1, 1) primary key,
[Date] date,
NetWin decimal(29, 10)
)
-- Load one row per day (NetWin summed per date) for customer 12345.
-- The ORDER BY makes the identity column ID follow date order, which the
-- running-total step relies on.
insert into #Temp ([Date], NetWin)
select
dD.[Date],
sum(R.NetWinCURRENCYValue) as NetWin
from dbo.dimPlayer as P
inner join dbo.factRevenue as R on P.playerKey = R.playerKey
inner join dbo.vw_Date as dD on Dd.dateKey = R.dateKey
where P.CustomerID = 12345
group by dD.[Date]
order by dD.[Date]
-- Running total over #Temp: start at ID = 1 and walk to ID + 1 on every
-- recursion step, carrying the cumulative sum forward.
;with cte as (
select T.ID, T.[Date], T.NetWin, T.NetWin as CumulativeNetWin
from #Temp as T
where T.ID = 1
union all
-- The sum of two decimal(29, 10) values has a wider type; cast it back so the
-- recursive member has the same column type as the anchor member.
select T.ID, T.[Date], T.NetWin, cast(T.NetWin + C.CumulativeNetWin as decimal(29, 10)) as CumulativeNetWin
from cte as C
inner join #Temp as T on T.ID = C.ID + 1
)
select C.[Date], C.NetWin, C.CumulativeNetWin
from cte as C
order by C.[Date]
-- One recursion level is used per row; lift the default limit of 100 levels.
option (maxrecursion 0)
I assume that you could have duplicate dates in the input but don't want duplicates in the output, so I grouped the data before putting it into the table.

How to exclude holidays between two dates?

I have two dates and I have to find out the number of Sundays and holidays that fall between those two dates. Can I do this using BETWEEN? If so, how?
SELECT date1, date2, trunc(deposit_date - transaction_date) TOTAL
FROM Table_Name FULL OUTER JOIN Holidays ON date2 = hdate
WHERE hdate IN (date1, date2)
Using this I can definitely check whether there is a holiday on either of the two days, i.e. date1 or date2, but what I am not able to find out is whether a holiday or a Sunday lies between these two dates. Help!
The solution you've posted is horribly inefficient; you can do all of this in a single SQL statement:
Firstly generate all possible dates between the two you have:
-- One row per calendar day from :min_date to :max_date, both ends included.
-- The row count is (later date - earlier date) + 1; drop the "+ 1" if
-- :max_date itself must not be generated.
select trunc(:min_date) + level - 1
from dual
connect by level <= trunc(:max_date) - trunc(:min_date) + 1
Then use your HOLIDAY table to restrict to what you want:
-- all_dates: one row per calendar day from :min_date to :max_date inclusive.
with all_dates as (
select trunc(:min_date) + level - 1 as the_date
from dual
connect by level <= trunc(:max_date) - trunc(:min_date) + 1
)
-- Count the days that have no matching row in HOLIDAY and are not Sundays.
select count(*)
from all_dates a
left outer join holiday b
on a.the_date = b.hdate
where b.hdate is null
-- Pin the language so the abbreviation is 'SUN' whatever the session NLS settings are.
and to_char(a.the_date, 'DY', 'NLS_DATE_LANGUAGE=ENGLISH') <> 'SUN'
If you want to check if hdate is between the two dates you can query using
where hdate between date1 and date2
If you want to check if hdate is on the same day as date1 or date2 you can query like this
where trunc(hdate) in (trunc(date1) ,trunc(date2))
The TRUNC function removes the time portion of a date.
You should create a table with the holidays and maintain it on your own.
CREATE TABLE holidays
(
holiday VARCHAR2(100)
, d_date DATE
);
INSERT INTO holidays VALUES ('National Developer Day', DATE'2013-06-01');
SELECT *
FROM holidays;
-- National Developer Day 2013-06-01 00:00:00
The rest is just a matter of a SQL statement
Scenario 1: EXISTS
SELECT COUNT
(
CASE
WHEN TRIM(TO_CHAR(d.start_date_level, 'DAY')) = 'SUNDAY'
OR CASE
WHEN EXISTS (SELECT 1 FROM holidays h WHERE d.start_date_level = h.d_date)
THEN 1
ELSE NULL
END = 1
THEN 1
ELSE NULL
END
) AS holiday_check
FROM
(
SELECT start_date + (LEVEL - 1) AS start_date_level
FROM
(
SELECT start_date, end_date, end_date - start_date AS diff_date
FROM
(
SELECT TRUNC(ADD_MONTHS(SYSDATE, -2)) AS start_date
, TRUNC(SYSDATE) AS end_date
FROM DUAL
)
)
CONNECT BY
LEVEL <= (diff_date + 1)
) d
Scenario 2: LEFT JOIN
SELECT COUNT
(
CASE
WHEN TRIM(TO_CHAR(d.start_date_level, 'DAY')) = 'SUNDAY'
OR h.d_date IS NOT NULL
THEN 1
ELSE NULL
END
) AS holiday_check
FROM
(
SELECT start_date + (LEVEL - 1) AS start_date_level
FROM
(
SELECT start_date, end_date, end_date - start_date AS diff_date
FROM
(
SELECT TRUNC(ADD_MONTHS(SYSDATE, -2)) AS start_date
, TRUNC(SYSDATE) AS end_date
FROM DUAL
)
)
CONNECT BY
LEVEL <= (diff_date + 1)
) d
LEFT JOIN holidays h
ON d.start_date_level = h.d_date
9 Sundays + 1 "National Developer Day" = 10
-- workdays(dt1, dt2): number of days d = dt1, dt1 + 1, ... with d < dt2 that
-- have no row in HOLIDAY (hdate = d) and are not Sundays.
-- dt2 itself is never counted; the result is 0 when dt1 >= dt2.
CREATE OR REPLACE FUNCTION workdays (dt1 DATE, dt2 DATE) RETURN NUMBER IS
weekday_count NUMBER := 0;
date1 DATE := dt1;
date2 DATE := dt2;
cur_dt date;
holiday_count number;
begin
if date1 = date2 then
return 0;
end if;
-- Start from the first date passed in (the original referenced an
-- identifier that is not declared in this function).
cur_dt := date1;
while cur_dt < date2 loop
SELECT count(*) INTO holiday_count
FROM holiday
WHERE hdate = cur_dt;
IF holiday_count = 0 THEN
-- Pin the language so the abbreviation is 'SUN' under any session NLS settings.
IF to_char(cur_dt,'DY','NLS_DATE_LANGUAGE=ENGLISH') NOT IN ('SUN') THEN
weekday_count := weekday_count + 1;
END IF;
END IF;
cur_dt := cur_dt +1;
END LOOP;
RETURN weekday_count;
END;
And then I queried my database and got the right results. Do post if you have an optimal solution for this.
Here is an even better and efficient solution to the problem,
SELECT A.ID,
COUNT(A.ID) AS COUNTED
FROM tableA A
LEFT JOIN TableB B
ON A.tableB_id=B.id
LEFT JOIN holiday C
ON TRUNC(C.hdate) BETWEEN (TRUNC(a.date1) +1) AND TRUNC(B.date2)
WHERE c.hdate IS NOT NULL
GROUP BY A.ID;
where tableA contains date1 and TableB contains date2. The holiday table contains the list of holidays and Sundays, and this query excludes 'date1' from the count.
RESULT LOGIC
trunc(date2) - trunc(date1) = x
x - result of the query
Make a table T$HOLIDAYS with your holidays (HDATE column). These dates will be excluded from calculation of working days within given period (sdate is start date and edate end date of period). Here is the function that calculates working days within given period excluding holidays, saturdays and sundays:
-- WorkingDays(sdate, edate): number of days from sdate to edate (both
-- inclusive) that are not listed in T$HOLIDAYS.HDATE and are not a Saturday
-- or Sunday. Returns 0 when edate is earlier than sdate.
CREATE OR REPLACE FUNCTION WorkingDays(sdate IN DATE,edate IN DATE) RETURN NUMBER IS
days NUMBER;
BEGIN
-- CONNECT BY always yields at least one row, so the WHERE guard is what
-- makes a reversed range produce no dates.
WITH dates AS (SELECT sdate+LEVEL-1 AS d FROM DUAL WHERE edate >= sdate CONNECT BY LEVEL<=edate-sdate+1)
SELECT COUNT(*) INTO days
FROM dates
-- NOT EXISTS rather than NOT IN: a NULL hdate would make NOT IN reject every day.
WHERE NOT EXISTS (SELECT 1 FROM t$holidays h WHERE h.hdate = dates.d) --exclude holidays
-- Day names with a pinned language; the numeric 'D' format depends on NLS_TERRITORY.
AND TO_CHAR(d,'DY','NLS_DATE_LANGUAGE=ENGLISH') NOT IN ('SAT','SUN'); --exclude saturdays + sundays
RETURN days;
END WorkingDays;
/
select sum(qq) from (
select case when to_number(to_char((trunc(sysdate-10) + level - 1),'D'))<=5 then 1 else 0 end as qq
from dual
connect by level <= trunc(sysdate) - trunc(sysdate-10))

Oracle Query Tuning (Duplicate table access)

I'm trying to further tune this query. The query returns the status for three different tests for each sample. However, if I wish to further filter the samples returned, I have to put the conditions in both 'SELECT ... FROM sample ...' queries.
Can this query be rewritten referencing the sample table only once?
SELECT sample_id,
created_on,
s_acid,
s_ph,
s_titr
FROM
(SELECT sample_id, -- Rows w/ same sample_id to columns
MAX (CASE WHEN tst_tmpl_id = 36 THEN status END) AS s_acid,
MAX (CASE WHEN tst_tmpl_id = 43 THEN status END) AS s_ph,
MAX (CASE WHEN tst_tmpl_id = 66 THEN status END) AS s_titr
FROM (SELECT test_id, test_tmpl_id, sample_id FROM test)
JOIN (SELECT sample_id FROM sample WHERE sam_tmpl_id = 18)
USING (sample_id)
GROUP BY sample_id
)
-- get other sample fields
JOIN (SELECT sample_id, created_on FROM sample WHERE sam_tmpl_id = 18)
USING (sample_id)
-- q: the samples of template 18, written once and reused twice below.
WITH q AS
(
SELECT sample_id, created_on
FROM sample
WHERE sam_tmpl_id = 18
)
SELECT sample_id,
created_on,
s_acid,
s_ph,
s_titr
FROM (
SELECT sample_id, -- Rows w/ same sample_id to columns
MAX (CASE WHEN tst_tmpl_id = 36 THEN status END) AS s_acid,
MAX (CASE WHEN tst_tmpl_id = 43 THEN status END) AS s_ph,
MAX (CASE WHEN tst_tmpl_id = 66 THEN status END) AS s_titr
FROM (
-- The inline view must project every column the pivot above reads.
-- NOTE(review): assumes TEST has columns test_tmpl_id and status - confirm.
SELECT sample_id, test_tmpl_id AS tst_tmpl_id, status
FROM test
)
JOIN q
USING (sample_id)
GROUP BY
sample_id
)
-- get other sample fields
JOIN q
USING (sample_id)

Resources