PROCEDURE DELETE_X1
IS
v_emp_unum VARCHAR2 (25);
BEGIN
/*FOR rec
IN (SELECT l2.*,
row_number ()
OVER (PARTITION BY case_uid,nvl(min_eff_date, eff_begin_date)
ORDER BY case_uid,nvl(min_eff_date, eff_begin_date))
rw,
ROWID rid
FROM (SELECT l1.*,
MIN (
CASE
WHEN eff_end_date = next_begin - 1
THEN
eff_begin_date
END)
OVER (PARTITION BY case_uid)
min_eff_date,
MAX (
CASE
WHEN previous_end <>
TO_DATE ('12/31/9999',
'mm/dd/yyyy')
AND previous_end + 1 = eff_begin_date
THEN
eff_end_date
END)
OVER (PARTITION BY case_uid)
max_end_date
FROM (SELECT GT.*,
LEAD (
EFF_begin_DATE)
OVER (PARTITION BY CASE_UID
ORDER BY EFF_BEGIN_DATE)
next_begin,
LAG (
EFF_end_DATE)
OVER (PARTITION BY CASE_UID
ORDER BY EFF_BEGIN_DATE)
previous_end
FROM TABLE_OUTPUT GT
WHERE SRC = 'ERICSSON' AND STATUS_CODE = 'X1') l1)
l2)*/
-- Cursor query: a gaps-and-islands pass over the ERICSSON / X1 rows of TABLE_OUTPUT.
-- Innermost level (l1) marks where a run of back-to-back date ranges starts and ends,
-- middle level (l2) spreads those markers to every row of the run,
-- outer level numbers the rows that belong to the same run.
FOR rec
IN (SELECT l2.*,
-- rw numbers the rows sharing one (case_uid, min_eff_date, max_end_date) triple.
-- The ORDER BY repeats the partition keys, so the order inside a group is arbitrary.
ROW_NUMBER ()
OVER (PARTITION BY case_uid, min_eff_date, max_end_date
ORDER BY case_uid, min_eff_date, max_end_date)
rw,
ROWID rid
FROM (SELECT l1.*,
-- Running MAX of start_at from the first row of the case up to the current row.
-- start_at is NULL on continuation rows, so this carries the latest run start forward.
(MAX (
start_at)
OVER (
PARTITION BY case_uid
ORDER BY EFF_BEGIN_DATE
ROWS UNBOUNDED PRECEDING))
min_eff_date,
-- MIN of break_at from the current row to the last row of the case.
-- NOTE(review): this yields the end of the current run only if end dates
-- increase together with EFF_BEGIN_DATE within a case -- confirm.
(MIN (
break_at)
OVER (
PARTITION BY case_uid
ORDER BY EFF_BEGIN_DATE
ROWS BETWEEN CURRENT ROW
AND UNBOUNDED FOLLOWING))
max_end_date
FROM (SELECT GT.*,
-- start_at: NULL when the previous row of the same case (by EFF_BEGIN_DATE)
-- ended exactly one day before this row begins; otherwise this row's
-- EFF_BEGIN_DATE. The first row of a case has no previous row, so the
-- comparison is not true and it gets its own EFF_BEGIN_DATE.
(CASE
WHEN LAG (
EFF_end_DATE)
OVER (
PARTITION BY CASE_UID
ORDER BY EFF_BEGIN_DATE) =
EFF_BEGIN_DATE
- 1
THEN
NULL
ELSE
EFF_BEGIN_DATE
END)
start_at,
-- break_at: NULL when the next row of the same case begins the day after this
-- row ends; otherwise this row's EFF_END_DATE. For an end date of 12/31/9999
-- the next begin date is compared with the end date itself instead of end + 1
-- (presumably to avoid date arithmetic past 12/31/9999 -- confirm).
(CASE
WHEN LEAD (
EFF_BEGIN_DATE)
OVER (
PARTITION BY case_uid
ORDER BY EFF_BEGIN_DATE) =
CASE
WHEN EFF_end_DATE <>
TO_DATE (
'12/31/9999',
'mm/dd/yyyy')
THEN
EFF_end_DATE
+ 1
ELSE
EFF_end_DATE
END
THEN
NULL
ELSE
EFF_end_DATE
END)
break_at
FROM TABLE_OUTPUT GT
WHERE SRC = 'ERICSSON' AND STATUS_CODE = 'X1') l1)
l2)
Part of the code was commented out and rewritten.
commented out code output
OFF TIME 1/1/2017 1/7/2017 X1
OFF TIME 1/8/2017 2/1/2017 X1
New code output
OFF TIME 1/1/2017 2/1/2017 X1
NORMAL 2/2/2017 2/2/2017 AB
OFF TIME 2/20/2017
The LAG function is used to access data from a previous row.
The LEAD function is used to return data from rows further down the result set.
MIN and MAX are aggregate functions (used here as analytic functions with an OVER clause).
I am quite confused by the flow of the code.
I can't understand the code as a whole; please explain its logic.
Related
The table contains numbers from 944900000 to 944999999, and I want to split these numbers into ranges of 1000 each, like:
944900000 to 944900999 -- 1000
944901000 to 944901999 -- 1000
..
..
944999000 to 944999999 -- 1000
Is there any way to generate this with Oracle SQL alone, without PL/SQL?
You could also use this.
--Your data
-- Sample data: one row per integer from 944900000 through 944999999.
CREATE TABLE your_table (your_column) AS
    SELECT 944899999 + LEVEL
    FROM dual
    CONNECT BY LEVEL <= 944999999 - 944900000 + 1;
--from 944900000 to 944999999
-- List each range of 1000 values as (start_range, end_range).
WITH range_starts AS (
    -- start_range is set only on values that are multiples of 1000
    SELECT your_column,
           CASE MOD(your_column, 1000) WHEN 0 THEN your_column END AS start_range
    FROM your_table
),
range_bounds AS (
    -- end_range is the value 999 rows after each range start (by ascending your_column);
    -- it is NULL on non-start rows and on a start row with fewer than 999 rows after it
    SELECT start_range,
           CASE
               WHEN start_range IS NOT NULL
               THEN LEAD(your_column, 999) OVER (ORDER BY your_column)
           END AS end_range
    FROM range_starts
)
SELECT DISTINCT start_range, end_range
FROM range_bounds
WHERE end_range IS NOT NULL
ORDER BY start_range, end_range;
You could use ROW_NUMBER to number all records according to increasing number value. Then, compute a DENSE_RANK using the "group" of increments of 1000 to which each record belongs.
-- Number every row by ascending col, then assign rows to consecutive buckets of 1000.
WITH cte AS (
    -- ROW_NUMBER is a function call and needs its empty argument list
    SELECT t.*, ROW_NUMBER() OVER (ORDER BY col) AS rn
    FROM yourTable t
),
cte2 AS (
    -- rn starts at 1, so shift it to 0-based before dividing; without the shift the
    -- first bucket would hold only rows 1..999 and every later bucket would be off by one
    SELECT t.*, DENSE_RANK() OVER (ORDER BY FLOOR((rn - 1) / 1000)) AS dr
    FROM cte t
)
SELECT *
FROM cte2
WHERE dr = 2; -- e.g. for 2nd partition of 1000 records
I have the following setup, which works fine and generates output as expected.
I'm trying to add the locations subquery into the CTE so my output will have a random location_id for each row.
The subquery is straightforward and should work, but I am getting syntax errors when I try to place it into the 'data' CTE. I was hoping someone could help me out.
CREATE TABLE employees(
employee_id NUMBER(6),
emp_name VARCHAR2(30)
);
INSERT INTO employees(
employee_id,
emp_name
) VALUES
(1, 'John Doe');
INSERT INTO employees(
employee_id,
emp_name
) VALUES
(2, 'Jane Smith');
INSERT INTO employees(
employee_id,
emp_name
) VALUES
(3, 'Mike Jones');
-- Generate the locations lookup table: one row per level, named 'Door <level>'.
CREATE TABLE locations AS
SELECT level AS location_id,
       'Door ' || level AS location_name
FROM dual
-- NOTE(review): the upper bound and the statement terminator were missing here, which made
-- this statement run into the next query; 5 is an assumed value -- confirm the intended count.
CONNECT BY level <= 5;
-- Repeat each employee a random number of times (n), giving every output row its own random date.
with rws as (
    -- five rows numbered 1..5
    select level rn
    from dual
    connect by level <= 5
),
data as (
    -- every employee plus a random repeat count n
    select e.*,
           round (dbms_random.value(1,5)) n
    from employees e
)
select d.employee_id,
       d.emp_name,
       trunc (sysdate) + dbms_random.value (0, 5) AS random_date
from rws r
join data d on r.rn <= d.n
order by d.employee_id;
-- trying to make this work
with rws as ( select level rn from dual connect by level <= 5 ),
data as ( select e.*, loc.location_id = (
select location_id
from locations order by dbms_random.value()
fetch first 1 row only
),
round (dbms_random.value(1,5)
) n from employees e )
select employee_id,
emp_name,
trunc (sysdate) + dbms_random.value (0, 5) AS random_date
from rws
join data d on rn <= n
order by employee_id;
You need to alias the subquery column expression, rather than trying to assign it to a [variable] name. So instead of this:
with rws as ( select level rn from dual connect by level <= 5 ),
data as ( select e.*, loc.location_id = (
select location_id
from locations order by dbms_random.value()
fetch first 1 row only
),
round (dbms_random.value(1,5)
) n from employees e )
you would do this:
with rws as (
select level rn
from dual
connect by level <= 5
),
data as (
select e.*,
(
select location_id
from locations
order by dbms_random.value()
fetch first 1 row only
) as location_id,
round (dbms_random.value(1,5)) as n
from employees e
)
db<>fiddle
But yes, you'll get the same location_id for each row, which probably isn't what you want.
There are probably better ways to avoid it (or to approach whatever you're actually trying to achieve) but one option is to force the subquery to be correlated by adding something like:
where location_id != -1 * e.employee_id
db<>fiddle
although that might be expensive. It's probably worth asking a new question about that specific aspect.
I am getting the same location_id for every employee_id, which I don't want either.
The subquery is in the wrong place then; move it to the main query, and correlate against both ID and n:
-- Emit n rows per employee, each with a random date and a location_id picked by a scalar subquery.
-- rws: five rows numbered 1..5, used to multiply each employee row.
with rws as (
select level rn
from dual
connect by level <= 5
),
-- data: every employee plus a random repeat count n.
data as (
select e.*,
round (dbms_random.value(1,5)) as n
from employees e
)
select d.employee_id,
d.emp_name,
(
select location_id
from locations
-- This predicate references d.employee_id and d.n only to make the subquery correlated
-- with the outer row, so that it is not evaluated just once for the whole result.
-- NOTE(review): assumes location_id, employee_id and n are positive, so the predicate
-- never filters anything out -- confirm.
where location_id != -1 * d.employee_id * d.n
order by dbms_random.value()
fetch first 1 row only
) as location_id,
trunc (sysdate) + dbms_random.value (0, 5) AS random_date
from rws r
-- keeps rws rows 1..n for each employee, i.e. n output rows per employee
join data d on r.rn <= d.n
order by d.employee_id;
db<>fiddle
Or move the location part to a new CTE, I suppose, with its own row number; and join that on one of your other generated values.
I have a Hive table (details below):
hive> select * from abcd ;
OK
a 1 1
b 2 2
a 3 3
Time taken: 0.261 seconds, Fetched: 3 row(s)
hive> desc abcd;
OK
val001 string
val002 int
val003 int
Time taken: 0.084 seconds, Fetched: 3 row(s)
I am running the following query but receiving the error below:
select max(rnk) rnk, max(val) val, sum(cnt) cnt from (select val, count(*) cnt, row_number() over (order by case val when null then 0 else count(*) end desc, val) rnk from (select VAL001 val from abcd ) group by val) group by case when rnk <= 100 or val is null then rnk else 100 + 1 end;
FAILED: ParseException line 3:55 missing ) at 'by' near 'by'
line 3:58 missing EOF at 'val' near 'by'
I am looking for the following result from the above query:
RNK VAL CNT
--- ------------------------------ ---
1 a 2
2 b 1
I was able to achieve this in an Oracle database with a similar table. The only difference is that in Oracle I used ORDER BY DECODE instead of ORDER BY CASE; since DECODE is not supported in Hive, I cannot do the same there.
Here is the Oracle SQL query that works:
SQL> select max(rnk) rnk, max(val) val, sum(cnt) cnt from
(select val, count(*) cnt, row_number() over (order by
decode(val,null,0,count(*)) desc, val) rnk from (select VAL001 val from
table_name ) group by val)
group by case when rnk <= 100 or val is null then rnk else 100 + 1 end;
RNK VAL CNT
--- ------------------------------ ---
1 a 2
2 b 1
Can anyone please help me fix the Hive query? Let me know if you need any more details.
This is your query. I suspect there is a simpler way to get what you want:
select max(rnk) as rnk, max(val) as val, sum(cnt) as cnt
from (select val, count(*) as cnt,
row_number() over (order by case val when null then 0 else count(*) end desc, val) as rnk
from (select VAL001 val from abcd )
group by val
)
group by case when rnk <= 100 or val is null then rnk else 100 + 1 end;
I think you just need table aliases for the subqueries in the from clause:
-- Rank values by descending frequency (NULL values sort as count 0), then collapse every
-- rank above 100 into a single bucket. Each derived table has its own alias.
select max(rnk) as rnk, max(val) as val, sum(cnt) as cnt
from (select val, count(*) as cnt,
             -- A searched CASE with IS NULL is required here: the simple form
             -- "case val when null" compares val = NULL, which is never true, so NULL
             -- values would not get the 0 sort key that DECODE(val, null, 0, ...) gives.
             row_number() over (order by case when val is null then 0 else count(*) end desc, val) as rnk
      from (select VAL001 val from abcd
           ) x
      group by val
     ) y
group by case when rnk <= 100 or val is null then rnk else 100 + 1 end;
This is not technically a simpler solution, but it is possibly easier to read:
The first subquery performs the count and ranking,
the second subquery the categorisation in the top 1 - top 100 and the special categories for other (top) and unknown.
The final query makes the grouping.
-- cnt: count per value and rank by descending count (NULL values sort as count 0).
with cnt as (
    select VAL001 val,
           count(*) as cnt,
           -- CASE instead of DECODE: the question states DECODE is not available in Hive,
           -- and the searched CASE handles NULL the same way
           row_number() over (order by case when VAL001 is null then 0 else count(*) end desc, VAL001) as rnk
    from abcd
    group by VAL001),
-- ctg: label each value as 'unknown' (NULL), 'top <rank>' (rank 1..100) or 'other'.
ctg as (
    select
        val, cnt, rnk,
        case when val is NULL then 'unknown'
             when rnk <= 100 then 'top '||rnk
             else 'other' end as category_code
    from cnt)
-- Final step: one output row per category.
select
    max(rnk) as rnk, max(val) as val, sum(cnt) as cnt
from ctg
group by category_code
order by rnk
I have a procedure that returns multiple rows matching some criteria, in a specific order. These rows are split into pages (50 rows per page).
How can I retrieve all rows from the page that contains a specific row?
I've created a query that does this, but it is not optimized and has a huge impact on performance. Please help me optimize it or suggest an alternative:
select *
from
(
select file_id, row_number() over (order by rownum) rn
from my_table
)
where trunc(rn/50) = (
select trunc(rn/50) from
(select t.*, rownum rn from my_table t)
where file_id = 29987);
You have to adjust it a little...
-- Demo of "fetch the whole 50-row page that contains a given row", using generated data.
with tab as
(
    -- the
    -- row_number() over (order by rownum) rn
    -- should be here
    select level + 1000 as val
         , level/50 as rn_50     -- row position expressed in pages: rows 1..50 give (0, 1]
    from dual
    connect by
        level < 140
)
, val as
(
    -- page position of the row being searched for
    select rn_50
    from tab
    where val = 1004 -- pg 1
    --where val = 1051 -- pg 2
    --where val = 1101 -- pg 3
)
select *
from tab t
-- Page k holds the rows with k - 1 < rn_50 <= k, where k = ceil(rn_50) of the searched row.
-- A half-open interval is needed: with floor()..ceil() inclusive bounds, a row on a page
-- boundary (rn_50 = 1, 2, ...) appears on two pages, and searching for such a row
-- returns only that single row.
where rn_50 >  (select ceil(rn_50) - 1 from val)
  and rn_50 <= (select ceil(rn_50)     from val)
;
I have a table with 500k transactions. I want to fetch the last balance for a particular date, so I have written a query like the one below.
SELECT curr_balance
FROM transaction_details
WHERE acct_num = '10'
AND is_deleted = 'N'
AND ( value_date, srl_num ) IN(
SELECT MAX( value_date ), MAX( srl_num )
FROM transaction_details
WHERE TO_DATE( value_date, 'dd/mm/yyyy' )
<= TO_DATE( ADD_MONTHS( '05-APR-2012', 1 ), 'dd/mm/yyyy' )
AND acct_num = '10'
AND is_deleted = 'N'
AND ver_status = 'Y' )
AND ver_status = 'Y'
This has to be executed 12 times, incrementing the month each time, to find the last balance for each month. But the query has a high CPU cost, and running it 12 times takes a very long time. How can I rewrite the query to get the results faster? Could it be split into two parts in PL/SQL to improve performance?
Try:
-- One pass over transaction_details: rank the rows inside each month bucket by srl_num
-- descending and keep the top row of every bucket.
select * from(
    SELECT value_date, srl_num, curr_balance,
           -- The analytic expression belongs in the select list; placed after the WHERE
           -- predicates it is a syntax error. Buckets are the month of (value_date - 5 days).
           row_number() over (partition by trunc(value_date - interval '5' day,'MM')
                              order by srl_num desc
                             ) as rnk
    FROM transaction_details
    WHERE acct_num = '10'
      AND is_deleted = 'N'
      AND ver_status = 'Y'
)
where rnk = 1;
You'll get a report with the balance for the last srl_num of each month in your table.
The benefit is that your approach scans the table 24 times for a 12-month report, whereas my approach scans the table once.
The analytic function ranks the records within each month (the PARTITION BY clause), ordering the rows of the month by srl_num descending.
You don't have to query your table twice. Try using analytic functions
-- Single scan of transaction_details: keep the row(s) whose value_date and srl_num both
-- equal the overall maxima among the filtered rows.
SELECT t.curr_balance
-- , any other column you want as long it is in the subselect.
FROM (
    SELECT
          trans.curr_balance
        , trans.value_date
        -- any other column you want
        , trans.srl_num
        -- OVER () computes each maximum across the whole filtered set. Partitioning by
        -- (value_date, srl_num) would make each maximum equal the row's own value, so
        -- the outer filter would keep every row.
        , MAX(trans.srl_num) OVER() max_srl_num
        , MAX(trans.value_date) OVER() max_date
    FROM transaction_details trans
    WHERE TO_DATE( value_date, 'dd/mm/yyyy' ) <= TO_DATE( ADD_MONTHS( '01-APR-2012', 1 ), 'dd/mm/yyyy' )
      AND acct_num = '10'
      AND is_deleted = 'N'
      AND ver_status = 'Y'
) t
WHERE t.max_date = t.value_date
  AND t.max_srl_num = t.srl_num
A couple of thoughts.
Why do you have TO_DATE( value_date...? Isn't your data type DATE? This might prevent the use of an index on that column, if you have one.
Note (this is a wild guess) that if your srl_num is not the highest for the latest date, you will get incorrect results and might not get any rows back.