-- prcdr_Clustering (original version under discussion).
-- For each TBL_BIL row with UDF_CHK = 'N' it counts the rows sharing the same
-- UDF_TOKENIZED value, marks all of them as checked ('Y'), stores the count and
-- a flag on the fetched ("sample") row, and stamps the sample row's UNIQUEID on
-- every row of that group as UDF_PID.
create or replace procedure prcdr_Clustering is
v_sampleCount number;
v_sampleFlag number;
-- NOTE(review): v_matchPercent is declared but never referenced below.
v_matchPercent number;
-- NOTE(review): v_SpendAmount is never assigned, so it is NULL when written to UDF_PIDSPEND.
v_SpendAmount Number(18, 2);
-- All rows that have not been processed yet.
cursor cur_PDCSample is
SELECT *
FROM TBL_BIL
WHERE UDF_CHK = 'N';
rec_Pdcsample TBL_BIL%rowtype;
BEGIN
OPEN cur_PDCSample;
LOOP
FETCH cur_PDCSample
into rec_Pdcsample;
EXIT WHEN cur_PDCSample%NOTFOUND;
-- Size of the group sharing the fetched row's token (regardless of UDF_CHK).
SELECT COUNT(*)
INTO v_sampleCount
FROM TBL_BIL
WHERE UDF_TOKENIZED = rec_Pdcsample.UDF_TOKENIZED;
-- NOTE(review): the fetched row is itself in TBL_BIL, so the count is presumably 0
-- only when UDF_TOKENIZED is NULL (NULL never compares equal). In that case nothing
-- is updated, UDF_CHK stays 'N', and the reopened cursor can return the same row again.
IF v_sampleCount <> 0 THEN
-- Mark the whole group as processed.
UPDATE TBL_BIL
SET UDF_CHK = 'Y'
WHERE UDF_TOKENIZED = rec_Pdcsample.UDF_TOKENIZED;
-- Flag: 1 = group has more than one row, 2 = group of exactly one row.
IF v_sampleCount > 1 THEN
v_sampleFlag := 1;
ELSE
IF v_sampleCount = 1 THEN
v_sampleFlag := 2;
ELSE
-- Not reachable for a non-zero COUNT(*) result.
v_sampleFlag := 0;
END IF;
END IF;
-- Count and flag are stored on the sample row only.
UPDATE TBL_BIL
SET UDF_SAMPLECOUNT = v_sampleCount, UDF_SAMPLEFLAG = v_sampleFlag
WHERE uniqueid = rec_Pdcsample.uniqueid;
-- Every row of the group points at the sample row.
UPDATE TBL_BIL
SET UDF_PID = rec_Pdcsample.uniqueid
WHERE UDF_TOKENIZED = rec_Pdcsample.UDF_TOKENIZED;
UPDATE TBL_BIL
SET UDF_PIDSPEND = v_SpendAmount
WHERE uniqueid = rec_Pdcsample.uniqueid;
-- All other rows of the group are marked as a match.
UPDATE TBL_BIL
SET UDF_MATCHPERCENT = 1
WHERE uniqueid <> rec_Pdcsample.uniqueid
AND UDF_TOKENIZED = rec_Pdcsample.UDF_TOKENIZED;
END IF;
-- The cursor is closed and reopened on every iteration, so the query is re-executed
-- each time and only the first row of each execution is ever processed.
IF cur_PDCSample%ISOPEN THEN
CLOSE cur_PDCSample;
END IF;
OPEN cur_PDCSample;
END LOOP;
IF cur_PDCSample%ISOPEN THEN
CLOSE cur_PDCSample;
END IF;
-- NOTE(review): the END label below does not match the procedure name prcdr_Clustering.
end PrcdrClustering;
It takes me days to execute, my table has 225,846 rows of data.
The structure of my table is :-
UNIQUEID NUMBER Notnull primary key
VENDORNAME VARCHAR2(200)
SHORTTEXT VARCHAR2(500)
SPENDAMT NUMBER(18,2)
UDF_TOKENIZED VARCHAR2(999)
UDF_PID NUMBER(10)
UDF_SAMPLEFLAG NUMBER(4)
UDF_SAMPLECOUNT NUMBER(4)
UDF_MATCHPERCENT NUMBER(4)
UDF_TOKENCNT NUMBER(4)
UDF_PIDSPEND NUMBER(18,2)
UDF_CHK VARCHAR2(1)
Where to start? I have a number of points to make.
You're doing bulk updates; this implies that bulk collect ... forall would be far more efficient.
You're doing multiple updates of the same table, which doubles the amount of DML.
As you've already selected from the table, re-entering it to do another count is pretty pointless, use an analytic function to get the result you need.
Indentation, indentation, indentation. Makes your code much easier to read.
You can use elsif to reduce the amount of statements to be evaluated ( very, very minor win )
If the uniqueid is unique you can use rowid to update the table.
You're updating udf_pidspend to null, whether this is intentional or not there's no need to do a separate update for it.
You can do a lot more in the cursor, but there's obviously no need to select everything, which'll decrease the amount of data you need to read from the disks.
You may need a couple of commits in there; though this means you can't rollback if it fails midway.
I hope tbl_bil is indexed on uniqueid
As GolzeTrol noted you're opening the cursor multiple times. There's no need for this.
As general rules:
If you're going to select / update or delete from a table do it once if possible and as few times as possible if not.
If you're doing bulk operations use bulk collect.
Never write select *
Use rowid where possible it avoids all index problems.
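This will only work in 11g, because earlier versions cannot reference individual fields of a collection of records inside a FORALL statement. I answered a question recently where I provided my own way of dealing with this implementation restriction in versions prior to 11g, and linked to Ollie's, Tom Kyte's and Sathya's.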
I'm not entirely certain what you're trying to do here so please forgive me if the logic is a little off.
create or replace procedure prcdr_Clustering is
  -- Clusters TBL_BIL rows by UDF_TOKENIZED.
  --
  -- For every token group that still contains an unprocessed row
  -- (udf_chk = 'N') one "sample" row is chosen: the unprocessed row with the
  -- highest uniqueid. Then:
  --   * the sample row gets udf_samplecount (size of the whole group),
  --     udf_sampleflag (1 = more than one row, 2 = exactly one row) and
  --     udf_pidspend = null;
  --   * every row of the group gets udf_chk = 'Y' and udf_pid = the sample's
  --     uniqueid;
  --   * every other row of the group gets udf_matchpercent = 1.
  -- Rows with a null udf_tokenized are skipped, because they cannot be
  -- matched to a group with "=".
  --
  -- The analytic functions are evaluated in the inline view, BEFORE the
  -- udf_chk filter, so samplecount covers the whole group and not only the
  -- unprocessed rows.
  cursor c_pdcsample is
    select rid
         , uniqueid
         , udf_tokenized
         , samplecount
         , case
             when samplecount > 1 then 1
             when samplecount = 1 then 2
             else 0
           end as sampleflag
      from ( select rowid as rid
                  , uniqueid
                  , udf_tokenized
                  , udf_chk
                  , count(*) over ( partition by udf_tokenized ) as samplecount
                  , max(case when udf_chk = 'N' then uniqueid end)
                      over ( partition by udf_tokenized ) as udf_pid
               from tbl_bil
              where udf_tokenized is not null )
     where udf_chk = 'N'
       and uniqueid = udf_pid;

  type t__pdcsample is table of c_pdcsample%rowtype index by binary_integer;
  t_pdcsample t__pdcsample;
begin
  open c_pdcsample;
  loop
    fetch c_pdcsample bulk collect into t_pdcsample limit 1000;
    -- Test the collection, not %notfound: the last batch may be partial.
    exit when t_pdcsample.count = 0;

    -- Sample rows: addressed by rowid, so no index lookup is needed.
    forall i in 1 .. t_pdcsample.count
      update tbl_bil
         set udf_samplecount = t_pdcsample(i).samplecount
           , udf_sampleflag = t_pdcsample(i).sampleflag
           , udf_pidspend = null
       where rowid = t_pdcsample(i).rid;

    -- Whole group: one pass sets the flag, the parent id and the match marker.
    forall i in 1 .. t_pdcsample.count
      update tbl_bil
         set udf_chk = 'Y'
           , udf_pid = t_pdcsample(i).uniqueid
           , udf_matchpercent = case
                                  when uniqueid <> t_pdcsample(i).uniqueid then 1
                                  else udf_matchpercent
                                end
       where udf_tokenized = t_pdcsample(i).udf_tokenized;
  end loop;
  close c_pdcsample;

  -- Commit once, after the cursor is closed: committing inside the fetch loop
  -- risks ORA-01555 and leaves the table half processed if a later batch fails.
  commit;
end prcdr_Clustering;
/
Lastly calling all tables tbl_... is a little bit unnecessary.
Here is a variant using a single SQL statement. I'm not 100% certain that the logic is exactly the same, but for my test set, it is. Also the current procedure is non deterministic when you have more than one record with udf_chk = 'N' and the same udf_tokenized ...
This is the refactored procedure
SQL> create procedure prcdr_clustering_refactored
2 is
3 -- Single-statement equivalent of prcdr_clustering.
4 -- For every udf_tokenized group that still contains an unprocessed row
5 -- (udf_chk = 'N'):
6 -- * 'N' rows get udf_samplecount (group size), udf_sampleflag (2 for a
7 -- group of one, otherwise 1) and udf_pidspend = null;
8 -- * the other rows of the group get udf_matchpercent = 1;
9 -- * every row of the group gets udf_pid = highest uniqueid among the
10 -- group's 'N' rows, and udf_chk = 'Y'.
11 -- Groups without any 'N' row are left untouched.
12 begin
13 merge into tbl_bil t
14 using ( select tb1.uniqueid
15 , count(*) over (partition by tb1.udf_tokenized) cnt
16 , max(decode(udf_chk,'N',uniqueid)) over (partition by tb1.udf_tokenized order by tb1.udf_chk) pid
17 from tbl_bil tb1
18 where udf_chk = 'N'
19 or exists
20 ( select 'dummy'
21 from tbl_bil tb2
22 where tb2.udf_tokenized = tb1.udf_tokenized
23 -- without this filter every row matches itself, so groups that were
24 -- already processed would get udf_pid overwritten with null
25 and tb2.udf_chk = 'N'
26 )
27 ) q
28 on ( t.uniqueid = q.uniqueid )
29 when matched then
30 update
31 set t.udf_samplecount = decode(t.udf_chk,'N',q.cnt,t.udf_samplecount)
32 , t.udf_sampleflag = decode(t.udf_chk,'N',decode(q.cnt,1,2,1),t.udf_sampleflag)
33 , t.udf_pid = q.pid
34 , t.udf_pidspend = decode(t.udf_chk,'N',null,t.udf_pidspend)
35 , t.udf_matchpercent = decode(t.udf_chk,'N',t.udf_matchpercent,1)
36 , t.udf_chk = 'Y'
37 ;
38 end;
39 /
Procedure created.
And here is a test:
SQL> select *
2 from tbl_bil
3 order by uniqueid
4 /
UNIQUEID VENDORNAME SHORTTEXT SPENDAMT UDF_TOKENI UDF_PID UDF_SAMPLEFLAG UDF_SAMPLECOUNT UDF_MATCHPERCENT UDF_TOKENCNT UDF_PIDSPEND U
-------- ---------- ---------- -------- ---------- ------- -------------- --------------- ---------------- ------------ ------------ -
1 a a 1 bl 0 0 0 0 0 0 N
2 a a 1 bla 0 0 0 0 0 0 N
3 a a 1 bla 0 0 0 0 0 0 Y
4 a a 1 bla 0 0 0 0 0 0 Y
5 a a 1 bla 0 0 0 0 0 0 Y
6 a a 1 blah 0 0 0 0 0 0 N
7 a a 1 blah 0 0 0 0 0 0 Y
8 a a 1 blah 0 0 0 0 0 0 Y
9 a a 1 blah 0 0 0 0 0 0 Y
10 a a 1 blah 0 0 0 0 0 0 Y
11 a a 1 blah 0 0 0 0 0 0 Y
11 rows selected.
SQL> exec prcdr_clustering
PL/SQL procedure successfully completed.
SQL> select *
2 from tbl_bil
3 order by uniqueid
4 /
UNIQUEID VENDORNAME SHORTTEXT SPENDAMT UDF_TOKENI UDF_PID UDF_SAMPLEFLAG UDF_SAMPLECOUNT UDF_MATCHPERCENT UDF_TOKENCNT UDF_PIDSPEND U
-------- ---------- ---------- -------- ---------- ------- -------------- --------------- ---------------- ------------ ------------ -
1 a a 1 bl 1 2 1 0 0 Y
2 a a 1 bla 2 1 4 0 0 Y
3 a a 1 bla 2 0 0 1 0 0 Y
4 a a 1 bla 2 0 0 1 0 0 Y
5 a a 1 bla 2 0 0 1 0 0 Y
6 a a 1 blah 6 1 6 0 0 Y
7 a a 1 blah 6 0 0 1 0 0 Y
8 a a 1 blah 6 0 0 1 0 0 Y
9 a a 1 blah 6 0 0 1 0 0 Y
10 a a 1 blah 6 0 0 1 0 0 Y
11 a a 1 blah 6 0 0 1 0 0 Y
11 rows selected.
SQL> rollback
2 /
Rollback complete.
SQL> exec prcdr_clustering_refactored
PL/SQL procedure successfully completed.
SQL> select *
2 from tbl_bil
3 order by uniqueid
4 /
UNIQUEID VENDORNAME SHORTTEXT SPENDAMT UDF_TOKENI UDF_PID UDF_SAMPLEFLAG UDF_SAMPLECOUNT UDF_MATCHPERCENT UDF_TOKENCNT UDF_PIDSPEND U
-------- ---------- ---------- -------- ---------- ------- -------------- --------------- ---------------- ------------ ------------ -
1 a a 1 bl 1 2 1 0 0 Y
2 a a 1 bla 2 1 4 0 0 Y
3 a a 1 bla 2 0 0 1 0 0 Y
4 a a 1 bla 2 0 0 1 0 0 Y
5 a a 1 bla 2 0 0 1 0 0 Y
6 a a 1 blah 6 1 6 0 0 Y
7 a a 1 blah 6 0 0 1 0 0 Y
8 a a 1 blah 6 0 0 1 0 0 Y
9 a a 1 blah 6 0 0 1 0 0 Y
10 a a 1 blah 6 0 0 1 0 0 Y
11 a a 1 blah 6 0 0 1 0 0 Y
11 rows selected.
Regards,
Rob.
I don't know why, but you open cur_PDCSample, which selects (I suspect) thousands of records. Then, in a loop, you close the cursor and reopen it, each time processing only the first record that is returned.
If you open the cursor once, process each record and then close it, your procedure will probably go a lot faster.
Actually, since you do not always update TBL_BIL.UDF_CHK to 'Y', it seems to me that your current procedure may run infinitely.
Related
I'd like to create a "SORTKEY" variable like the one below. The groups do not all have the same number of observations.
Basically, each SORTKEY group has 3 observations, but an observation with flg=1 is added to the current group instead of starting a new one.
In this example, that means SORTKEY = 2 has 4 observations and every other SORTKEY has 3 observations.
Is there a way to create SORTKEY manually? If you have a good idea, please give me some advice.
I want the following dataset, using the "test" dataset.
/*
SORTKEY NO FLG
1 1 0
1 2 0
1 3 0
2 4 0
2 5 0
2 6 0
2 7 1
3 8 0
3 9 0
3 10 0
*/
/* Sample input: one observation per line, read as NO (observation number)
   and FLG (0/1 marker used by the SORTKEY grouping rule described above). */
data test;
input no flg;
cards;
1 0
2 0
3 0
4 0
5 0
6 0
7 1
8 0
9 0
10 0
;
run;
Use a sequence counter to track the 3-rows-per-sortkey requirement.
Example:
/* Builds SORTKEY for the TEST dataset: a new key starts after every 3
   observations, except that an observation with FLG = 1 stays in the
   current group. */
data want;
  /* RETAIN comes before SET so SORTKEY is the first column, as in the
     requested layout (SORTKEY NO FLG); it starts at 1. */
  retain sortkey 1;
  set test;
  /* SEQ counts the observations in the current group (sum statement:
     implicitly retained, starts at 0). */
  seq + 1;
  /* A 4th (or later) observation opens a new group unless it is flagged. */
  if seq > 3 and flg ne 1 then do;
    seq = 1;
    sortkey + 1;
  end;
  /* SEQ is only a helper and is not part of the requested output. */
  drop seq;
run;
I would like to get a null value when I sum up and divide multiple values in the event that any of the values I am summing up is null. In the example below, I would like the return value to be null if any of the values I am summing up is null or zero.
(((CAST (NVL(XYY.SCR,NULL)AS NUMBER) - 57.81114) / 24.79211) + ((CAST(NVL(WPM_SCR,NULL)AS NUMBER) - 40.7836082505127) / 17.5946375921401) + ((CAST (NVL(SLOT3,NULL) AS NUMBER) - 50.204190919674 ) / 25.5100093808846) ) / 3 BASE
A simplified example:
anything + null or / null will be null anyway, so you don't have to do anything about it
for + 0 or / 0, use CASE (see lines #7 and #11)
SQL> with test (a, b) as -- sample rows: ordinary, zero and null second operand
2 (select 6, 3 from dual union all
3 select 5, 0 from dual union all
4 select 2, null from dual
5 )
6 select a, b,
7 case when b = 0 then null -- zero divisor: return null instead of a divide-by-zero error
8 else a/b -- a null b already yields null, no extra handling needed
9 end result_div,
10 -- sum: null as soon as either operand is 0 (a null operand yields null by itself)
11 case when a = 0 or b = 0 then null
12 else a + b
13 end result_sum
14 from test;
A B RESULT_DIV RESULT_SUM
---------- ---------- ---------- ----------
6 3 2 9
5 0
2
SQL>
I am building my knowledge of using SQL by using the basic 10x10 (-5 to 5) grid system on the game battleships to work out which grids avoid getting hit over a series of games.
I have come up with the following queries, to compare the X and Y grids on the board (game) to a table containing 11 rows of the -5 to 5 (including 0) numbers used per axis (grid_format). I have inserted 9 lines of test data (some of which are on the same grids).
The result should return about 114 rows, instead I only get 49 rows. Individually, the x_coord and y_coord queries return 7 rows, excluding the ones that were used on game, meaning that it is working. The problem lies with bringing them together, where entire y_coord grids are omitted.
Both these queries have given me the same result. Is anyone able to help me solve this dilemma please?
-- Table
-- Shots recorded so far: one row per shot, so the same cell may appear several times.
CREATE TABLE game(
x_coord NUMBER(2,0),
y_coord NUMBER(2,0));
-- Column lists are spelled out so the inserts keep working if columns are added or reordered.
INSERT INTO game (x_coord, y_coord) VALUES (4,4);
INSERT INTO game (x_coord, y_coord) VALUES (1,-4);
INSERT INTO game (x_coord, y_coord) VALUES (0,0);
INSERT INTO game (x_coord, y_coord) VALUES (0,0);
INSERT INTO game (x_coord, y_coord) VALUES (1,-5);
INSERT INTO game (x_coord, y_coord) VALUES (1,-5);
INSERT INTO game (x_coord, y_coord) VALUES (-2,4);
INSERT INTO game (x_coord, y_coord) VALUES (1,-5);
INSERT INTO game (x_coord, y_coord) VALUES (0,0);
-- The 11 values (-5 .. 5) that are valid on each axis of the board.
CREATE TABLE grid_format(
grid NUMBER(2,0));
INSERT INTO grid_format (grid) VALUES (-5);
INSERT INTO grid_format (grid) VALUES (-4);
INSERT INTO grid_format (grid) VALUES (-3);
INSERT INTO grid_format (grid) VALUES (-2);
INSERT INTO grid_format (grid) VALUES (-1);
INSERT INTO grid_format (grid) VALUES (0);
INSERT INTO grid_format (grid) VALUES (1);
INSERT INTO grid_format (grid) VALUES (2);
INSERT INTO grid_format (grid) VALUES (3);
INSERT INTO grid_format (grid) VALUES (4);
INSERT INTO grid_format (grid) VALUES (5);
-- Query
-- Pairs every unused Y value with every unused X value.
-- NOTE(review): the two inline views are combined with a comma and no join
-- condition, so the result is the cross product of "y values that never occur
-- in game" and "x values that never occur in game" (7 x 7 = 49 rows for the
-- test data), not the set of (x, y) cells that are absent from game.
SELECT X_Grid, Y_Grid
FROM
-- Y values of grid_format that match no game.y_coord
(SELECT grid AS Y_Grid
FROM grid_format
WHERE
NOT EXISTS (
SELECT *
FROM game
WHERE game.y_coord = grid_format.grid)),
-- X values of grid_format that match no game.x_coord
(SELECT grid AS X_Grid
FROM grid_format
WHERE
NOT EXISTS (
SELECT *
FROM game
WHERE game.x_coord = grid_format.grid)
-- this ORDER BY sorts only the inline view; the outer query has no ORDER BY
ORDER BY X_Grid DESC);
-- Alternative Solution
-- Same result as the NOT EXISTS version, written with MINUS.
-- NOTE(review): each axis is filtered independently and the two result sets
-- are then cross-joined (comma, no join condition), so any cell whose x OR y
-- value was ever used is dropped, not just the cells that were actually hit.
SELECT X_Grid, Y_Grid
FROM
-- X values never used in game
(SELECT grid AS X_Grid
FROM grid_format
MINUS
SELECT x_coord
FROM game),
-- Y values never used in game
(SELECT grid AS Y_Grid
FROM grid_format
MINUS
SELECT y_coord
FROM game)
Here are the results for the test (see link):
Result from query
Thank you.
You could use something like that:
-- Every (x, y) cell of the board, with the matching game row(s) where one exists.
WITH
    -- all combinations of the axis values
    cells AS (
        SELECT gx.grid AS x,
               gy.grid AS y
        FROM grid_format gx
        CROSS JOIN grid_format gy
    )
SELECT cells.x,
       cells.y,
       game.x_coord,
       game.y_coord
FROM cells
LEFT JOIN game
    ON  game.x_coord = cells.x
    AND game.y_coord = cells.y;
which returns all the grid positions (x,y) and, if there is a ship, a value in X_COORD and Y_COORD, something like
X Y X_COORD Y_COORD
3 3 3 3
-5 2
-2 5
3 -5
-2 -4
EDIT:
You could even display the grid graphically with something like
-- Board rendered as a matrix: one row per y value, one column per x value,
-- each cell holding the number of game rows recorded for that position.
WITH
    -- all combinations of the axis values
    cells AS (
        SELECT gx.grid AS x_coord,
               gy.grid AS y_coord
        FROM grid_format gx
        CROSS JOIN grid_format gy
    ),
    -- one row per cell and matching game row; ship = 1 when a game row matched
    marked AS (
        SELECT cells.x_coord,
               cells.y_coord,
               CASE WHEN game.rowid IS NOT NULL THEN 1 ELSE 0 END AS ship
        FROM cells
        LEFT JOIN game
            ON  game.x_coord = cells.x_coord
            AND game.y_coord = cells.y_coord
    )
SELECT *
FROM marked
PIVOT (SUM(ship)
       FOR x_coord IN (-5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5))
ORDER BY y_coord;
-5 0 0 0 0 0 0 0 0 0 0 0
-4 0 0 0 0 0 0 0 0 0 0 0
-3 0 0 0 0 0 0 0 0 0 0 0
-2 0 0 0 0 0 0 0 0 0 0 0
-1 0 0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0 0 0
1 0 0 0 0 0 0 0 0 0 0 0
2 0 0 0 0 0 0 0 0 0 0 0
3 0 0 0 0 0 0 0 0 1 0 0
4 0 0 0 0 0 0 0 0 0 0 0
5 0 0 0 0 0 0 0 0 0 0 0
I want to create 10 pairs of random integers within the range of [-4,+4) using PLSQL and then using the pair to solve a primary equation(num1*X+num2=0) and save the results inside the table, as well as a text output of what the type of outcome for the equation(Solvable/Indefinite/Impossible). I am using Oracle LiveSQL.
I used cursor to make sure the randomized number is an integer.
CURSOR NUMcursor1 IS
SELECT ROUND(DBMS_RANDOM.VALUE(-4,+4),0) FROM DUAL;
num1 EquatA2.dat1%TYPE; num2 EquatA2.dat2%TYPE;
solution EquatA2.sol%TYPE; notes EquatA2.note%TYPE;
`
`
CREATE TABLE EquatA2
(
dat1 NUMBER(2,0),
dat2 NUMBER(2,0),
sol NUMBER(6,3),
note VARCHAR2(20)
)
DECLARE
num1 EquatA2.dat1%TYPE; num2 EquatA2.dat2%TYPE;
solution EquatA2.sol%TYPE; notes EquatA2.note%TYPE;
i INT; i:=1;
CURSOR NUMcursor1 IS
SELECT ROUND(DBMS_RANDOM.VALUE(-4,+4),0) FROM DUAL;
CURSOR NUMcursor2 IS
SELECT ROUND(DBMS_RANDOM.VALUE(-4,+4),0) FROM DUAL;
BEGIN
OPEN NUMcursor1;
OPEN NUMcursor2;
FOR i IN 1..10
LOOP
FETCH NUMcursor1 INTO num1;
EXIT WHEN NUMcursor1%NOTFOUND;
FETCH NUMcursor2 INTO num2;
EXIT WHEN NUMcursor2%NOTFOUND;
DBMS_OUTPUT.PUT_LINE(num1);
DBMS_OUTPUT.PUT_LINE(num2);
IF (num1 != 0) THEN solution := -num2 / num1 AND notes := 'solvable';
ELSIF (num1 == 0 AND num2 == 0) THEN notes := 'indefinite';
ELSIF (num1 == 0 AND num2 != 0) THEN notes := 'impossible';
END IF;
INSERT INTO EquatA2 VALUES(num1,num2,solution,notes);
END LOOP;
END;
`
`
Expected results: 10 text outputs and the range of the random numbers to be [-4,+4)
Actual results(errors):
ORA-00922: missing or invalid option
Invalid statement
Unsupported Command
Invalid statement
Result Set 6
ROUND(DBMS_RANDOM.VALUE(-4,+4),0)
-2
Download CSV
Invalid statement
Result Set 7
ROUND(DBMS_RANDOM.VALUE(-4,+4),0)
-3
Download CSV
ORA-06550: line 18, column 56: PLS-00103: Encountered the symbol "=" when expecting one of the following: . ( * # % & = - + ; < / > at in is mod remainder not rem <an exponent (**)> <> or != or ~= >= <= <> and or like like2 like4 likec between || multiset member submultiset The symbol "* was inserted before "=" to continue.
I've modified your code so that it works. Here you go.
Table:
SQL> CREATE TABLE EquatA2
2 (
3 dat1 NUMBER(2,0),
4 dat2 NUMBER(2,0),
5 sol NUMBER(6,3),
6 note VARCHAR2(20)
7 );
Table created.
SQL>
PL/SQL anonymous procedure:
SQL> DECLARE
2 -- Fills EquatA2 with 10 random equations num1*X + num2 = 0, where num1 and
3 -- num2 are integers in [-4, +4), together with the solution (when there is
4 -- exactly one) and a note classifying the equation.
5 num1 EquatA2.dat1%TYPE;
6 num2 EquatA2.dat2%TYPE;
7 solution EquatA2.sol%TYPE;
8 notes EquatA2.note%TYPE;
9 BEGIN
10 -- start from an empty table so reruns do not accumulate rows
11 DELETE FROM EquatA2;
12 FOR i IN 1..10 LOOP
13 -- DBMS_RANDOM.VALUE(-4, 4) is >= -4 and < 4, so FLOOR yields -4 .. 3 with
14 -- equal probability; ROUND would also return +4 and under-weight both ends
15 num1 := FLOOR(DBMS_RANDOM.VALUE(-4, 4));
16 num2 := FLOOR(DBMS_RANDOM.VALUE(-4, 4));
17 -- reset on every pass: otherwise a non-solvable row inherits the
18 -- solution of the previous iteration
19 solution := NULL;
20
21 IF num1 != 0 THEN
22 solution := -num2 / num1;
23 notes := 'solvable';
24 ELSIF num2 = 0 THEN
25 -- 0*X + 0 = 0 holds for every X
26 notes := 'indefinite';
27 ELSE
28 -- 0*X + num2 = 0 with num2 != 0 holds for no X
29 notes := 'impossible';
30 END IF;
31
32 INSERT INTO EquatA2 (dat1, dat2, sol, note) VALUES (num1, num2, solution, notes);
33 END LOOP;
34 END;
35 /
PL/SQL procedure successfully completed.
Result:
SQL> select * from equata2;
DAT1 DAT2 SOL NOTE
---------- ---------- ---------- --------------------
1 -4 4 solvable
-1 0 0 solvable
0 3 0 impossible
0 2 0 impossible
0 0 0 indefinite
3 -1 ,333 solvable
4 3 -,75 solvable
1 -1 1 solvable
-1 -2 -2 solvable
2 3 -1,5 solvable
10 rows selected.
SQL>
I've created a pivot table with data from multiple tables (using JOINS). How can I add another column to the table which adds up each column from each row?
Example:
Category | A | B | C |
ABC 1 1 1
A 1 0 0
B 0 1 0
C 0 0 1
Category | A | B | C | TOTAL
ABC 1 1 1 3
A 1 0 0 1
B 0 1 0 1
C 0 0 1 1
SCOTT#research 15-APR-15> select * from testing ;
CATEG A B C
----- ---------- ---------- ----------
ABC 1 1 1
A 1 0 0
B 0 1 0
C 0 0 1
SCOTT#research 15-APR-15> select category,a,b,c, sum(a+b+c) as "total" from testing group by category,a,b,c order by category;
CATEG A B C total
----- ---------- ---------- ---------- ----------
A 1 0 0 1
ABC 1 1 1 3
B 0 1 0 1
C 0 0 1 1
If you want to store the total in the table instead, add a column using this:
alter table testing add total int;
use this procedure to update the values
create or replace procedure add_Test
is
  -- Fills testing.total with the sum of a + b + c over all rows that share the
  -- row's category, then commits.
  -- Rows with a null category are left unchanged: they cannot be matched to a
  -- category with "=".
begin
  -- One set-based statement instead of a SELECT plus UPDATE per row.
  update testing t
     set t.total = ( select sum(s.a + s.b + s.c)
                       from testing s
                      where s.category = t.category )
   where t.category is not null;
  commit;
end add_Test;
/
exec add_test;
SCOTT#research 15-APR-15> select * from testing;
CATEG A B C TOTAL
----- ---------- ---------- ---------- ----------
ABC 1 1 1 3
A 1 0 0 1
B 0 1 0 1
C 0 0 1 1