Greenplum does not execute the query - greenplum

We are facing an issue with a query when we try to run it.
Any idea why Greenplum would not pick up a query?
The query for reference :
-- Flattens the question arrays stored in the JSON documents of
-- ods.daily_all_json into one row per question, and LEFT JOINs every
-- question to the question it is conditioned on (idConditionQuestion).
WITH
-- crmp: for rows whose lib_collection ends with '-Document-CRMP', keep only
-- the json_data with the most recent mpData.common.modificationDate.$date
-- per _id.$oid (sequ = 1).
crmp AS (
SELECT json_data
FROM (
SELECT
json_data,
ROW_NUMBER() OVER (PARTITION BY oid ORDER BY modificationDate DESC) AS sequ
FROM (
SELECT
json_data#>>'{_id,$oid}' AS oid,
json_data,
CAST(json_data#>>'{mpData,common,modificationDate,$date}' AS TIMESTAMP WITHOUT TIME ZONE) AS modificationDate
FROM ods.daily_all_json
WHERE lib_collection LIKE '%-Document-CRMP'
) t
) t2 WHERE sequ = 1
),
-- cr: same de-duplication for rows whose lib_collection ends with
-- '-CompteRendu'; here the date is read from common.modificationDate.$date.
cr AS (
SELECT json_data FROM (
SELECT
json_data,
ROW_NUMBER() OVER (PARTITION BY oid ORDER BY modificationDate DESC) AS sequ
FROM (
SELECT
json_data#>>'{_id,$oid}' AS oid,
json_data,
CAST(json_data#>>'{common,modificationDate,$date}' AS TIMESTAMP WITHOUT TIME ZONE) AS modificationDate
FROM ods.daily_all_json
WHERE lib_collection LIKE '%-CompteRendu'
) t
) t2 WHERE sequ = 1
)
SELECT
-- Surrogate key: MD5 of document id, form id, form type, row number and the
-- question's own id (falling back to its label when it has no _id.$oid).
MD5(questions.cod_cr_oid||questions.cod_formulaire_oid||COALESCE(questions.formulaire_type,'')||questions.row_num||(COALESCE(question#>>'{_id,$oid}',question#>>'{label}'))) AS cod_question_oid,
question#>>'{_id,$oid}' AS cod_question_oid_original,
(question#>>'{hasInfobulle}')::BOOLEAN AS bl_infobulle,
question#>>'{infobulleText}' AS lib_infobulle,
question#>>'{label}' AS lib_label,
(question#>>'{isDefault}')::BOOLEAN AS bl_default,
question#>>'{aide}' AS lib_aide,
question#>>'{questionType}' AS lib_question_type,
(question#>>'{required}')::BOOLEAN AS bl_required,
(question#>>'{hideCommentaireInput}')::BOOLEAN AS bl_hide_commentaire_input,
(question#>>'{poids}')::INTEGER AS nbr_poids,
question#>>'{metierQuestion}' AS lib_metier_question,
question#>>'{acteurQuestion}' AS lib_acteur_question,
question#>>'{idConditionQuestion,$oid}' AS cod_question_condition_oid_original,
questions_condition.cod_question_oid AS cod_question_condition_oid,
questions_condition.lib_label AS lib_question_condition_label,
questions_condition.lib_reponses AS lib_reponses_condition
FROM (
-- "questions": UNION ALL of four sources of question arrays, each expanded
-- to one row per array element with JSON_ARRAY_ELEMENTS.
-- Branch 1: crmp documents, mpData.formulaire.questions.
SELECT
cod_cr_oid,
cod_formulaire_oid,
formulaire_type,
JSON_ARRAY_ELEMENTS(questions) AS question,
-- NOTE(review): the window has no ORDER BY, so the query itself does not
-- fix which element receives which row_num; the same applies to every
-- ROW_NUMBER() ... PARTITION BY without ORDER BY below.
ROW_NUMBER() OVER (PARTITION BY cod_cr_oid) AS row_num
FROM (
SELECT
json_data#>>'{_id,$oid}' AS cod_cr_oid,
json_data#>>'{mpData,idFormulaireParent}' AS cod_formulaire_oid,
json_data#>>'{mpData,formulaire,formulaireType}' AS formulaire_type,
json_data#>'{mpData,formulaire,questions}' AS questions
FROM crmp
) formulaire_crmp
UNION ALL
-- Branch 2: crmp documents, mpData.postFormulaire.questions.
SELECT
cod_cr_oid,
cod_formulaire_oid,
formulaire_type,
JSON_ARRAY_ELEMENTS(questions) AS question,
ROW_NUMBER() OVER (PARTITION BY cod_cr_oid) AS row_num
FROM (
SELECT
json_data#>>'{_id,$oid}' AS cod_cr_oid,
json_data#>>'{mpData,idFormulaireParent}' AS cod_formulaire_oid,
json_data#>>'{mpData,postFormulaire,formulaireType}' AS formulaire_type,
json_data#>'{mpData,postFormulaire,questions}' AS questions
FROM crmp
) postformulaire_crmp
UNION ALL
-- Branch 3: crmp documents, one form per element of mpData.formulairesReseaux,
-- then the questions of each of those forms.
SELECT
cod_cr_oid,
cod_formulaire_oid,
formulaire_type,
-- No alias here: the output column name "question" comes from the first
-- UNION ALL branch.
JSON_ARRAY_ELEMENTS(questions),
ROW_NUMBER() OVER (PARTITION BY cod_cr_oid) AS row_num
FROM (
SELECT
cod_cr_oid,
cod_formulaire_oid,
json_formulaire#>>'{formulaireType}' AS formulaire_type,
json_formulaire#>'{questions}' AS questions
FROM (
SELECT
json_data#>>'{_id,$oid}' AS cod_cr_oid,
json_data#>>'{mpData,idFormulaireParent}' AS cod_formulaire_oid,
JSON_ARRAY_ELEMENTS(json_data#>'{mpData,formulairesReseaux}') AS json_formulaire
FROM crmp
) formulaires_reseaux
) formulaires_reseaux
UNION ALL
-- Branch 4: cr documents, formulaire.questions (form id read from
-- formulaire._id.$oid instead of mpData.idFormulaireParent).
SELECT
cod_cr_oid,
cod_formulaire_oid,
formulaire_type,
JSON_ARRAY_ELEMENTS(questions) AS question,
ROW_NUMBER() OVER (PARTITION BY cod_cr_oid) AS row_num
FROM (
SELECT
json_data#>>'{_id,$oid}' AS cod_cr_oid,
json_data#>>'{formulaire,_id,$oid}' AS cod_formulaire_oid,
json_data#>>'{formulaire,formulaireType}' AS formulaire_type,
json_data#>'{formulaire,questions}' AS questions
FROM cr
) cr
) questions
LEFT JOIN (
-- "questions_condition": candidate condition questions, one row per element
-- of formulaire.questions over cr and crmp together, keyed the same way as
-- cod_question_oid above.
SELECT
MD5(cod_cr_oid||cod_formulaire_oid||COALESCE(formulaire_type,'')||row_num||(COALESCE(question_condition#>>'{_id,$oid}',question_condition#>>'{label}'))) AS cod_question_oid,
cod_cr_oid,
cod_formulaire_oid,
row_num,
formulaire_type,
question_condition#>>'{_id,$oid}' AS cod_question_oid_original,
question_condition#>>'{label}' AS lib_label,
-- lib_reponses: '|'-separated labels of the selected answers of this
-- question whose _id.$oid appears in some idsConditionReponse list.
-- NOTE(review): ARRAY_TO_STRING_AGG is not a built-in PostgreSQL function
-- name; presumably a user-defined function or a typo for ARRAY_TO_STRING.
-- Confirm it exists on the target cluster.
ARRAY_TO_STRING_AGG(ARRAY(
SELECT reponse_condition#>>'{label}'
FROM (
SELECT JSON_ARRAY_ELEMENTS(question_condition#>'{reponses}') AS reponse_condition) reps
WHERE (reponse_condition#>>'{isSelected}')::BOOLEAN
AND reponse_condition#>>'{_id,$oid}' IN (
-- This IN subquery references no column of the outer row: it expands
-- idsConditionReponse of every question of every cr/crmp document, and
-- is nested inside a per-row ARRAY(...) subquery.
SELECT reponse#>>'{$oid}'
FROM (
SELECT JSON_ARRAY_ELEMENTS(question#>'{idsConditionReponse}') reponse
FROM (
SELECT JSON_ARRAY_ELEMENTS(json_data#>'{formulaire,questions}') question
FROM (SELECT * FROM cr UNION ALL SELECT * FROM crmp) crs
) a
) b
)
),'|') AS lib_reponses
FROM (
SELECT
json_data#>>'{_id,$oid}' AS cod_cr_oid,
json_data#>>'{formulaire,_id,$oid}' AS cod_formulaire_oid,
json_data#>>'{formulaire,formulaireType}' AS formulaire_type,
-- NOTE(review): crmp documents are read here through formulaire.questions,
-- whereas the main "questions" branches read them through
-- mpData.formulaire.questions. Confirm this path is intended for crmp.
JSON_ARRAY_ELEMENTS(json_data#>'{formulaire,questions}') AS question_condition,
ROW_NUMBER() OVER (PARTITION BY json_data#>>'{_id,$oid}') AS row_num
FROM (SELECT * FROM cr UNION ALL SELECT * FROM crmp) crs
) cond
) questions_condition
-- Match on the condition question's id (or its label when it has no id).
-- The single-argument COALESCE on the right-hand side returns its argument
-- unchanged.
ON COALESCE(questions_condition.cod_question_oid_original,questions_condition.lib_label) = COALESCE(question#>>'{idConditionQuestion,$oid}')
AND questions_condition.cod_formulaire_oid = questions.cod_formulaire_oid
AND questions_condition.row_num = questions.row_num
AND COALESCE(questions_condition.formulaire_type,'') = COALESCE(questions.formulaire_type,'')
AND questions_condition.cod_cr_oid = questions.cod_cr_oid
It looks like Greenplum sometimes doesn't pick up the query, as if it were waiting (although it is not flagged as waiting in pg_stat_activity).
No noticeable activity on the Greenplum server is visible.
It looks like the query is picked or not depending on the input data.
The query is quite complex, and we are able to rewrite it so that it runs 100% of the time by using regular tables instead of WITH queries.

Related

Using JOIN with cte

I have the following setup, which seems to be working fine. I am having trouble modifying the query to include the department_name in the output.
I can't seem to get the JOIN working with the CTE. It's probably something trivial, but after many attempts I can't get it to work.
Any help would be appreciated.
Below is my setup and test case.
-- Sample data: department lookup table, built with CREATE TABLE ... AS
-- from literal rows selected from DUAL.
CREATE TABLE departments (
    department_id,
    department_name
) AS
    SELECT 1, 'IT'  FROM dual
    UNION ALL
    SELECT 2, 'DBA' FROM dual;
-- Sample data: nine employees spread over departments 1 and 2.
CREATE TABLE employees (
    employee_id,
    first_name,
    last_name,
    hire_date,
    salary,
    department_id
) AS
    SELECT 1, 'Lisa',   'Saladino',  DATE '2001-04-03', 100000, 1 FROM dual
    UNION ALL
    SELECT 2, 'Abby',   'Abbott',    DATE '2001-04-04', 50000,  1 FROM dual
    UNION ALL
    SELECT 3, 'Beth',   'Cooper',    DATE '2001-04-05', 60000,  1 FROM dual
    UNION ALL
    SELECT 4, 'Carol',  'Orr',       DATE '2001-04-06', 70000,  1 FROM dual
    UNION ALL
    SELECT 5, 'Vicky',  'Palazzo',   DATE '2001-04-07', 88000,  2 FROM dual
    UNION ALL
    SELECT 6, 'Cheryl', 'Ford',      DATE '2001-04-08', 110000, 1 FROM dual
    UNION ALL
    SELECT 7, 'Leslee', 'Altman',    DATE '2001-04-10', 666666, 1 FROM dual
    UNION ALL
    SELECT 8, 'Jill',   'Coralnick', DATE '2001-04-11', 190000, 2 FROM dual
    UNION ALL
    SELECT 9, 'Faith',  'Aaron',     DATE '2001-04-17', 122000, 2 FROM dual;
-- Highest-paid employee(s) of each department. DENSE_RANK gives tied top
-- earners the same rank, so a department can return more than one row.
WITH ranked_employees AS (
    SELECT
        department_id,
        first_name,
        last_name,
        salary,
        DENSE_RANK() OVER (
            PARTITION BY department_id
            ORDER BY salary DESC
        ) AS salary_rank
    FROM employees
)
SELECT
    department_id,
    /* department_name */  -- column the asker wants to add; not available without a join
    first_name,
    last_name,
    salary
FROM ranked_employees
WHERE salary_rank = 1
You did not join the table.
-- Rank salaries inside the CTE, then join the ranked rows to departments
-- to pick up department_name; only rank-1 rows are returned.
WITH ranked_employees AS (
    SELECT
        department_id,
        first_name,
        last_name,
        salary,
        DENSE_RANK() OVER (
            PARTITION BY department_id
            ORDER BY salary DESC
        ) AS salary_rank
    FROM employees
)
SELECT
    r.department_id,
    dept.department_name,
    r.first_name,
    r.last_name,
    r.salary
FROM ranked_employees r
INNER JOIN departments dept
    ON dept.department_id = r.department_id
WHERE r.salary_rank = 1
or:
-- Variant: join employees to departments inside the CTE, so the outer
-- query only filters on the rank.
WITH ranked_employees AS (
    SELECT
        emp.department_id,
        dept.department_name,
        emp.first_name,
        emp.last_name,
        emp.salary,
        DENSE_RANK() OVER (
            PARTITION BY emp.department_id
            ORDER BY emp.salary DESC
        ) AS salary_rank
    FROM employees emp
    INNER JOIN departments dept
        ON dept.department_id = emp.department_id
)
SELECT
    department_id,
    department_name,
    first_name,
    last_name,
    salary
FROM ranked_employees
WHERE salary_rank = 1
or using a sub-query, instead of the sub-query factoring clause:
-- Same result with an inline view instead of a WITH clause: rank first,
-- then join the ranked rows to departments.
SELECT
    r.department_id,
    dept.department_name,
    r.first_name,
    r.last_name,
    r.salary
FROM (
    SELECT
        department_id,
        first_name,
        last_name,
        salary,
        DENSE_RANK() OVER (
            PARTITION BY department_id
            ORDER BY salary DESC
        ) AS salary_rank
    FROM employees
) r
INNER JOIN departments dept
    ON dept.department_id = r.department_id
WHERE r.salary_rank = 1
or:
-- Inline-view variant with the join done inside the view; the outer query
-- keeps only the rank-1 rows.
SELECT
    department_id,
    department_name,
    first_name,
    last_name,
    salary
FROM (
    SELECT
        emp.department_id,
        dept.department_name,
        emp.first_name,
        emp.last_name,
        emp.salary,
        DENSE_RANK() OVER (
            PARTITION BY emp.department_id
            ORDER BY emp.salary DESC
        ) AS salary_rank
    FROM employees emp
    INNER JOIN departments dept
        ON dept.department_id = emp.department_id
)
WHERE salary_rank = 1
fiddle

Group By inside Rtrim(Xmlagg (Xmlelement (e,element || ',')).extract ( '//text()' ).GetClobVal(), ',')

I need to group values inside a query using (or not) the expression Rtrim(Xmlagg (Xmlelement (e,column || ',')).extract ( '//text()' ).GetClobVal(), ','), but I can't find any documentation that explains how to group the data inside this expression. The code is very simple, as you can see below:
-- Concatenates every CONTRACTS value of an ID into one comma-separated CLOB.
-- Each value is wrapped in an XML element with a trailing comma, the elements
-- are aggregated, their text is extracted, and the final comma is trimmed.
-- Duplicate CONTRACTS values are NOT removed by this query.
-- The element identifier is "e" (as in the accompanying description); the
-- reserved word AND cannot be used there.
SELECT
    id,
    RTRIM(
        XMLAGG(XMLELEMENT(e, contracts || ',')).EXTRACT('//text()').GetClobVal(),
        ','
    ) AS contracts
FROM table_a
GROUP BY id
The result in CONTRACTS repeats every value found for the ID; that's OK, it's working!
ID
CONTRACTS
876
1,1,1,2,3,3
But what I really need is this return:
ID
CONTRACTS
876
1,2,3
It's not necessary to use the expression Rtrim(Xmlagg (Xmlelement (e,column || ',')).extract ( '//text()' ).GetClobVal(), ','); I only use it to concatenate the elements, separated by a comma ",", into the same column.
If anyone can help me, I would be very grateful!
If your values will fit into a VARCHAR2 data type (rather than a CLOB) then you can use a nested sub-query to get the DISTINCT values for each ID:
-- De-duplicate (id, contracts) pairs in an inline view first, then build the
-- comma-separated list per id in ascending contracts order.
SELECT
    id,
    LISTAGG(contracts, ',') WITHIN GROUP (ORDER BY contracts) AS contracts
FROM (
    SELECT DISTINCT
        id,
        contracts
    FROM table_a
)
GROUP BY id
Or, from Oracle 19c, it is built-in to LISTAGG:
-- Oracle 19c and later: LISTAGG removes duplicates itself via DISTINCT.
SELECT
    id,
    LISTAGG(DISTINCT contracts, ',')
        WITHIN GROUP (ORDER BY contracts) AS contracts
FROM table_a
GROUP BY id
If you want a CLOB then you can use the same technique as the first query:
-- CLOB-returning variant: de-duplicate (id, contracts) in an inline view,
-- aggregate the values as XML elements in contracts order, extract the text
-- and trim the trailing comma.
SELECT
    id,
    RTRIM(
        XMLAGG(
            XMLELEMENT(name, contracts || ',')
            ORDER BY contracts
        ).EXTRACT('//text()').GetClobVal(),
        ','
    ) AS contracts
FROM (
    SELECT DISTINCT
        id,
        contracts
    FROM table_a
)
GROUP BY id
Which, for the sample data:
-- Sample data: id 876 with contracts 1 (x3), 2 (x2) and 3 (x3).
CREATE TABLE table_a (
    id,
    contracts
) AS
    SELECT 876, 1 FROM dual
    UNION ALL
    SELECT 876, 1 FROM dual
    UNION ALL
    SELECT 876, 1 FROM dual
    UNION ALL
    SELECT 876, 2 FROM dual
    UNION ALL
    SELECT 876, 2 FROM dual
    UNION ALL
    SELECT 876, 3 FROM dual
    UNION ALL
    SELECT 876, 3 FROM dual
    UNION ALL
    SELECT 876, 3 FROM dual;
All output:
ID
CONTRACTS
876
1,2,3
db<>fiddle here
It's much easier to do all those operation in XML functions: DBFiddle
SELECT /*+ NO_XML_QUERY_REWRITE */
    -- Per id: aggregate all CONTRACTS values as <X> elements under one <R>
    -- root (in CONTRACTS order), then let XQuery drop duplicates with
    -- distinct-values() and join them with commas.
    id,
    XMLQUERY(
        'string-join(distinct-values($R/R/X/text()),",")'
        PASSING
            XMLELEMENT(
                R,
                XMLAGG(
                    XMLELEMENT(X, contracts)
                    ORDER BY contracts
                )
            ) AS R
        RETURNING CONTENT
    ) AS contracts
FROM table_a
GROUP BY id;
Full example with test data:
-- Self-contained example: the sample rows are supplied by a WITH clause
-- named table_a, then de-duplicated and joined with XQuery.
WITH table_a (id, contracts) AS (
    SELECT 876, 1 FROM dual
    UNION ALL
    SELECT 876, 1 FROM dual
    UNION ALL
    SELECT 876, 1 FROM dual
    UNION ALL
    SELECT 876, 2 FROM dual
    UNION ALL
    SELECT 876, 2 FROM dual
    UNION ALL
    SELECT 876, 3 FROM dual
    UNION ALL
    SELECT 876, 3 FROM dual
    UNION ALL
    SELECT 876, 3 FROM dual
)
SELECT /*+ NO_XML_QUERY_REWRITE */
    id,
    XMLQUERY(
        'string-join(distinct-values($R/R/X/text()),",")'
        PASSING
            XMLELEMENT(
                R,
                XMLAGG(
                    XMLELEMENT(X, contracts)
                    ORDER BY contracts
                )
            ) AS R
        RETURNING CONTENT
    ) AS contracts
FROM table_a
GROUP BY id;

Incorrect results from max and rank function

I am trying to return a single row with the latest PSI score, but my code returns multiple rows, and the row that I actually want is returned as the last row of the result set. What can I change in this code to resolve the issue?
-- Asker's query, logic unchanged: only the SELECT keyword, which was split
-- across two lines ("S" / "ELECT"), has been rejoined so the statement parses.
--
-- What it does as written:
--   * AA ranks the candidate's rows per REQUEST_NUM by REQUEST_LAST_MODIFIED_DT
--     in ASCENDING order, so ARANK = 1 is the earliest row of each request.
--   * The middle query groups by (REQUEST_NUM, PI_CANDIDATE_NUM) and, via
--     KEEP (DENSE_RANK FIRST ORDER BY ARANK), takes each score from the
--     ARANK-first row(s) of the group.
--   * Because of that GROUP BY, one row per request is returned, not one row
--     per candidate.
SELECT
    REQUEST_NUM,
    PI_CANDIDATE_NUM,
    COALESCE(MAX_GCO_AD_KNOWLEDGE_TEST_SCORE, MAX_PSI_OVERALL_SCORE) AS PSI_SCORE
    --COALESCE(MAX_GCO_AD_KNOWLEDGE_TEST_SCORE,MAX_PSI_OVERALL_SCORE)-39.035024/8.439997 AS PSI XMation
FROM (
    SELECT
        REQUEST_NUM,
        PI_CANDIDATE_NUM,
        MAX(AA.PSI_OVERALL_SCORE) KEEP (DENSE_RANK FIRST ORDER BY AA.ARANK) MAX_PSI_OVERALL_SCORE,
        MAX(AA.GCO_AD_KNOWLEDGE_TEST_SCORE) KEEP (DENSE_RANK FIRST ORDER BY AA.ARANK) MAX_GCO_AD_KNOWLEDGE_TEST_SCORE
    FROM (
        SELECT
            RANK() OVER (PARTITION BY REQUEST_NUM ORDER BY REQUEST_LAST_MODIFIED_DT) ARANK,
            PARENT_PI_NUMBER,
            REQUEST_NUM,
            PI_CANDIDATE_NUM,
            PSI_OVERALL_SCORE,
            GCO_AD_KNOWLEDGE_TEST_SCORE,
            REQUEST_LAST_MODIFIED_DT
        FROM WC_APPLICATION_EVENT_F
        -- GCO_AD_KNOWLEDGE_TEST_SCORE != '10100' and
        WHERE PI_CANDIDATE_NUM = '4173093'
        --nd GCO_AD_KNOWLEDGE_TEST_SCORE is null
    ) AA
    --where AA.ARANK=1
    GROUP BY REQUEST_NUM, PI_CANDIDATE_NUM
    --ORDER By PARENT_PI_NUMBER,PI_NUMBER,REQUEST_LAST_MODIFIED_DT;
) BB;
Sample data:
REQUEST_NUM PI_CANDIDATE_NUM REQUEST_LAST_MODIFIED_DT PSI_SCORE
----------- ---------------- ------------------------ ---------
4639022
1655626 4639022 5-Mar-17
1662401 4639022 8-Mar-17 22.6
1662470 4639022 6-Apr-17
1662486 4639022 6-Apr-17
1662499 4639022 8-Mar-17 30.3
1771817 4639022 7-Jun-17 35.3
1797323 4639022 24-Jun-17 38.5
My expected result is the last row, with a value of 38.5, since it has the latest date.
Here is one way to do this. It assumes you input a specific candidate (otherwise the analytic function in the subquery can be modified to get the most recent NON-NULL score for each candidate).
EDIT: If you need to retrieve the most recent score for ALL candidates, then: (1) remove the condition that filters on a single candidate in the inner query; (2) add partition by pi_candidate_num right after rank() over ( (after the opening parenthesis) and add a space before order by ... (still in the over(.....) clause of rank()). \EDIT
If two NON-NULL scores were achieved ON THE SAME DATE (perhaps that is impossible in your data, but if it is...) then BOTH rows will be returned.
I assumed the candidate number is a NUMBER, so I removed the single quotes in the condition; and I changed the candidate number to match your sample inputs.
The WITH clause is NOT PART OF THE QUERY - remove it before you try the solution.
-- test_data only reproduces the sample rows from the question.
-- The main query keeps the candidate's rows that have a non-null score,
-- ranks them by modification date (newest first) and returns the rank-1
-- row(s); RANK means scores tied on the latest date are all returned.
WITH test_data (request_num, pi_candidate_num, request_last_modified_dt, psi_score) AS (
    SELECT NULL,    4639022, NULL,                               NULL FROM dual
    UNION ALL
    SELECT 1655626, 4639022, TO_DATE( '5-Mar-17', 'dd-Mon-rr'),  NULL FROM dual
    UNION ALL
    SELECT 1662401, 4639022, TO_DATE( '8-Mar-17', 'dd-Mon-rr'),  22.6 FROM dual
    UNION ALL
    SELECT 1662470, 4639022, TO_DATE( '6-Apr-17', 'dd-Mon-rr'),  NULL FROM dual
    UNION ALL
    SELECT 1662486, 4639022, TO_DATE( '6-Apr-17', 'dd-Mon-rr'),  NULL FROM dual
    UNION ALL
    SELECT 1662499, 4639022, TO_DATE( '8-Mar-17', 'dd-Mon-rr'),  30.3 FROM dual
    UNION ALL
    SELECT 1771817, 4639022, TO_DATE( '7-Jun-17', 'dd-Mon-rr'),  35.3 FROM dual
    UNION ALL
    SELECT 1797323, 4639022, TO_DATE('24-Jun-17', 'dd-Mon-rr'),  38.5 FROM dual
)
SELECT
    request_num,
    pi_candidate_num,
    request_last_modified_dt,
    psi_score
FROM (
    SELECT
        d.*,
        RANK() OVER (ORDER BY request_last_modified_dt DESC) recency_rank
    FROM test_data d
    WHERE pi_candidate_num = 4639022
      AND psi_score IS NOT NULL
)
WHERE recency_rank = 1
;
REQUEST_NUM PI_CANDIDATE_NUM REQUEST_LAST_MODIFIED_DT PSI_SCORE
----------- ---------------- ------------------------ ----------
1797323 4639022 24-Jun-17 38.5

Recursive hierarchical Oracle SQL query

I have a source table like below:
Emp_ID| Name| Manager_ID
001|abc|005
005|cde|010
010|xyz|050
050 | bcg| 100
100|sta|NULL
My requirement is to populate the target table like below:
Emp_ID| Name| Manager_1| Manager_2| Manager_3| Manager_4
005|cde|xyz|bcg|sta|NULL
050|bcg|sta| NULL|NULL|NULL
100|sta|NULL|NULL|NULL
001|abc|cde|xyz|bcg|sta
I am able to use a recursive select with the CONNECT BY clause to populate the value for Manager_1, but I cannot work out the logic to populate the Manager_2, Manager_3 and Manager_4 values as separate columns in a single row, depending on how many levels of hierarchy are present for a given employee.
Please help.
I think the following query will help you. But to split the string to individual manager id, you need to know the max no of level of managers.
-- Builds, for every employee, the '-'-separated chain of emp_ids from the
-- top of the hierarchy (the row whose mgr_id IS NULL) down to that employee.
-- data_set is inline sample data; its literal rows are all distinct, so they
-- are combined with UNION ALL (no duplicate-elimination step is needed).
WITH data_set AS (
    SELECT '001' emp_id, 'aaa' emp_name, '005' mgr_id FROM dual
    UNION ALL
    SELECT '005' emp_id, 'bbb' emp_name, '010' mgr_id FROM dual
    UNION ALL
    SELECT '010' emp_id, 'ccc' emp_name, '050' mgr_id FROM dual
    UNION ALL
    SELECT '020' emp_id, 'ddd' emp_name, '050' mgr_id FROM dual
    UNION ALL
    SELECT '050' emp_id, 'eee' emp_name, '100' mgr_id FROM dual
    UNION ALL
    SELECT '100' emp_id, 'fff' emp_name, '200' mgr_id FROM dual
    UNION ALL
    SELECT '200' emp_id, 'ggg' emp_name, NULL mgr_id FROM dual
)
SELECT
    emp_id,
    emp_name,
    mgr_id,
    -- SYS_CONNECT_BY_PATH prefixes every node with '-'; strip the leading one.
    LTRIM(SYS_CONNECT_BY_PATH(emp_id, '-'), '-') chain
FROM data_set
START WITH mgr_id IS NULL          -- root: the employee without a manager
CONNECT BY mgr_id = PRIOR emp_id   -- walk downwards: child.mgr_id = parent.emp_id
ORDER SIBLINGS BY emp_id;
If your hierarchy only extends to 4 levels deep, the following query may be used:
-- One row per employee with up to four levels of managers as columns.
-- Each LEFT JOIN climbs one level; levels that do not exist yield NULL.
SELECT
    emp.Emp_ID,
    emp.Name,
    mgr1.Name AS Manager_1,
    mgr2.Name AS Manager_2,
    mgr3.Name AS Manager_3,
    mgr4.Name AS Manager_4
FROM tmp emp
LEFT JOIN tmp mgr1
    ON mgr1.Emp_ID = emp.Manager_ID
LEFT JOIN tmp mgr2
    ON mgr2.Emp_ID = mgr1.Manager_ID
LEFT JOIN tmp mgr3
    ON mgr3.Emp_ID = mgr2.Manager_ID
LEFT JOIN tmp mgr4
    ON mgr4.Emp_ID = mgr3.Manager_ID;
Pivot option:
-- Pivots employees on manager_id: for each (emp_id, name) it produces one
-- column per listed manager id holding COUNT(manager_id) for that value.
SELECT *
FROM (
    SELECT
        emp_id,
        name,
        manager_id
    FROM employees
)
PIVOT (
    COUNT(manager_id)
    FOR manager_id IN ('005', '100', '050')
)
ORDER BY emp_id;

How to get count by using UNION operator

I'm trying to get a total count by using the UNION operator, but it gives the wrong count.
-- Asker's original attempt, behaviour unchanged: four single-row counts are
-- combined with UNION (which removes duplicate rows), and the outer COUNT(*)
-- counts the rows of that union rather than adding the four values.
SELECT COUNT(*) AS companyRatings
FROM (
    SELECT COUNT(*) hrs
    FROM (
        SELECT DISTINCT hrs
        FROM companyA
    )
    UNION
    SELECT COUNT(*) financehrs
    FROM (
        SELECT DISTINCT finance_hrs
        FROM companyB
    )
    UNION
    SELECT COUNT(*) hrids
    FROM (
        SELECT REGEXP_SUBSTR(hr_id, '[^/]+', 1, 3) hrid
        FROM companyZ
    )
    UNION
    SELECT COUNT(*) cities
    FROM (
        SELECT REGEXP_SUBSTR(city, '[^/]+', 1, 3) city
        FROM companyY
    )
);
The individual queries work fine, but the total count does not match.
individual results here: 12 19 3 6
present total count: 31
Actual total count:40.
So, is there an alternative solution without the UNION operator?
To add values you'd use +. UNION is to add data sets.
-- Adds four independent counts with "+": each operand is a scalar subquery.
-- COUNT(DISTINCT col) and COUNT(expr) both ignore NULLs.
SELECT
      (SELECT COUNT(DISTINCT hrs) FROM companyA)
    + (SELECT COUNT(DISTINCT finance_hrs) FROM companyB)
    + (SELECT COUNT(REGEXP_SUBSTR(hr_id, '[^/]+', 1, 3)) FROM companyZ)
    + (SELECT COUNT(REGEXP_SUBSTR(city, '[^/]+', 1, 3)) FROM companyY)
    AS total
FROM dual;
But I agree with juergen d; you should not have separate tables per company in the first place.
Edit. Updated query using Sum
-- Sums the four per-table counts. UNION ALL keeps all four rows even when
-- two counts are equal, so SUM adds every one of them.
SELECT SUM(cnt) AS companyRatings
FROM (
    SELECT COUNT(*) AS cnt
    FROM (SELECT DISTINCT hrs FROM companyA)
    UNION ALL
    SELECT COUNT(*) AS cnt
    FROM (SELECT DISTINCT finance_hrs FROM companyB)
    UNION ALL
    SELECT COUNT(*) AS cnt
    FROM (SELECT REGEXP_SUBSTR(hr_id, '[^/]+', 1, 3) hrid FROM companyZ)
    UNION ALL
    SELECT COUNT(*) AS cnt
    FROM (SELECT REGEXP_SUBSTR(city, '[^/]+', 1, 3) city FROM companyY)
)
Previous answer:
Try this
-- Earlier form of the answer: the four counts are computed as scalar
-- subqueries over inline views and added together with "+".
SELECT
      (SELECT COUNT(*) hrs
       FROM (SELECT DISTINCT hrs FROM companyA))
    + (SELECT COUNT(*) financehrs
       FROM (SELECT DISTINCT finance_hrs FROM companyB))
    + (SELECT COUNT(*) hrids
       FROM (SELECT REGEXP_SUBSTR(hr_id, '[^/]+', 1, 3) hrid FROM companyZ))
    + (SELECT COUNT(*) cities
       FROM (SELECT REGEXP_SUBSTR(city, '[^/]+', 1, 3) city FROM companyY))
    AS total_count
FROM dual

Resources