How to get the current_row in a main query for a subquery? - sqlite

Let's assume the following table:
-- Sample data: col1 is '<prefix>/<letter>' and value is the amount stored for that key.
CREATE TABLE t1 (
    col1  TEXT PRIMARY KEY,
    value INTEGER
);
INSERT INTO t1 (col1, value)
VALUES
    ('IE/a', 1),    ('IE/b', 2),    ('IE/c', 3),
    ('00/a', 10),   ('01/a', 20),   ('02/a', 30),   ('03/a', 40),
    ('00/b', 100),  ('01/b', 200),  ('02/b', 300),  ('03/b', 400),
    ('00/c', 1000), ('01/c', 2000), ('02/c', 3000), ('03/c', 4000);
The content of the table is:
col1 value
IE/a 1
IE/b 2
IE/c 3
00/a 10
01/a 20
02/a 30
03/a 40
00/b 100
01/b 200
02/b 300
03/b 400
00/c 1000
01/c 2000
02/c 3000
03/c 4000
I want to get the following output:
IE/a 100
IE/b 1000
IE/c 10000
So, IE/a is the sum of the values of 00/a + 01/a + 02/a + 03/a.
My first approach looks like this, where current_row_col1 is a pseudo-code variable used to show that I would like to refer to the current row of the main query:
-- Goal: one row per 'IE/<x>' key, carrying the sum of the 00..03 rows for the same <x>.
-- NOTE: current_row_col1 is a pseudo-code placeholder for the outer query's col1;
-- it is not a real column, so this statement does not run as written.
SELECT
col1
, (SELECT sum(value) FROM t1
WHERE col1 = '00' || SUBSTRING( current_row_col1, 3)
OR col1 = '01' || SUBSTRING( current_row_col1, 3)
OR col1 = '02' || SUBSTRING( current_row_col1, 3)
OR col1 = '03' || SUBSTRING( current_row_col1, 3)
) AS value
FROM t1
-- Only the 'IE/...' rows are reported.
WHERE col1 LIKE 'IE/%';

Use a self join and aggregation:
-- For every 'IE/<suffix>' row, add up the values of all OTHER rows that share
-- the same suffix (the text after the '/').
SELECT
    ie.col1,
    TOTAL(part.value) AS total
FROM tablename AS ie
LEFT JOIN tablename AS part
    ON part.rowid <> ie.rowid  -- never add a row to itself
   AND SUBSTR(part.col1, INSTR(part.col1, '/') + 1) = SUBSTR(ie.col1, INSTR(ie.col1, '/') + 1)
WHERE ie.col1 LIKE 'IE/%'
GROUP BY ie.col1;
Or, with conditional aggregation:
-- Group all rows by the suffix after '/'; within each group the 'IE/...' row
-- supplies the label and every non-'IE/...' row contributes to the total.
WITH keyed AS (
    SELECT
        col1,
        value,
        SUBSTR(col1, INSTR(col1, '/') + 1) AS suffix
    FROM tablename
)
SELECT
    MAX(CASE WHEN col1 LIKE 'IE/%' THEN col1 END) AS col1,
    TOTAL(CASE WHEN col1 NOT LIKE 'IE/%' THEN value END) AS total
FROM keyed
GROUP BY suffix;
Or, with window functions:
-- Same per-suffix label/total as the grouped version, computed with window
-- functions; DISTINCT collapses the identical rows produced within each partition.
SELECT DISTINCT
    MAX(CASE WHEN col1 LIKE 'IE/%' THEN col1 END) OVER by_suffix AS col1,
    TOTAL(CASE WHEN col1 NOT LIKE 'IE/%' THEN value END) OVER by_suffix AS total
FROM tablename
WINDOW by_suffix AS (PARTITION BY SUBSTR(col1, INSTR(col1, '/') + 1));
See the demo.

This should give you your expected results.
Remember that when working with additional categories such as 'IE/g', you will need to add their last letters to the IN list in the WHERE clause.
I used val instead of value as a column name because in many DBMS it's a restricted keyword.
Code:
-- Sum val per trailing letter (4th character of col1) over the non-'IE/' rows,
-- and label each group as 'IE/<letter>'.
SELECT
    'IE/' || substr(col1, 4, 1) AS col,
    sum(val) AS my_sum
FROM t1
WHERE col1 LIKE '%/%'
  AND col1 NOT LIKE 'IE/%'
  AND substr(col1, 4, 1) IN ('a', 'b', 'c')
GROUP BY substr(col1, 4, 1);
Output:
col my_sum
IE/a 100
IE/b 1000
IE/c 10000

Related

Tune the second VT which is getting spooled out due to huge volume of data

How to tune the below second VT?
DT_RANGE is defined as PERIOD(STRT_DT, END_DT+1).
-- Collapse TABLE0's meeting/overlapping DT_RANGE periods per (COL1, COL2, COL3),
-- expose each normalized period as START_DT / END_DT (END_DT = period end - 1),
-- and number the periods within each (COL1, COL2, COL3) group.
CREATE MULTISET VOLATILE TABLE TABLE1 AS
(
SELECT
    COL1,
    COL2,
    COL3,  -- fix: the comma before BEGIN(DT_RANGE) was missing
    BEGIN(DT_RANGE) AS START_DT,
    END(DT_RANGE) - 1 AS END_DT,
    ROW_NUMBER() OVER (
        PARTITION BY COL1, COL2, COL3
        ORDER BY BEGIN(DT_RANGE), END(DT_RANGE) - 1
    ) AS rown
FROM (
    -- fix: the first column read "MCOL1", but the outer select reads COL1
    SELECT NORMALIZE ON MEETS OR OVERLAPS
        COL1, COL2, COL3, DT_RANGE
    FROM TABLE0
) X
) WITH DATA ON COMMIT PRESERVE ROWS;
--Identify overlapping spans
--overlp_rn - this column determines the overlapping rownumber
-- Seeds one row per TABLE1 span, then recursively joins each span to TABLE1 rows
-- with the same COL1/COL2, a higher row number, and an overlapping date span.
-- The final select keeps, per span, the highest overlapping row number found.
-- NOTE(review): the join matches on COL1 and COL2 only, while rown is numbered per
-- (COL1, COL2, COL3) - presumably intended to find overlaps across COL3; confirm.
CREATE MULTISET VOLATILE TABLE TABLE2_COMBINED_OVERLAPS AS
(
WITH RECURSIVE RecursiveParent (COL1, COL2, COL3, START_DT, END_DT, orig_rn, overlp_rn, query_lvl)
AS
(
    SELECT COL1, COL2, COL3, START_DT, END_DT, rown AS orig_rn, 0 AS overlp_rn, 1 AS query_lvl
    FROM TABLE1
    UNION ALL
    SELECT a.COL1, a.COL2, a.COL3, a.START_DT, a.END_DT, a.orig_rn AS orig_rn, b.rown AS overlp_rn, 1 + a.query_lvl AS query_lvl
    FROM RecursiveParent a
    INNER JOIN TABLE1 b
        ON a.COL1 = b.COL1
       AND a.COL2 = b.COL2
       -- Two closed spans overlap exactly when each starts no later than the other
       -- ends; this replaces the four BETWEEN tests (spans have START_DT <= END_DT).
       AND a.START_DT <= b.END_DT
       AND b.START_DT <= a.END_DT
       AND a.orig_rn < b.rown
    WHERE a.query_lvl <= b.rown
)
-- GROUP BY already yields one row per key, so the former DISTINCT was redundant work.
SELECT COL1, COL2, COL3, START_DT, END_DT, orig_rn, MAX(overlp_rn) AS overlp_rn
FROM RecursiveParent
GROUP BY COL1, COL2, COL3, START_DT, END_DT, orig_rn
) WITH DATA ON COMMIT PRESERVE ROWS;

multiple Insert values in a loop

I thought I was able to do this kind of thing:
-- For each var1 of table1 with var2 = 2, insert into table2 a row whose col2 is the
-- number of table2 rows currently stored for that var1 (nothing is inserted when
-- there are none, because the grouped select then returns no row).
BEGIN
    FOR r_var IN (
        SELECT var1
        FROM schema.table1
        WHERE var2 = 2
    )
    LOOP
        INSERT INTO schema.table2 (col1, col2, col3, col4)
        SELECT r_var.var1, COUNT(*), 'aString', 0
        FROM schema.table2
        WHERE col1 = r_var.var1
        GROUP BY r_var.var1;
    END LOOP;
END;
The issue is a "unique constraint violated".
I think the issue arises because I select from the same table that I use in the INSERT INTO.
How can I fix this?
Thx!
EDIT: table2 has NOT NULL constraints on col1 and col2, and two foreign keys referencing table1 and another table that is not used here.
EDIT2: I still have not reached a solution, so I split my problem into smaller steps.
First I checked if I was able to get the count correctly with this query:
-- Prints, for each var1 of table1 with var2 = 2, the number of table2 rows for that
-- var1 plus one.
-- fix: the SQL*Plus command is SERVEROUTPUT (one word).
SET SERVEROUTPUT ON
DECLARE
    count_rows NUMBER;
BEGIN
    FOR r_var IN (SELECT var1 FROM schema.table1 WHERE var2 = 2)
    LOOP
        SELECT COUNT(*) INTO count_rows FROM schema.table2 WHERE col1 = r_var.var1;
        -- fix: the cursor only selects var1, so r_var.col1 does not exist.
        -- fix: || and + share the same precedence and evaluate left to right, so the
        -- addition must be parenthesized or the concatenated text would be added to 1.
        DBMS_OUTPUT.PUT_LINE('order for var1 : ' || r_var.var1 || ' is ' || (count_rows + 1));
    END LOOP;
END;
result:
order for var1: 144 is 20
order for var1: 3000 is 10
order for var1: 3001 is 9
order for var1: 3003 is 3
order for var1: 110 is 10
order for var1: 114 is 12
order for var1: 115 is 81
order for var1: 116 is 11
order for var1: 123 is 10
I then tried to insert just one row in the table:
-- Manual single-row insert used to check that table2 accepts such a row.
INSERT INTO schema.table2 (col1, col2, col3, col4)
VALUES (144, 20, 'aString', 0);
Which works without issue !
But still, when I run the loop below, I got a 'unique constraint violated' issue:
-- For each var1 of table1 with var2 = 2: count the table2 rows for that var1 and
-- insert a new row with col2 = count + 1.
-- NOTE: this is the version reported to fail with "unique constraint violated";
-- per the author's explanation, count + 1 can equal a col2 value that already
-- exists for the same col1.
DECLARE
count_rows NUMBER;
BEGIN
FOR r_var IN (select var1 from schema.table1 where var2 = 2)
LOOP
select count(*) into count_rows from schema.table2 where col1 = r_var.var1;
insert into schema.table2(col1, col2, col3, col4) values(r_var.var1, count_rows + 1, 'aString', 0);
END LOOP;
END;
The issue came from the insert, which was trying to add a row whose col1 and col2 matched those of an existing row. A constraint prevents a row from being inserted when a row with the same col1 and col2 is already present.
The query was not doing exactly what I wanted; replacing count(*) with max(col2) solved the issue:
-- For each var1 of table1 with var2 = 2, append a table2 row whose col2 is one
-- more than the highest col2 already stored for that var1.
DECLARE
    max_rows NUMBER;
BEGIN
    FOR r_var IN (SELECT var1 FROM schema.table1 WHERE var2 = 2)
    LOOP
        -- MAX() over zero rows yields NULL; COALESCE makes the first row for a var1
        -- get col2 = 1 instead of NULL (col2 is stated to be NOT NULL).
        SELECT COALESCE(MAX(col2), 0) INTO max_rows
        FROM schema.table2
        WHERE col1 = r_var.var1;

        INSERT INTO schema.table2 (col1, col2, col3, col4)
        VALUES (r_var.var1, max_rows + 1, 'aString', 0);
    END LOOP;
END;

How to get maximum column values across a row in Teradata sql?

I have a table named cnst_chrctrstc_abc with 10 columns (equ_gender1 - bb_population_flag) each row which contain numeric values (count) .
I want to get maximum 5 values out of each row across those 10 numeric columns.
The query I have looks something like the following ..
-- Counts, per attribute, how many active rows (end date 9999-12-31, load_id 1024)
-- differ from the most recent inactive row of the same cnst_mstr_id.
-- NULLs are compared as '' on both sides; an active row without an inactive
-- counterpart is compared against ''.
-- NOTE(review): the outer select list is missing in the original post.
SEL
FROM
(
SEL
SUM(CASE WHEN COALESCE(act.equ_gender1,'') = COALESCE(inact.equ_gender1,'') THEN 0 ELSE 1 END ) AS equ_gender1_chg_cnt,
SUM(CASE WHEN COALESCE(act.exp_ex_bmyr1,'') = COALESCE(inact.exp_ex_bmyr1,'') THEN 0 ELSE 1 END ) AS exp_ex_bmyr1_chg_cnt,
SUM(CASE WHEN COALESCE(act.equ_age1,'') = COALESCE(inact.equ_age1,'') THEN 0 ELSE 1 END ) AS equ_age1_chg_cnt,
SUM(CASE WHEN COALESCE(act.maritalstatus1,'') = COALESCE(inact.maritalstatus1,'') THEN 0 ELSE 1 END ) AS maritalstatus1_chg_cnt,
SUM(CASE WHEN COALESCE(act.person_type1,'') = COALESCE(inact.person_type1,'') THEN 0 ELSE 1 END ) AS person_type1_chg_cnt,
SUM(CASE WHEN COALESCE(act.homeowner,'') = COALESCE(inact.homeowner,'') THEN 0 ELSE 1 END ) AS homeowner_chg_cnt,
SUM(CASE WHEN COALESCE(act.dwelling_size,'') = COALESCE(inact.dwelling_size,'') THEN 0 ELSE 1 END ) AS dwelling_size_chg_cnt,
SUM(CASE WHEN COALESCE(act.lengthofresidence,'') = COALESCE(inact.lengthofresidence,'') THEN 0 ELSE 1 END ) AS lengthofresidence_chg_cnt,
SUM(CASE WHEN COALESCE(act.childrenage0_18,'') = COALESCE(inact.childrenage0_18,'') THEN 0 ELSE 1 END ) AS childrenage0_18_chg_cnt,
SUM(CASE WHEN COALESCE(act.bb_population_flag,'') = COALESCE(inact.bb_population_flag,'') THEN 0 ELSE 1 END ) AS bb_population_flag
FROM
-- act: the currently active rows of the load
(SEL * FROM arc_mdm_Tbls.cnst_chrctrstc_abc WHERE load_id=1024 AND cnst_chrctrstc_end_dt='9999-12-31' (DATE))act
LEFT JOIN
-- inact: per cnst_mstr_id, the ended row with the latest start timestamp
(SEL * FROM arc_mdm_Tbls.cnst_chrctrstc_abc WHERE load_id=1024 AND cnst_chrctrstc_end_dt<'9999-12-31' (DATE)
QUALIFY ROW_NUMBER() OVER (PARTITION BY cnst_mstr_id ORDER BY cnst_chrctrstc_strt_ts DESC)=1
)inact
ON act.cnst_mstr_id = inact.cnst_mstr_id
)X
I know SEL GREATEST would produce the maximum value out of each row . But I want 5 top values and assign a rank to them.
Something like for some row first five columns may hold the top 5 values and for some last five i.e. homeowner to bb_population_flag may hold the top 5 values.
so if the columns and values from cnst_chrctrstc_abc look something like the following
cdi_batch_id | a | b | c | d | e | f | g | h | i |j
1024 |116|105|102|100|117|119|108|104|101|121
so the select query should return me columns j,f,e,a,g having the top 5 values.
And then I would assign a rank to them accordingly .
Should it be done using unpivot or something ?
Thanks in advance.
Yes, you need to unpivot your result.
Before TD14.10 you will need a list of those column names, either as a table
-- Lookup table holding the names of the ten columns to unpivot, one per row.
CREATE TABLE ColumnList (col VARCHAR(128));
INSERT INTO ColumnList (col) VALUES ('equ_gender1');
INSERT INTO ColumnList (col) VALUES ('exp_ex_bmyr1');
INSERT INTO ColumnList (col) VALUES ('equ_age1');
INSERT INTO ColumnList (col) VALUES ('maritalstatus1');
INSERT INTO ColumnList (col) VALUES ('person_type1');
INSERT INTO ColumnList (col) VALUES ('homeowner');
INSERT INTO ColumnList (col) VALUES ('dwelling_size');
INSERT INTO ColumnList (col) VALUES ('lengthofresidence');
INSERT INTO ColumnList (col) VALUES ('childrenage0_18');
INSERT INTO ColumnList (col) VALUES ('bb_population_flag');
or on-the-fly using a bulky
-- Inline alternative to a ColumnList table: a CTE producing one row per column
-- name. Each literal is wrapped in its own derived table before the UNION ALL.
-- This is only the WITH clause; the query that uses ColumnList must follow it.
with ColumnList as
(
select * from (select 'equ_gender1' as Col) as dt union all
select * from (select 'exp_ex_bmyr1' as Col) as dt union all
select * from (select 'equ_age1' as Col) as dt union all
select * from (select 'maritalstatus1' as Col) as dt union all
select * from (select 'person_type1' as Col) as dt union all
select * from (select 'homeowner' as Col) as dt union all
select * from (select 'dwelling_size' as Col) as dt union all
select * from (select 'lengthofresidence' as Col) as dt union all
select * from (select 'childrenage0_18' as Col) as dt union all
select * from (select 'bb_population_flag' as Col) as dt
)
Then you CROSS JOIN to unpivot:
-- Unpivot template: the cross join repeats the source row once per ColumnList
-- entry, and the CASE picks the matching column's value as Counts.
-- The rows are then ranked by Counts (descending) and only ranks 1-5 are kept.
-- "your current select" is a placeholder for the existing query.
select
col,
case col
when 'equ_gender1' then equ_gender1
when 'exp_ex_bmyr1' then exp_ex_bmyr1
when 'equ_age1' then equ_age1
when 'maritalstatus1' then maritalstatus1
when 'person_type1' then person_type1
when 'homeowner' then homeowner
when 'dwelling_size' then dwelling_size
when 'lengthofresidence' then lengthofresidence
when 'childrenage0_18' then childrenage0_18
when 'bb_population_flag' then bb_population_flag
end as Counts,
-- rank() leaves ties on the same rank, so ties can yield more than five rows
rank() over (order by Counts desc) as rnk
FROM
(
your current select
) as dt
cross join ColumnList
qualify rnk <= 5
In TD14.10 you could utilize the TD_UNPIVOT function:
-- TD_UNPIVOT template: turns the ten listed columns into (Col, Counts) rows,
-- ranks them by Counts (descending) and keeps ranks 1-5.
-- "your current select" is a placeholder for the existing query.
SELECT Col, rank() over (order by Counts desc) as rnk
from TD_UNPIVOT(
ON (
your current select
)
USING
-- name of the output column receiving the values
VALUE_COLUMNS('Counts')
-- name of the output column receiving the source column name
UNPIVOT_COLUMN('Col')
-- source columns to unpivot
COLUMN_LIST('equ_gender1'
,'exp_ex_bmyr1'
,'equ_age1'
,'maritalstatus1'
,'person_type1'
,'homeowner'
,'dwelling_size'
,'lengthofresidence'
,'childrenage0_18'
,'bb_population_flag')
) dt
qualify rnk <= 5;
Edit:
Additionally you might replace your LEFT JOIN with a single OLAP-function. Depending on the number of rows per cnst_mstr_id this might be more efficient as you need a ROW_NUMBER anyway:
-- Single-pass variant of the change count: each row also carries the value of the
-- next row in descending start-timestamp order (i.e. the previous version) for the
-- same cnst_mstr_id; only the active row (end date 9999-12-31) is kept afterwards.
-- The "..." lines stand for the remaining columns, handled the same way.
SEL
SUM(CASE WHEN COALESCE(equ_gender1,'') = COALESCE(last_equ_gender1,'') THEN 0 ELSE 1 END ) AS equ_gender1_chg_cnt,
...
FROM
( SELECT
-- fix: keep the current value and expose the previous one under the name
-- last_equ_gender1, which is what the outer select compares against
equ_gender1,
min(equ_gender1) OVER (PARTITION BY cnst_mstr_id ORDER BY cnst_chrctrstc_strt_ts DESC rows between 1 following and 1 following) as last_equ_gender1,
...
FROM arc_mdm_Tbls.cnst_chrctrstc_abc
WHERE load_id=1024
-- QUALIFY runs after the window function, so ended rows still feed last_equ_gender1
qualify cnst_chrctrstc_end_dt= date '9999-12-31'
)act

linq to sql: 2 counts as result

I want to do the following in LINQ to SQL:
-- count_1: rows of tableName where x = y; count_2: all rows of tableName2.
SELECT
    COUNT(*) AS count_1,
    (SELECT COUNT(*) FROM tableName2) AS count_2
FROM tableName
WHERE x = y
The result should be
Column 1 | column 2
--------------------
50 34
What you need to do is something like this:
-- Two independent scalar subqueries returned side by side in a single row.
SELECT
    (SELECT COUNT(*) FROM tableName WHERE x = y) AS count_1,
    (SELECT COUNT(*) FROM tableName2) AS count_2

Get the most recent record for each user where value is 'K', action id is null or its state is 1

I have the following tables in SQL Server:
user_id, value, date, action_id
----------------------------------
1 A 1/3/2012 null
1 K 1/4/2012 null
1 B 1/5/2012 null
2 X 1/3/2012 null
2 K 1/4/2012 1
3 K 1/3/2012 null
3 L 1/4/2012 2
3 K 1/5/2012 3
4 K 1/3/2012 null
action_id, state
----------------------------------
1 0
2 1
3 1
4 0
5 1
I need to return the most recent record for each user where the value is 'K', the action id is either null or its state is set to 1. Here's the result set I want:
user_id, value, date, action_id
----------------------------------
3 K 1/5/2012 3
4 K 1/3/2012 null
For user_id 1, the most recent value is B and its action id is null, so I consider this the most recent record, but its value is not K.
For user_id 2, the most recent value is K, but action id 1 has state 0, so I fallback to X, but X is not K.
user_id 3 and 4 are straightforward.
I'm interested in Linq to SQL query in ASP.NET, but for now T-SQL is fine too.
The SQL query would be :
-- Single most recent 'K' row across the whole table whose action is either absent
-- or in state 1 (one row overall, not one per user).
SELECT TOP 1
    rec.*
FROM Table1 AS rec
LEFT JOIN Table2 AS act
    ON rec.action_id = act.action_id
WHERE rec.Value = 'K'
  AND (rec.action_id IS NULL OR act.state = 1)
ORDER BY rec.date DESC
LINQ Query :
// Most recent Table1 row with Value "K" whose action_id is either null or is among
// the action_ids of Table2 rows with State == 1; null when no row qualifies.
var result = context.Table1.Where(T1=> T1.Value == "K"
&& (T1.action_id == null ||
context.Table2
.Where(T2=>T2.State == 1)
.Select(T2 => T2.action_id).Contains(T1.action_id)))
// newest first, then take the first row (a single row overall, not one per user)
.OrderByDescending(T => T.date)
.FirstOrDefault();
Good Luck !!
This query will return desired result set:
-- Latest 'K' row per user: number each user's 'K' rows from newest to oldest and
-- keep number 1. Note that action_id / state is not examined by this query.
WITH latest_k AS (
    SELECT
        user_id,
        value,
        date,
        action_id,
        ROW_NUMBER() OVER (PARTITION BY user_id ORDER BY date DESC) AS RowNum
    FROM testtable
    WHERE value = 'K'
)
SELECT *
FROM latest_k
WHERE RowNum = 1
You can also try following approach if user_id and date combination is unique
Make sure to get the order of predicates in the join to be able to use indexes:
-- Latest 'K' date per user, joined back to the table to fetch the full row(s)
-- stored for that user on that date.
SELECT
    rec.*
FROM (
    SELECT
        user_id,
        MAX(date) AS LastDate
    FROM testtable
    WHERE value = 'K'
    GROUP BY user_id
) AS tblLastValue
INNER JOIN testtable AS rec
    ON rec.user_id = tblLastValue.user_id
   AND rec.date = tblLastValue.LastDate
This would select the top entries for all users as described in your specification, as opposed to TOP 1 which just selects the most recent entry in the database. I'm assuming here that your tables are named users and actions:
-- Per user: take the most recent record whose action is absent or in state 1, and
-- return it only if its value is 'K'.
WITH usersactions AS
(
    SELECT
        u.user_id,
        u.value,
        u.date,
        u.action_id,
        -- fix: the function is ROW_NUMBER(), not "ROW NUMBER()"
        ROW_NUMBER() OVER (PARTITION BY u.user_id ORDER BY u.date DESC, u.action_id DESC) AS row
    FROM users u
    LEFT OUTER JOIN actions a ON u.action_id = a.action_id
    -- Rank all eligible records, not just the 'K' ones: a newer eligible non-'K'
    -- record (e.g. user 1's 'B') must hide an older 'K'.
    WHERE u.action_id IS NULL OR a.state = 1
)
SELECT *
FROM usersactions
WHERE row = 1
  AND value = 'K'
Or if you don't want to use a CTE:
-- Per user: take the most recent record whose action is absent or in state 1, and
-- return it only if its value is 'K' (derived-table form, no CTE).
SELECT *
FROM (
    SELECT
        u.user_id,
        u.value,
        u.date,
        u.action_id,
        -- fix: the function is ROW_NUMBER(), not "ROW NUMBER()"
        ROW_NUMBER() OVER (PARTITION BY u.user_id ORDER BY u.date DESC, u.action_id DESC) AS row
    FROM users u
    LEFT OUTER JOIN actions a ON u.action_id = a.action_id
    -- Rank all eligible records, not just the 'K' ones: a newer eligible non-'K'
    -- record (e.g. user 1's 'B') must hide an older 'K'.
    WHERE u.action_id IS NULL OR a.state = 1
) useractions
WHERE row = 1
  AND value = 'K'

Resources