SQL Server query plan using hash vs streamaggregate

SQL Server query plan using hash vs streamaggregate - sql-server

I have three tables
CREATE TABLE [dbo].[caja]
(
[orden] [int] IDENTITY(1,1) NOT NULL,
[ejercicio] [int] NOT NULL,
[numero] [int] NOT NULL,
[tipo] [char](1) NOT NULL CONSTRAINT [DF_caja_tipo] DEFAULT ('N'),
[inicial] [int] NOT NULL,
[final] [int] NOT NULL,
[total] [int] NOT NULL,
CONSTRAINT [PK_caja]
PRIMARY KEY CLUSTERED ([orden] ASC),
CONSTRAINT [IX___caja__ejercicio_numero]
UNIQUE NONCLUSTERED ([ejercicio] ASC, [numero] ASC, [tipo] ASC),
CONSTRAINT [IX___caja__tipo_inicial]
UNIQUE NONCLUSTERED ([tipo] ASC, [inicial] ASC)
) ON [PRIMARY]
CREATE TABLE [dbo].[holograma]
(
[orden] [int] IDENTITY(1,1) NOT NULL,
[taller] [int] NOT NULL,
[tipo] [nchar](1) NOT NULL,
[inicial] [int] NOT NULL,
[final] [int] NOT NULL,
[total] [int] NOT NULL,
[fecha] [smalldatetime] NOT NULL,
CONSTRAINT [PK_holograma]
PRIMARY KEY CLUSTERED ([tipo] ASC, [inicial] ASC)
)
CREATE TABLE [dbo].[Tally]
(
[N] [int] IDENTITY(1,1) NOT NULL
CONSTRAINT [PK_Tally_N]
PRIMARY KEY CLUSTERED ([N] ASC)
)
Tally table contains one million records from N=1 to 1,000,000
Caja table contains a list of valid values to insert into holograma table,
Example:
orden | ejercicio | numero | tipo | inicial | final
888 | 2015 | 74 | R | 50144001 | 50144660
889 | 2015 | 75 | R | 50144661 | 50146660
and holograma:
taller | tipo | inicial | final | total | fecha
160 | A | 50144651 | 50144750 | 100 | 15/04/2015 <--values of two caja's
missing data
49 | A | 50144826 | 50145025 | 200 | 15/04/2015
I'm trying to get the missing data. Using the example must show me from 50144751 to 50144825, counting 75 numbers.
The problem is with the count aggregate: it takes too much time when I delimit the values. This is my query:
-- Counts, for one caja (box), how many individual hologram numbers have been delivered.
-- T-SQL local variables take the "@" sigil; a leading "#" names a temporary object, not a variable.
declare @tipo nchar(1)
,@numero int
,@ejercicio int
,@largo int
;
SET TRANSACTION ISOLATION LEVEL READ UNCOMMITTED;
select @tipo='A' , @numero=176;
-- @largo is the divisor that splits a hologram number into its series prefix and in-series offset.
select @ejercicio=2015, @largo=10000000;
with
c as ( /* The caja (box) to look up */
-- NOTE(review): entregados is not part of the caja DDL shown with the question; presumably it exists in the real table.
select tipo, numero, inicial / @largo as serie, inicial as cInicial, final as cFinal, total, entregados, orden as cOrden
from caja
where 1=1
and (tipo=@tipo)
and (numero=@numero)
and ejercicio=@ejercicio
)
, h as ( /* The holograma rows that overlap that caja's range, rebased to in-series offsets */
select serie, inicial - (serie*@largo) as hInicial, final - (serie*@largo) as hFinal
, h1.orden as hOrden
, cOrden, cInicial, cFinal
from holograma as h1
inner join c
on h1.tipo=c.tipo
and h1.inicial>=40000000
and (h1.inicial between c.cInicial and c.cFinal or h1.final between c.cInicial and c.cFinal)
)
, t2 as ( /* Tally numbers limited to the caja's own range; used for the correction */
select n
from tally
inner join c on (n between cInicial- (serie*@largo) and cFinal - (serie*@largo))
)
, t as ( /* Generate the individual numbers according to the DELIVERY of holograms */
select serie,n as nHolograma
, hOrden, h.cOrden
from t2
inner join h on (n between hInicial and hFinal)
)
, e as ( /* How many holograms per caja have been delivered; used to correct the caja table */
select cOrden, COUNT(nHolograma) as totalG
from t
group by cOrden
)
select * from e
And the query plan is
https://pastee.org/kpt3t
but if I change in the "t" subquery from "t2" table to "tally" table
, t as ( /* Generate the individual numbers according to the DELIVERY of holograms */
/* Variant: reads tally directly instead of the range-limited t2 CTE */
select serie,n as nHolograma
, hOrden, cOrden
from tally, h
where 1=1
and (n between hInicial and hFinal)
)
the query is almost instant. this is the query plan
https://pastee.org/hfpnz
The difference is the StreamAggregate using tally table to Hash using t2 subquery (line 196) .
I use the "t2"-subquery to delimit to the values of the current caja.
Why does the aggregate change? It's one minute of difference: about 1 second without delimiting the numbers versus 1:02 minutes with delimiting.

Related

JSON Many to Many RelationShip Group By

I'm trying to create an SQL query allowing me to do this:
I have 3 tables in SQL Server 2017:
CREATE TABLE [dbo].[PRODUCTCATEGORY]
(
[PROD_ID] [int] NOT NULL,
[CAT_ID] [int] NOT NULL
CONSTRAINT [PK_PRODUCTCATEGORY]
PRIMARY KEY CLUSTERED ([PROD_ID] ASC, [CAT_ID] ASC)
)
CREATE TABLE [dbo].[CATEGORY]
(
[CAT_ID] [int] IDENTITY(1,1) NOT NULL,
[CAT_TITLE] [varchar](50) NOT NULL
CONSTRAINT [PK_CATEGORY]
PRIMARY KEY CLUSTERED ([CAT_ID] ASC)
)
CREATE TABLE [dbo].[PRODUCT]
(
[PROD_ID] [int] IDENTITY(1,1) NOT NULL,
[PROD_TITLE] [varchar](50) NOT NULL
CONSTRAINT [PK_PRODUCT]
PRIMARY KEY CLUSTERED ([PROD_ID] ASC)
)
A product can have 1 to many categories
A category can have 1 to many products
PROD_ID
PROD_TITLE
1
Book 1
2
Book 2
CAT_ID
CAT_TITLE
1
Cat 1
2
Cat 2
3
Cat 3
PROD_ID
CAT_ID
1
1
1
2
2
1
2
3
I would like to retrieve this:
| CAT_ID |CAT_TITLE | PRODUCTS |
|:------- |:--------:|:------------------------------------------------------------------------|
| 1 | Cat 1 |[{"PROD_ID":1,"PROD_TITLE":"Book 1"},{"PROD_ID":2,"PROD_TITLE":"Book 2"}]|
| 2 | Cat 2 |[{"PROD_ID":1,"PROD_TITLE":"Book 1"}] |
| 3 | Cat 3 |[{"PROD_ID":2,"PROD_TITLE":"Book 2"}] |
Thanks for your help

I just found this, using FOR JSON:
https://learn.microsoft.com/en-us/sql/relational-databases/json/format-query-results-as-json-with-for-json-sql-server?view=sql-server-ver15
I think something like this might work:
-- One row per category; the correlated subquery serializes that category's products with FOR JSON PATH.
SELECT
    cat.CAT_ID,
    cat.CAT_TITLE,
    (
        SELECT
            prod.PROD_ID,
            prod.PROD_TITLE
        FROM PRODUCTCATEGORY AS link
        INNER JOIN PRODUCT AS prod
            ON prod.PROD_ID = link.PROD_ID
        WHERE link.CAT_ID = cat.CAT_ID
        FOR JSON PATH
    ) AS ProductsAsJson
FROM CATEGORY AS cat

Check in and check out time in SQL

simple data of table
My table is:
SELECT TOP (1000)
[ID]
,[UserName]
,[CheckTime]
,[Checktype]
,[CheckinLocation]
,[lat]
,[lng]
FROM
[dbo].[CheckTime]
INSERT INTO [dbo].[CheckTime] ([UserName], [CheckTime], [Checktype],[CheckinLocation], [lat], [lng])
VALUES (<UserName, nchar(10),>
,<CheckTime, datetime,>
,<Checktype, nvarchar(50),>
,<CheckinLocation, nvarchar(50),>
,<lat, float,>
,<lng, float,>)
GO
Create table script:
CREATE TABLE [dbo].[CheckTime]
(
[ID] [int] IDENTITY(1,1) NOT NULL,
[UserName] [nchar](10) NULL,
[CheckTime] [datetime] NULL,
[Checktype] [nvarchar](50) NULL,
[CheckinLocation] [nvarchar](50) NULL,
[lat] [float] NULL,
[lng] [float] NULL,
CONSTRAINT [PK_CheckTime]
PRIMARY KEY CLUSTERED ([ID] ASC)
WITH (STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF) ON [PRIMARY]
) ON [PRIMARY]
I need to select, for each distinct user, the minimum and maximum value of the datetime:
max CheckTime as check out
min CheckTime as check in
I need a result like this:
id | Username | check in | check out
---+----------+-------------------+-------------------
1 | 10 | 2017-1-2 08:02:05 | 2017-1-2 10:02:05
1 | 12 | 2017-1-2 08:02:05 | 2017-1-2 10:02:05
1 | 12 | 2017-1-3 08:02:05 | 2017-1-3 10:02:05
1 | 10 | 2017-1-3 08:02:05 | 2017-1-3 10:02:05
I have tried:

You can try the following query.
-- One row per user per calendar day: the earliest CheckTime is the check-in, the latest is the check-out.
SELECT
    UserName,
    CAST(CheckTime AS date) AS CheckDate,
    MIN(CheckTime) AS [check in],
    MAX(CheckTime) AS [check out]  -- the alias contains a space, so it must be bracketed
FROM dbo.CheckTime
-- ID is deliberately not grouped: it is the identity primary key, so grouping by it
-- would put every row in its own group and MIN/MAX would return the same value.
GROUP BY
    UserName,
    CAST(CheckTime AS date)

Displaying SQL SELECT rows in a table display

My database structure and sample data:
CREATE TABLE [dbo].[users] (
[user_id] [bigint] IDENTITY(1,1) NOT NULL,
[user_name] [nvarchar](50) NULL,
[first_name] [nvarchar](50) NULL,
[last_name] [nvarchar](50) NULL,
[id_number] [nvarchar](50) NULL,
CONSTRAINT [PK_users] PRIMARY KEY CLUSTERED
(
[user_id] ASC
)
)
insert into users (user_name, first_name, last_name, id_number)
select 'user1','John','Brown',7707071231
union all
select 'user2','Mary','Jane',7303034432
union all
select 'user3','Peter','Pan',5503024441
CREATE TABLE [dbo].[quiz_results] (
[result_id] [bigint] IDENTITY(1,1) NOT NULL,
[quiz_id] [bigint] NOT NULL,
[user_id] [bigint] NOT NULL,
[grade] [bigint] NULL,
CONSTRAINT [PK_quizresults] PRIMARY KEY CLUSTERED
(
[result_id] ASC
)
)
insert into quiz_results (quiz_id, user_id, grade)
select 1,1,88
union all
select 2,1,84
union all
select 3,1,33
union all
select 1,2,65
This query gives me the quiz results for user_id = 1:
-- Quiz results for a single student, with the formatted student name repeated on every row.
-- NOTE(review): the quiz table is not part of the sample schema shown; it is assumed to expose quiz_id and quiz_name.
SELECT
users.first_name + ' ' + users.last_name + ' (' + users.id_number + ')' AS student_name,
quiz.quiz_name,
quiz_results.grade
FROM quiz_results
INNER JOIN quiz ON quiz_results.quiz_id = quiz.quiz_id
INNER JOIN users ON quiz_results.user_id = users.user_id
-- user_id 1 is 'John Brown' in the sample data, matching the output shown below
WHERE users.user_id = 1
like this:
+-------------------------+-----------+-------+
| student_name | quiz_name | grade |
+-------------------------+-----------+-------+
| John Brown (7707071231) | quiz a | 88 |
| John Brown (7707071231) | quiz b | 84 |
| John Brown (7707071231) | quiz c | 33 |
+-------------------------+-----------+-------+
But I don't want the student_name shown on each row. I want this output:
+-------------------------+
| John Brown (7707071231) |
+-------------------------+
| quiz a | 88 |
| quiz b | 84 |
| quiz c | 33 |
+-------------------------+
The student_name is on the first row followed by one row for each quiz result - I specifically want the student_name on the first row.
The query will only ever be for one student_name. Essentially, I want to produce a "certificate" directly in the SQL.
What is the best SQL way to get the data into that format? Will a CTE or the STUFF() command work? Or is there a better way?

This simply can't be done in SQL.
SQL can only return scalar values or result sets (tabular data), and result sets don't support "column span" - so the only way to do it is in the presentation layer - but you can do some things in SQL Server to make your job in the presentation layer easier.
One option is to create a stored procedure that will return the student name as an output parameter, and the quiz grades as a result set:
-- Returns one user's display name through an OUTPUT parameter and that user's quiz grades as a result set.
-- Parameters:
--   @UserId   : users.user_id of the student (bigint, matching the column type)
--   @UserName : OUTPUT, formatted as "first_name last_name (id_number)"
-- Result set: one row per quiz result (quiz_name, grade) for @UserId.
CREATE PROCEDURE GetQuizResultByUserId
(
@UserId bigint,
@UserName nvarchar(154) OUTPUT
)
AS
-- it's 154 because 50 + 1 + 50 + 2 + 50 + 1
SELECT @UserName = first_name + ' ' + last_name + ' (' + id_number + ')'
FROM users
WHERE user_id = @UserId
SELECT
quiz.quiz_name,
quiz_results.grade
FROM quiz_results
INNER JOIN quiz ON quiz_results.quiz_id = quiz.quiz_id
WHERE quiz_results.user_id = @UserId
GO
Another option, since this is 2016 version, is to return the results as Json, using the For Json clause:
-- Returns the student name plus a nested array of quiz results as a single JSON document.
-- Expects @UserId to be declared and set by the caller (T-SQL variables use "@", not "#").
SELECT first_name + ' ' + last_name + ' (' + id_number + ')' As UserName,
(
SELECT quiz.quiz_name,
quiz_results.grade
FROM quiz_results
INNER JOIN quiz ON quiz_results.quiz_id = quiz.quiz_id
WHERE quiz_results.user_id = @UserId
FOR JSON AUTO
) As quizResult
FROM users
WHERE user_id = @UserId
FOR JSON AUTO
The result is the following json:
[
{
"UserName": "John Brown (7707071231)",
"quizResult": [
{
"quiz_name": "quiz a",
"grade": 88
},
{
"quiz_name": "quiz b",
"grade": 84
},
{
"quiz_name": "quiz c",
"grade": 33
}
]
}
]

How could I make a series of joins work with max value when aggregates do not work in them?

I'm looking only to get classification ids which are between the valid year range in classification. I'm using left joins because NULLs should be permitted.
I have tables:
CREATE TABLE classifications (
[id] [bigint] IDENTITY(1,1) NOT NULL,
[classification_code] [varchar](20) NOT NULL,
[description] [varchar](255) NULL,
[valid_from] [int] NULL,
[valid_to] [int] NULL
--Rest of constraints...
)
-- Sample classifications: each code has consecutive, non-overlapping validity year ranges.
insert into classifications (classification_code, description, valid_from, valid_to)
values ('05012','Classification Number 1',2007,2012),
('05012','Classification Number 1',2013,2016),
('05012','Classification Number 1',2017,2020), -- row constructors are separated by commas, not periods
('12043','Classification Number 2',2007,2010),
('12043','Classification Number 2',2011,2020),
('12345','Classification Number 3',2013,2015),
('12345','Classification Number 3',2016,2020),
('54321','Classification Number 4',2007,2009),
('54321','Classification Number 4',2010,2013),
('54321','Classification Number 4',2014,2020)
CREATE TABLE comm_info_a (
[id] [bigint] IDENTITY(1,1) NOT NULL,
[comm_code] [nchar](10) NOT NULL, /*should be unique*/
[classification_code] [nchar](6) NULL,
[thing] [nchar](6) NULL
--Rest of constraints...
)
insert into comm_info_a (comm_code, classification_code)
values ('0100100000','54321'),
('8090010000','05012'),
('5002310010','12043'),
('0987654321','54321')
CREATE TABLE comm_info_b (
[id] [bigint] IDENTITY(1,1) NOT NULL,
[comm_code] [nchar](10) NOT NULL, /*should be unique*/
[classification_code] [nchar](6) NULL
--Rest of constraints...
)
insert into comm_info_b (comm_code, classification_code)
values ('0100100000','12043'),
('8090010000','00000'),
('5002310010','05012'),
('1234567890','12345')
CREATE TABLE transactions (
[comm_code] [varchar](50) NULL,
[year] [varchar](255) NULL
--Rest of constraints...
)
-- Sample transactions: several years per comm_code.
-- The last row constructor must not be followed by a comma.
insert into transactions (comm_code, year) values
('0100100000', 2013),
('0100100000', 2015),
('0100100000', 2017),
('8090010000', 2009),
('8090010000', 2010),
('8090010000', 2011),
('8090010000', 2015),
('8090010000', 2017),
('8090010000', 2018),
('5002310010', 2008),
('5002310010', 2014)
And finally:
CREATE TABLE comm (
[id] [bigint] IDENTITY(1,1) NOT NULL,
[comm_code] [varchar](20) NULL, /*should be unique*/
[fk_classification_id_a] [bigint] NULL,
[fk_classification_id_b] [bigint] NULL
--Rest of constraints...
)
I am working on a query to insert comms from transactions, and comms should have unique comm_code
The query is as follows:
INSERT INTO comm
(comm_code,
fk_classification_id_a,
fk_classification_id_b)
SELECT comm_code,
ca.id,
cb.id,
MAX(year)
FROM transactions t
LEFT JOIN comm_info_a mia ON mia.comm_code=t.comm_code
LEFT JOIN comm_info_b mib ON mib.comm_code=t.comm_code
--these next two joins obviously do not work so I'm looking for something like it. Treat them as 'pseudo-code'
LEFT JOIN classifications ca ON ca.classification_code=mia.classification_code AND
MAX(t.year) BETWEEN ca.valid_from AND ca.valid_to
LEFT JOIN classifications cb ON cb.classification_code=mib.classification_code AND
MAX(t.year) BETWEEN cb.valid_from AND cb.valid_to
-- end of the two joins
WHERE NOT EXISTS
(SELECT DISTINCT comm_code FROM comm)
GROUP BY
t.comm_code
t.classification_code
So in the end I'm looking to get something like this as a result:
comm_code | fk_classification_id_a | fk_classification_id_b
-----------|------------------------|-----------------------
0100100000 | 5 | 10
8090010000 | 3 | NULL
5002310010 | 5 | 2
Please note that the comm_code is unique in this table! Therefore, I want the comms from the newest transactions (thus the aggregate max year), and they should have the ids of the classifications whose validity range contains that transaction year.
The real query is much more complex and longer but this pretty much covers all bases. Take a look into what is commented. I understand that it should be doable with a sub query of some sort, and I've tried, but so far I haven't found a way to pass aggregates to subqueries.
How could I tackle this problem?

Revised answer uses a common table expression to calculate the maximum year per comm_code and to exclude the comm_codes not wanted in the final result. After that the joins to the classification tables are straight forward as we have the comm_max_year value on each row to use in the joins.
-- transCTE: every transaction whose comm_code is not yet in comm, tagged with the
-- latest year seen for that comm_code, so the classification joins can range-match on it.
with transCTE as (
select
t.*
-- comm_code exists in both transactions and comm, so it must be qualified here
-- NOTE(review): transactions.year is varchar, so MAX compares strings; that matches numeric order only for same-length years
, max(t.year) over(partition by t.comm_code) comm_max_year
from transactions t
left join comm on t.comm_code = comm.comm_code -- this table not in sample given
where comm.comm_code IS NULL -- use instead of NOT EXISTS
)
-- DISTINCT collapses the per-transaction rows to one row per comm_code / classification pair.
SELECT DISTINCT
t.comm_code
, ca.id as fk_classification_id_a
, cb.id as fk_classification_id_b
, t.comm_max_year
FROM transCTE t
LEFT JOIN comm_info_a mia ON mia.comm_code = t.comm_code
LEFT JOIN classifications ca ON mia.classification_code = ca.classification_code
AND t.comm_max_year BETWEEN ca.valid_from AND ca.valid_to
LEFT JOIN comm_info_b mib ON mib.comm_code = t.comm_code
LEFT JOIN classifications cb ON mib.classification_code = cb.classification_code
AND t.comm_max_year BETWEEN cb.valid_from AND cb.valid_to
ORDER BY
t.comm_code
;
GO
comm_code | fk_classification_id_a | fk_classification_id_b | comm_max_year
:--------- | :--------------------- | :--------------------- | :------------
0100100000 | 10 | 5 | 2017
5002310010 | 5 | 2 | 2014
8090010000 | 3 | null | 2018
Demo at dbfiddle here
CREATE TABLE transactions (
[comm_code] [varchar](50) NULL,
[year] [varchar](255) NULL
--Rest of constraints...
)
insert into transactions (comm_code, year) values
('0100100000', 2013),
('0100100000', 2015),
('0100100000', 2017),
('8090010000', 2009),
('8090010000', 2010),
('8090010000', 2011),
('8090010000', 2015),
('8090010000', 2017),
('8090010000', 2018),
('5002310010', 2008),
('5002310010', 2014)
;
GO
11 rows affected
CREATE TABLE classifications (
[id] [bigint] IDENTITY(1,1) NOT NULL,
[classification_code] [varchar](20) NOT NULL,
[description] [varchar](255) NULL,
[valid_from] [int] NULL,
[valid_to] [int] NULL
--Rest of constraints...
)
insert into classifications (classification_code, description, valid_from, valid_to)
values ('05012','Classification Number 1',2007,2012),
('05012','Classification Number 1',2013,2016),
('05012','Classification Number 1',2017,2020),
('12043','Classification Number 2',2007,2010),
('12043','Classification Number 2',2011,2020),
('12345','Classification Number 3',2013,2015),
('12345','Classification Number 3',2016,2020),
('54321','Classification Number 4',2007,2009),
('54321','Classification Number 4',2010,2013),
('54321','Classification Number 4',2014,2020)
;
GO
10 rows affected
CREATE TABLE comm_info_a (
[id] [bigint] IDENTITY(1,1) NOT NULL,
[comm_code] [nchar](10) NOT NULL, /*should be unique*/
[classification_code] [nchar](6) NULL,
[thing] [nchar](6) NULL
--Rest of constraints...
);
GO
✓
insert into comm_info_a (comm_code, classification_code)
values ('0100100000','54321'),
('8090010000','05012'),
('5002310010','12043'),
('0987654321','54321')
;
GO
4 rows affected
CREATE TABLE comm_info_b (
[id] [bigint] IDENTITY(1,1) NOT NULL,
[comm_code] [nchar](10) NOT NULL, /*should be unique*/
[classification_code] [nchar](6) NULL
--Rest of constraints...
);
GO
✓
insert into comm_info_b (comm_code, classification_code)
values ('0100100000','12043'),
('8090010000','00000'),
('5002310010','05012'),
('1234567890','12345');
GO
4 rows affected

Generate an excel like report from SQL

I am a novice to SQL and
I have two tables Ticket and TicketAttributes with following Schema
CREATE TABLE [dbo].[Ticket](
[TicketID] [int] IDENTITY(1,1) NOT NULL, --Primary key
[Category] [varchar](256) NOT NULL,
[Description] [varchar](256) NULL,
[LibID] [int] NOT NULL,
[Status] [smallint] NULL,
[LogID] [int] NULL)
Ticket Attributes
CREATE TABLE [dbo].[TicketAttributes](
[TicketID] [int] NOT NULL,
[TicketAttrID] [int] IDENTITY(1,1) NOT NULL,
[AttributeID] [int] NOT NULL,
[AttributeGroup] [varchar](255) NULL,
[AttributeValue] [nvarchar](max) NULL,
[Status] [smallint] NULL,
[LogID] [int] NULL)
Where Ticket Attribute is another table that stores different attributes of a ticket like TicketStatus, TicketCategory etc..
Now I need to generate a report that looks like
TicketStatus1 TicketStatus 2 TicketStatus3
-----------------------------------------------------------------
TicketCategory1 7 3
Ticketcategory2 4
TicketCategory3 8
I want to see the count of each of the status of each ticket category.
For Eg:-
I have the following Data in TicketTable
----------------------------------------------
TicketID Name Price Date
------------------------------------------------
155 Ticket4 $20 16 Jan 2016
157 Ticket3 $300 17 Jan 2016
158 Ticket1 $100 18 Jan 2016
159 Ticket2 $500 19 Jan 2016
Now in the TicketAttribute Table
----------------------------------------------
TicketID AttributeID AttributeValue
------------------------------------------------
155 500 Joe
155 600 Reserved
155 700 Economy
155 800 San Jose
where AttributeIDs
500=Nameofthe Passenger
600= Status of Ticket
700= Class
800= Destination
Now lets say I want to see what is the count of number of active tickets in each of the class per status
Booked Cancelled PaymentPending ............
-----------------------------------------------------------------
Economy 7 3
Economy Plus 4
Business 8
Hope I am clear now.
How do I go about this using a SQL query?

USING PIVOT
-- Pairs each ticket's class (attribute 700) with its status (attribute 600),
-- then pivots the statuses into one count column per status value.
;WITH class_status AS (
    SELECT
        cls.AttributeValue AS Class,
        st.AttributeValue AS StatusOfTicket
    FROM Ticket AS tk
    LEFT JOIN TicketAttributes AS cls
        ON cls.TicketId = tk.TicketId
        AND cls.AttributeID = 700
    LEFT JOIN TicketAttributes AS st
        ON st.TicketId = tk.TicketId
        AND st.AttributeID = 600
)
SELECT
    pvt.Class,
    pvt.[Reserved],
    pvt.[Cancelled],
    pvt.[PaymentPending]
FROM class_status
PIVOT (
    COUNT(StatusOfTicket)
    FOR StatusOfTicket IN ([Reserved], [Cancelled], [PaymentPending])
) AS pvt
USING Conditional Aggregation:
-- Per class (attribute 700), counts the distinct tickets in each status (attribute 600).
-- A CASE with no matching branch yields NULL, which COUNT ignores.
SELECT
    cls.AttributeValue AS Class,
    COUNT(DISTINCT CASE st.AttributeValue WHEN 'Reserved' THEN cls.TicketId END) AS Reserved,
    COUNT(DISTINCT CASE st.AttributeValue WHEN 'Cancelled' THEN cls.TicketId END) AS Cancelled,
    COUNT(DISTINCT CASE st.AttributeValue WHEN 'PaymentPending' THEN cls.TicketId END) AS PaymentPending
FROM Ticket AS tk
LEFT JOIN TicketAttributes AS cls
    ON cls.TicketId = tk.TicketId
    AND cls.AttributeID = 700
LEFT JOIN TicketAttributes AS st
    ON st.TicketId = tk.TicketId
    AND st.AttributeID = 600
GROUP BY
    cls.AttributeValue

Develop Reference

c reactjs sql-server angularjs arrays wpf database batch-file google-app-engine silverlight

SQL Server query plan using hash vs streamaggregate - sql-server

Related

JSON Many to Many RelationShip Group By

Check in and check out time in SQL

Displaying SQL SELECT rows in a table display

How could I make a series of joins work with max value when aggregates do not work in them?

Generate an excel like report from SQL

Categories

Resources