Grouping based on the match percentage

Grouping based on the match percentage - sql-server

I need to self-join and display the records based on the match percentage
Name | Village
Jones Ashley, MPK
Meyer Peter, JSK
A Jones, MPK
David, ARK
Peter M, JSK
Peter M, JSK
David, ARK
-- Pair up customers from the same village whose names score at least 80
-- with dbo.matchname (the asker's scalar UDF; its definition is not shown).
SELECT
    x.Name,
    y.Name,
    dbo.matchname(x.Name, y.Name) AS [match]
FROM cust AS x
INNER JOIN cust AS y
    ON x.village = y.village
WHERE dbo.matchname(x.Name, y.Name) >= 80
I have written a function which takes two names and calculates the percentage. For ex: Peter M and Meyer Peter is 80%.
I would now like to display the related records order by the match percentage. For ex:
Jones Ashley, MPK
A Jones, MPK
David, ARK
David, ARK
Peter M, JSK
Peter M, JSK
Meyer Peter, JSK
Order by doesn't work here as the initial could be at the last for some names. I need some kind of grouping but not sure how to do.

I don't know what your matchname scalar function does so I just created a generic scalar function that compares two strings and returns a small number.
-- (0) Prep: a matchname function
-- Stand-in for the asker's real comparison logic. It derives an integer from the
-- ASCII code of the first character of each string: (|code1 + 3| - code2) * 7.
-- The existence check looks in the current database, which is where the
-- DROP/CREATE below operate.
IF OBJECT_ID('dbo.matchname') IS NOT NULL DROP FUNCTION dbo.matchname;
GO
CREATE FUNCTION dbo.matchname(@string1 varchar(40), @string2 varchar(40))
RETURNS int AS
BEGIN
    RETURN (ABS(ASCII(@string1) + 3) - ASCII(@string2)) * 7;
END;
Below is some sample data and a solution. The most important thing to note is how I filtered my CROSS JOIN:
WHERE x.someid < y.someid
Doing this prevents you from evaluating the same pair of records twice, e.g. both dbo.matchname('John Smith', 'George Washington') and dbo.matchname('George Washington', 'John Smith'). It also stops a row from being compared with itself.
Sample data and solution
-- Sample data
DECLARE @yourtable TABLE
(
    someid  int identity primary key clustered,
    [Name]  varchar(40),
    Village varchar(10),
    index nc_yt nonclustered([Name] ASC)
);
INSERT @yourtable ([Name], Village)
VALUES
    ('Jones Ashley', 'MPK'),
    ('Meyer Peter', 'JSK'),
    ('A Jones', 'MPK'),
    ('David', 'ARK'),
    ('Peter M', 'JSK'),
    ('Peter M', 'JSK'),
    ('David', 'ARK');
-- Solution
-- Compare every pair of rows exactly once (x.someid < y.someid), keep the pairs
-- scoring at least 80, and collapse duplicate (Name1, Name2, match) combinations
-- to a single row via rn = 1.
-- NOTE: the question's query also required x.village = y.village; add that
-- predicate to the WHERE clause if matches must stay within one village.
WITH uniqueList AS
(
    SELECT
        rn = ROW_NUMBER() OVER
        (
            PARTITION BY x.Name, y.Name, dbo.matchname(x.Name, y.Name)
            ORDER BY (SELECT NULL)
        ),
        Name1 = x.Name,
        Name2 = y.Name,
        id1   = x.someid,
        id2   = y.someid,
        dbo.matchname(x.Name, y.Name) AS match
    FROM @yourtable x
    CROSS JOIN @yourtable y
    WHERE x.someid < y.someid
      AND dbo.matchname(x.Name, y.Name) >= 80
)
SELECT Name1, Name2, match
FROM uniqueList
WHERE rn = 1
ORDER BY match;
Now about that scalar-valued function... Scalar-valued user-defined functions (scalar UDFs for short) KILL performance, especially the way you're using yours! You can replace scalar UDFs with inline table-valued functions (iTVFs) for optimal performance.
First the new function:
-- Inline table-valued version of dbo.matchname: returns one row with a single
-- column, match, computed with the same (|code1 + 3| - code2) * 7 formula.
-- The existence check looks in the current database, which is where the
-- DROP/CREATE below operate.
IF OBJECT_ID('dbo.itvf_matchname') IS NOT NULL DROP FUNCTION dbo.itvf_matchname;
GO
CREATE FUNCTION dbo.itvf_matchname(@string1 varchar(40), @string2 varchar(40))
RETURNS TABLE WITH SCHEMABINDING AS
RETURN (SELECT match = (ABS(ASCII(@string1) + 3) - ASCII(@string2)) * 7);
Now a solution (note that I commented out the original scalar UDF code):
-- sample data
DECLARE @yourtable TABLE
(
    someid  int identity primary key clustered,
    [Name]  varchar(40),
    Village varchar(10),
    index nc_yt nonclustered([Name] ASC)
);
INSERT @yourtable ([Name], Village)
VALUES
    ('Jones Ashley', 'MPK'),
    ('Meyer Peter', 'JSK'),
    ('A Jones', 'MPK'),
    ('David', 'ARK'),
    ('Peter M', 'JSK'),
    ('Peter M', 'JSK'),
    ('David', 'ARK');
-- solution
-- Same pairing logic as the scalar-UDF version, but the score comes from the
-- inline TVF through CROSS APPLY, so it is computed once per pair and reused in
-- the PARTITION BY, the SELECT list and the WHERE clause.
WITH uniqueList AS
(
    SELECT
        rn = ROW_NUMBER() OVER
        (
            PARTITION BY x.Name, y.Name, /*dbo.matchname(x.name, y.name)*/ itvf.match
            ORDER BY (SELECT NULL)
        ),
        Name1 = x.Name,
        Name2 = y.Name,
        --dbo.matchname(x.name, y.name) AS match
        itvf.match
    FROM @yourtable x
    CROSS JOIN @yourtable y
    -- Below: only 1 function call with results referenced multiple times
    CROSS APPLY dbo.itvf_matchname(x.Name, y.Name) itvf
    WHERE x.someid < y.someid
      --AND dbo.matchname(x.Name, y.Name) >= 80
      AND itvf.match >= 80
)
SELECT Name1, Name2, match
FROM uniqueList
WHERE rn = 1
ORDER BY match; -- same ordering as the scalar-UDF version so the outputs line up
The results are identical but the performance is profoundly better. To better understand why you should replace scalar UDFs with iTVFs let's do a 1500 row test (which means we'll be evaluating 1 million+ rows):
-- (3.1) Sample Data with an ID
-- Builds a 1,500-row temp table for the timing batches that follow.
SET NOCOUNT ON;
IF OBJECT_ID('tempdb..#yourtable') IS NOT NULL DROP TABLE #yourtable;
CREATE TABLE #yourtable
(
    someid  int identity primary key clustered,
    [Name]  varchar(40) NOT NULL,
    Village varchar(10) NOT NULL
);
-- Names are the first 10 characters of a dash-stripped NEWID(); every row gets
-- the same village. The self cross join of sys.all_columns is only a row source.
INSERT #yourtable ([Name], Village)
SELECT TOP (1500) LEFT(REPLACE(newid(),'-',''),10), 'xxx'
FROM sys.all_columns a
CROSS JOIN sys.all_columns b;
GO
CREATE NONCLUSTERED INDEX nc_yt ON #yourtable([Name] ASC);
GO
PRINT 'Scalar function'+char(13)+char(10)+REPLICATE('-',50);
GO
-- Times the scalar-UDF query. Results are assigned to @x instead of being
-- returned, and the elapsed milliseconds are printed. "GO 5" repeats the batch
-- five times.
DECLARE @x bit, @st datetime2 = getdate();
WITH uniqueList AS
(
    SELECT
        rn = ROW_NUMBER() OVER
        (
            PARTITION BY x.Name, y.Name, dbo.matchname(x.Name, y.Name)
            ORDER BY (SELECT NULL)
        ),
        Name1 = x.Name,
        Name2 = y.Name,
        dbo.matchname(x.Name, y.Name) AS match
    FROM #yourtable x
    CROSS JOIN #yourtable y
    WHERE x.someid < y.someid
      AND dbo.matchname(x.Name, y.Name) >= 80
)
SELECT @x = 1
FROM uniqueList
WHERE rn = 1;
PRINT DATEDIFF(MS, @st, getdate());
GO 5
PRINT char(13)+char(10)+'ITVF (serial)'+char(13)+char(10)+REPLICATE('-',50);
GO
-- Times the inline-TVF query, forced to a serial plan with MAXDOP 1. Results are
-- assigned to @x instead of being returned. "GO 5" repeats the batch five times.
DECLARE @x bit, @st datetime2 = getdate();
WITH uniqueList AS
(
    SELECT
        rn = ROW_NUMBER() OVER
        (
            PARTITION BY x.Name, y.Name, /*dbo.matchname(x.name, y.name)*/ itvf.match
            ORDER BY (SELECT NULL)
        ),
        Name1 = x.Name,
        Name2 = y.Name,
        --dbo.matchname(x.name, y.name) AS match
        itvf.match
    FROM #yourtable x
    CROSS JOIN #yourtable y
    -- Below: only 1 function call with results referenced multiple times
    CROSS APPLY dbo.itvf_matchname(x.Name, y.Name) itvf
    WHERE x.someid < y.someid
      --AND dbo.matchname(x.Name, y.Name) >= 80
      AND itvf.match >= 80
)
SELECT @x = 1
FROM uniqueList
WHERE rn = 1
OPTION (MAXDOP 1);
PRINT DATEDIFF(MS, @st, getdate());
GO 5
PRINT char(13)+char(10)+'ITVF Parallel'+char(13)+char(10)+REPLICATE('-',50);
GO
-- Times the inline-TVF query with dbo.make_parallel() applied.
-- NOTE: dbo.make_parallel() is not defined anywhere in this script; it must
-- already exist in the database (presumably a helper that encourages a parallel
-- plan -- confirm before running).
DECLARE @x bit, @st datetime2 = getdate();
WITH uniqueList AS
(
    SELECT
        rn = ROW_NUMBER() OVER
        (
            PARTITION BY x.Name, y.Name, /*dbo.matchname(x.name, y.name)*/ itvf.match
            ORDER BY (SELECT NULL)
        ),
        Name1 = x.Name,
        Name2 = y.Name,
        --dbo.matchname(x.name, y.name) AS match
        itvf.match
    FROM #yourtable x
    CROSS JOIN #yourtable y
    -- Below: only 1 function call with results referenced multiple times
    CROSS APPLY dbo.itvf_matchname(x.Name, y.Name) itvf
    CROSS APPLY dbo.make_parallel()
    WHERE x.someid < y.someid
      --AND dbo.matchname(x.Name, y.Name) >= 80
      AND itvf.match >= 80
)
SELECT @x = 1
FROM uniqueList
WHERE rn = 1;
PRINT DATEDIFF(MS, @st, getdate());
GO 5
And the results:
Scalar function
--------------------------------------------------
Beginning execution loop
4627
4504
4440
4457
4550
Batch execution completed 5 times.
ITVF (serial)
--------------------------------------------------
Beginning execution loop
1623
1610
1643
1640
1713
Batch execution completed 5 times.
ITVF Parallel
--------------------------------------------------
Beginning execution loop
1306
1067
1077
1127
1047
Batch execution completed 5 times.
The iTVF based solution was ~3X faster when run with a serial plan, 4X faster with a parallel plan.

Related

How to do this in SQL Server query instead of function?

I have a table that has a string in one of its columns.
My table look like this:
RowCnt
Lvl
TargetID
Codes
1000
1
0
1,1,0,1,0,1,...,1,0,0,0,0
1000
1
1
0,0,1,0,1,0,...,0,1,1,1,1
1000
1
2
1,0,0,0,1,1,...,0,0,0,0,0
1000
1
3
0,1,1,1,0,1,...,1,1,1,1,1
1000
1
4
1,1,0,0,1,0,...,0,0,1,0,0
1000
2
0
0,0,1,1,0,1,...,0,1,0,1,1
1000
2
1
0,1,0,1,1,1,...,1,1,1,1,0
1000
2
2
0,0,0,0,0,1,...,0,0,0,0,1
1500
1
0
1,1,1,1,1,0,...,1,1,1,1,0
1500
1
1
1,0,0,0,0,1,...,0,0,0,0,1
I have to compare each line with each line and see how many of digits differ in the Codes column.
So the first record 1,1,0,1,0,1,...,1,0,0,0,0 will be compared with the 2nd record 0,0,1,0,1,0,...,0,1,1,1,1, finding that 14 out of 328 digits are different; then it is compared with the 3rd record 1,0,0,0,1,1,...,0,0,0,0,0, finding 29 / 328; and so on until it has been compared with all records.
Then the 2nd record is compared with the 3rd, then the 4th, and so on until all pairs have been compared.
My table has around 2000 records and I assume that would take around 4 million operations.
I have built a function to do the comparison
-- Returns the fraction of positions at which two comma-separated lists hold the
-- same value (matching positions / compared positions).
--   @x, @y : comma-separated lists of integers, e.g. '1,0,1'
--   returns: decimal(18,10); NULL when there is no position to compare
-- NOTE: positions are taken from the identity values assigned while inserting
-- the STRING_SPLIT output, so this relies on STRING_SPLIT returning the items in
-- string order, which is not guaranteed.
ALTER FUNCTION [dbo].[Sim]
(@x varchar(max),
 @y varchar(max))
RETURNS decimal(18,10)
AS
BEGIN
    DECLARE @Xt AS TABLE (id int identity, x int)
    DECLARE @Yt AS TABLE (id int identity, y int)
    DECLARE @Match int
    DECLARE @All int

    INSERT INTO @Xt (x)
    SELECT value FROM STRING_SPLIT(@x, ',')

    INSERT INTO @Yt (y)
    SELECT value FROM STRING_SPLIT(@y, ',')

    -- One pass over the position-aligned pairs yields both counts.
    SELECT @Match = COUNT(CASE WHEN xx.x = yy.y THEN 1 END),
           @All   = COUNT(*)
    FROM @Xt xx
    INNER JOIN @Yt yy ON xx.id = yy.id

    -- NULLIF avoids a divide-by-zero error when nothing could be paired up.
    RETURN 1.0 * @Match / NULLIF(@All, 0)
END
and my query is like this
-- Score every pair of targets that share a RowCnt (lower TargetID on the left)
-- with the scalar dbo.sim function and store the pairs in TargetFilled.
WITH pairs AS (
    SELECT
        lhs.RowCnt,
        lhs.Lvl,
        lhs.TargetID                    AS a_TargetID,
        rhs.TargetID                    AS b_TargetID,
        lhs.codes                       AS a_codes,
        rhs.codes                       AS b_codes,
        dbo.sim(lhs.codes, rhs.codes)   AS sim
    FROM TargetsComp AS lhs
    INNER JOIN TargetsComp AS rhs
        ON  lhs.RowCnt = rhs.RowCnt
        AND lhs.TargetID < rhs.TargetID
)
INSERT INTO TargetFilled (RowCnt, Lvl, a_TargetID, b_TargetID, a_codes, b_codes, sim)
SELECT RowCnt, Lvl, a_TargetID, b_TargetID, a_codes, b_codes, sim
FROM pairs
ORDER BY RowCnt, Lvl, sim DESC
but my method fills C drive and does not finish!!
Any better way?

A faster version of your function would be an inline Table Valued Function.
-- Inline TVF returning one row with Pct = fraction (0-1) of positions at which
-- the two comma-separated lists DIFFER.
-- NOTE(review): the question's scalar dbo.Sim returns the fraction that MATCH;
-- confirm which one the caller expects.
-- Positions come from ROW_NUMBER() over the STRING_SPLIT output, whose order is
-- not guaranteed to follow the string.
CREATE OR ALTER FUNCTION dbo.Similar (@x varchar(max), @y varchar(max))
RETURNS TABLE AS RETURN
SELECT
    -- NULLIF yields NULL instead of a divide-by-zero error when no positions pair up
    COUNT(CASE WHEN xJ.value <> yJ.value THEN 1 END) * 1.0 / NULLIF(COUNT(*), 0) AS Pct
FROM (
    SELECT *,
        ROW_NUMBER() OVER (ORDER BY (SELECT 1)) rn
    FROM STRING_SPLIT(@x, ',')
) xJ
JOIN (
    SELECT *,
        ROW_NUMBER() OVER (ORDER BY (SELECT 1)) rn
    FROM STRING_SPLIT(@y, ',')
) yJ ON yJ.rn = xJ.rn;
However, STRING_SPLIT with a row-number is not guaranteed to always return results in the actual order of the string. It may do it once, it may do it a million times, but there is always a chance the compiler could rearrange things. So instead you could use OPENJSON
-- Inline TVF returning one row with Pct = fraction (0-1) of positions at which
-- the two comma-separated lists DIFFER. Each list is wrapped in [ ] and parsed as
-- a JSON array, so OPENJSON's [key] (the array index) provides the position.
-- NOTE(review): the question's scalar dbo.Sim returns the fraction that MATCH;
-- confirm which one the caller expects.
CREATE OR ALTER FUNCTION dbo.Similar (@x varchar(max), @y varchar(max))
RETURNS TABLE AS RETURN
SELECT
    -- NULLIF yields NULL instead of a divide-by-zero error when no positions pair up
    COUNT(CASE WHEN xJ.value <> yJ.value THEN 1 END) * 1.0 / NULLIF(COUNT(*), 0) AS Pct
FROM OPENJSON('[' + @x + ']') xJ
JOIN OPENJSON('[' + @y + ']') yJ ON yJ.[key] = xJ.[key];
You would use it like this
-- Score every pair of targets that share a RowCnt (lower TargetID on the left)
-- and store the pairs in TargetFilled.
WITH Y AS (
    select
        a.RowCnt,
        a.Lvl,
        a.TargetID a_TargetID,
        b.targetid b_TargetID,
        a.codes a_codes,
        b.codes b_codes,
        sim.Pct sim
    from TargetsComp A
    inner join TargetsComp B
        on a.RowCnt = b.RowCnt
        and a.TargetID < b.TargetID
    -- CROSS APPLY needs the inline table-valued function dbo.Similar; the scalar
    -- dbo.Sim cannot be used in a FROM clause.
    CROSS APPLY dbo.Similar(a.codes, b.codes) sim
)
insert into TargetFilled
    (RowCnt, Lvl, a_TargetID, b_TargetID, a_codes, b_codes, sim)
SELECT RowCnt, Lvl, a_TargetID, b_TargetID, a_codes, b_codes, sim
FROM Y;
-- you may want to add a filter inside the CTE, e.g.
-- WHERE sim.Pct < 1   -- Pct is a 0-1 fraction, not a 0-100 percentage
I have removed the ORDER BY from the insert as I don't think it's necessary.
You should index your table as follows
-- Supports the self-join on RowCnt with the TargetID range predicate.
CREATE CLUSTERED INDEX CX_TargetsComp_RowCnt_TargetID ON TargetsComp (RowCnt, TargetID);

What is the most efficient way to replace values in a specific column of a table for this specific scenario?

I am using SQL Server 2014 and I have a table in my database called t1 (extract of only 2 columns shown below):
ResaID StayDate
100 2020-02-03
100 2020-02-04
100 2020-02-05
120 2020-04-06
120 2020-04-07
120 2020-04-08
120 2020-04-09
120 2020-04-10
I need to change the dates in the StayDate column based on the following information (extract shown exactly as provided):
ID StartDate EndDate
100 2020-06-04 2020-06-06
120 2021-03-01 2021-03-05
I have started writing my T-SQL query as follows (but it is getting quite tedious as I have to do it for more than 100 ResaID!):
-- Asker's manual approach: one WHEN branch per (ResaID, old StayDate) pair mapping
-- it to the new date; rows matching no branch keep their current StayDate.
-- The "..." line is a placeholder for the remaining hand-written branches, so the
-- statement is not runnable as shown.
USE MyDatabase
UPDATE t1
SET StayDate = CASE WHEN ResaID = 100 and StayDate = '2020-02-03' THEN '2020-06-04'
WHEN ResaID = 100 and StayDate = '2020-02-04' THEN '2020-06-05'
WHEN ResaID = 100 and StayDate = '2020-02-05' THEN '2020-06-06'
...
ELSE StayDate
END
Is there a more efficient way to tackle this problem?

You can use a recursive approach:
-- Expand each new date range into one row per day, number those days per
-- reservation, number the existing t1 rows the same way, and copy the new dates
-- across by matching reservation id and sequence number.
-- NOTE: recursion is capped at 100 levels by default; ranges longer than that
-- need OPTION (MAXRECURSION n) on the UPDATE.
with r_cte as (
    -- anchor: the first day of every new range
    select id, convert(date, startdate) as startdate, convert(date, enddate) as enddate
    from ( values (100, '2020-06-04', '2020-06-06'),
                  (120, '2021-03-01', '2021-03-05')
         ) t(id, startdate, enddate)
    union all
    -- recursive step: add one day until the end date is reached
    select id, dateadd(day, 1, startdate), enddate
    from r_cte c
    where startdate < enddate
), r_cte_seq as (
    -- n-th day of the new range, per reservation
    select r_cte.*, row_number() over (partition by id order by startdate) as seq
    from r_cte
), cte_seq as (
    -- n-th existing stay date, per reservation
    select t1.*, row_number() over (partition by ResaID order by staydate) as seq
    from t1
)
update cs
set cs.staydate = rc.startdate
from cte_seq cs inner join
     r_cte_seq rc
     on rc.id = cs.ResaID and rc.seq = cs.seq;

Here is my approach to this problem. I would use a numbers table to generate a record for each date in the new range for each reservation ID. I would then partition this data by reservation ID, ordered by the date. Doing the same partition logic on the existing data will allow records to be properly joined together.
I would then do a DELETE operation followed by an INSERT operation. This would leave you with the appropriate amount of records. The only manual thing that would need to be done is to populate the auxiliary data for reservations with expanded date ranges. I expanded one of your new ranges to show this scenario.
I've marked where the setup for this demo ends in the code below. Everything below that is my intended solution that should be able to be implemented with your real tables.
--Ranges Table
DECLARE @ranges TABLE
(
    ID INT
    ,StartDate DATETIME
    ,EndDate DATETIME
)
DECLARE @t1 TABLE
(
    ResaID INT
    ,StayDate DATETIME
    ,ColA INT
    ,ColB NVARCHAR(100)
    ,ColC BIT
)
INSERT INTO @t1
(
    ResaID
    ,StayDate
    ,ColA
    ,ColB
    ,ColC
)
VALUES
(100, '2020-02-03', 1, 'A', 0)
,(100, '2020-02-04', 100, 'B', 1)
,(100, '2020-02-05', 255, 'C', 1)
,(120, '2020-04-06', 34, 'D', 1)
,(120, '2020-04-07', 67, 'E', 0)
,(120, '2020-04-08', 87, 'F', 0)
,(120, '2020-04-09', 545, 'G', 1)
,(120, '2020-04-10', 288, 'H', 0)
INSERT INTO @ranges
(
    ID
    ,StartDate
    ,EndDate
)
VALUES
(100, '2020-06-04', '2020-06-07')
,(120, '2021-03-01', '2021-03-05')
--END DEMO SETUP
-- OBJECT_ID checks are used instead of DROP TABLE IF EXISTS so the script also
-- runs on SQL Server 2014, the version named in the question.
IF OBJECT_ID('tempdb..#numbers') IS NOT NULL DROP TABLE #numbers;
IF OBJECT_ID('tempdb..#newRecords') IS NOT NULL DROP TABLE #newRecords;
--GENERATE NUMBERS TABLE
-- Stacked cross joins of a 10-row seed produce 50,000 rows, numbered 1..50000.
;WITH e1(n) AS
(
    SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
    SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
    SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1
), -- 10
e2(n) AS (SELECT 1 FROM e1 CROSS JOIN e1 AS b), -- 10*10
e3(n) AS (SELECT 1 FROM e2 CROSS JOIN e2 AS b), -- 100*100
e4(n) AS (SELECT 1 FROM e3 CROSS JOIN (SELECT TOP 5 n FROM e1) AS b) -- 5*10000
SELECT ROW_NUMBER() OVER (ORDER BY n) as Num
INTO #numbers
FROM e4
ORDER BY n;
;with oldData --PARTITION THE EXISTING RECORDS
AS
(
    SELECT *
        ,ROW_NUMBER() OVER (PARTITION BY ResaID ORDER BY STAYDATE) as ResPartID
    FROM @t1
)
,newRanges --GENERATE YOUR NEW RANGES AND PARTITION
AS
(
    -- Each range boundary is cast to an integer and matched against the numbers
    -- table; the matching numbers are cast back to DATETIME to get one row per
    -- day of the range.
    select
        r.ID
        ,CAST(n.num as DATETIME) as StayDate
        ,ROW_NUMBER() OVER (PARTITION BY ID ORDER BY n.num) as ResPartID
    from @ranges r
    inner join #numbers n on CAST(r.StartDate as INT) <= n.Num AND CAST(r.EndDate as INT) >= n.Num
)
-- The LEFT JOIN keeps new days that have no matching old row (expanded ranges);
-- their auxiliary columns come through as NULL and must be filled in manually.
SELECT n.ID
    ,n.StayDate
    ,o.ColA
    ,o.ColB
    ,o.ColC
into #newRecords
FROM newRanges n
left join oldData o on n.ID = o.ResaID and n.ResPartID = o.ResPartID
--DELETE OLD RECORDS
DELETE t
FROM @t1 t
inner join @ranges r on t.ResaID = r.ID
--INSERT NEW DATA
INSERT INTO @t1
(
    ResaID
    ,StayDate
    ,ColA
    ,ColB
    ,ColC
)
SELECT
    ID
    ,StayDate
    ,ColA
    ,ColB
    ,ColC
FROM #newRecords
SELECT * FROM @t1

The following code converts the t1 dates into ranges and then uses the corresponding range dates to calculate new StayDate values. You can swap out the final select for one of the commented statements to see what is going on in the CTEs. The final select can be replaced with an update if you want to change the original table data.
-- Thanks to Aaron Hughes for setting up the sample data.
-- I changed the DateTime columns to Date .
--Ranges Table
DECLARE @ranges TABLE
(
    ID INT
    ,StartDate DATE
    ,EndDate DATE
)
DECLARE @t1 TABLE
(
    ResaID INT
    ,StayDate DATE
    ,ColA INT
    ,ColB NVARCHAR(100)
    ,ColC BIT
)
INSERT INTO @t1
(
    ResaID
    ,StayDate
    ,ColA
    ,ColB
    ,ColC
)
VALUES
(100, '2020-02-03', 1, 'A', 0)
,(100, '2020-02-04', 100, 'B', 1)
,(100, '2020-02-05', 255, 'C', 1)
,(120, '2020-04-06', 34, 'D', 1)
,(120, '2020-04-07', 67, 'E', 0)
,(120, '2020-04-08', 87, 'F', 0)
,(120, '2020-04-09', 545, 'G', 1)
,(120, '2020-04-10', 288, 'H', 0)
INSERT INTO @ranges
(
    ID
    ,StartDate
    ,EndDate
)
VALUES
(100, '2020-06-04', '2020-06-07')
,(120, '2021-03-01', '2021-03-05');
with
-- Calculate the date range for each stay in @t1 .
ResaRanges as (
    select ResaId, Min( StayDate ) as ResaStartDate, Max( StayDate ) as ResaEndDate
    from @t1
    group by ResaId ),
-- Match up the @t1 date ranges with the @ranges date ranges.
-- ResaDays / RangeDays are inclusive day counts; DaysOffset is the shift from the
-- old start date to the new start date.
CombinedRanges as (
    select RR.ResaId, RR.ResaStartDate, RR.ResaEndDate, DateDiff( day, RR.ResaStartDate, RR.ResaEndDate ) + 1 as ResaDays,
        R.StartDate, R.EndDate, DateDiff( day, R.StartDate, R.EndDate ) + 1 as RangeDays,
        DateDiff( day, RR.ResaStartDate, R.StartDate ) as DaysOffset
    from ResaRanges as RR inner join
        @ranges as R on R.ID = RR.ResaId )
-- Calculate the new StayDate values for all @t1 ranges that are not longer than the corresponding @ranges row.
-- The difference between range starting dates is added to each StayDate .
select T.ResaId, T.StayDate, DateAdd( day, CR.DaysOffset, T.StayDate ) as NewStayDate
from @t1 as T inner join
    CombinedRanges as CR on CR.ResaID = T.ResaID
where CR.RangeDays >= CR.ResaDays;
-- To see the steps you can use one of the following select statements to view the intermediate results:
-- select * from ResaRanges;
-- select * from CombinedRanges;

Assign same Group ID to ID with same values - Recursive sql query

The table contains PID and corresponding values. PIDs with same values should be assigned same group id. A, B, D are directly connected (value = ABC) and since C is connected with D (value = GHI), A,B,C & D are all part of the same group.
PID Value
--------------
A101 ABC
A101 XYZ
B101 ABC
C101 GHI
C101 DEF
D101 ABC
D101 GHI
E101 KLM
F101 MNO
Expected Result:
PID GroupID
--------------
A101 1
B101 1
C101 1
D101 1
E101 2
F101 3
I have tried below query without success:
-- Asker's attempt (reported as not working).
with cte as (
-- anchor: every (PID, Value) row, ranked by Value
select PID, Value, Rank() OVER(ORDER BY Value) rn
from t
union all
-- recursive step: rows of t sharing a Value with a cte row and having a smaller
-- PID, carrying that cte row's Value and rank forward
select t.PID, cte.Value, cte.rn
from t join cte
on t.Value = cte.Value and
t.PID < cte.PID
)
select *
from cte
Is there a way to fix the query and also write a more efficient query (about 1 million rows) in SQL Server 2012/2014?

try the following:
declare @tab table (PID varchar(10), Val varchar(100));
insert into @tab (PID, Val) values
    ('A101', 'ABC'),
    ('A101', 'XYZ'),
    ('B101', 'ABC'),
    ('C101', 'GHI'),
    ('C101', 'DEF'),
    ('D101', 'ABC'),
    ('D101', 'GHI'),
    ('E101', 'KLM'),
    ('F101', 'MNO');

-- Working table: every PID starts in its own group, labelled with its own PID.
declare @grp table (PID varchar(10) primary key, grp varchar(10) not null);
insert into @grp (PID, grp)
select distinct PID, PID from @tab;

-- Repeatedly give each PID the smallest label found among the PIDs it shares a
-- value with. Labels spread one link per pass, so indirectly connected PIDs
-- (C101 -> D101 -> A101) end up with the same label. Stop when a pass changes nothing.
declare @changed int = 1;
while @changed > 0
begin
    update g
    set grp = m.min_grp
    from @grp g
    join (
        select a.PID, min(gb.grp) as min_grp
        from @tab a
        join @tab b on b.Val = a.Val
        join @grp gb on gb.PID = b.PID
        group by a.PID
    ) m on m.PID = g.PID
    where m.min_grp < g.grp;

    -- must be read immediately after the UPDATE
    set @changed = @@rowcount;
end;

-- Turn the labels into consecutive group numbers.
select PID, dense_rank() over (order by grp) as GROUP_ID
from @grp
order by GROUP_ID, PID;

Variable within SQL query

I have this:
-- One 'OwnerReassign' row per type-1 account owned by John Smith; the account
-- id is embedded in the JSON text of the Parameters column.
SELECT
    NEWID()                                 AS id,
    'OwnerReassign'                         AS name,
    1                                       AS TypeId,
    'MyOrganisation'                        AS OrgName,
    '07DA8E53-74BD-459C-AF94-A037897A51E3'  AS SystemUserId,
    0                                       AS StatusId,
    GETDATE()                               AS CreatedAt,
    '{"EntityName":"account","Ids":["'
        + CONVERT(varchar(50), AccountId)
        + '"],"OwnerId":"0C01C994-1205-E511-988E-26EE4189191B"}' AS [Parameters]
FROM Account
WHERE New_AccountType = 1
  AND OwnerIdName = 'John Smith'
Within the parameter field is an id (0C01C994-1205-E511-988E-26EE4189191B). Is it possible it could sequentially assign a different id from a list for each row? There are 5 id's in total.
What i'm trying to get to is this result set equally split between the 5 different id's.
Thanks

You can add one more NEWID() in the sub query and handle in the SELECT as below:
-- Same rows as the question's query, but the OwnerId placed in the JSON is a
-- freshly generated NEWID() per row (ParamId) rather than a fixed value.
;WITH src AS (
    SELECT
        NEWID()                                 AS id,
        'OwnerReassign'                         AS name,
        1                                       AS TypeId,
        'MyOrganisation'                        AS OrgName,
        '07DA8E53-74BD-459C-AF94-A037897A51E3'  AS SystemUserId,
        0                                       AS StatusId,
        GETDATE()                               AS CreatedAt,
        CAST(NEWID() AS VARCHAR (36))           AS ParamId,
        CAST(AccountId AS varchar(50))          AS AccountId
    FROM Account
    WHERE OwnerIdName IN ('John Smith')
      AND New_AccountType = 1
)
SELECT
    id,
    [name],
    TypeId,
    OrgName,
    SystemUserId,
    StatusId,
    CreatedAt,
    '{"EntityName":"account","Ids":["' + AccountId +'"],"OwnerId":"' + ParamId + '"}' AS [Parameters]
FROM src

You can use something like the following. Basically, use a row number for both your IDs and your data table to update, then do a MOD (%) operation with the amount of ID's you want to assign, so your data table to update is split into N groups. Then use that group ID to assign each ID.
IF OBJECT_ID('tempdb..#IDsToAssign') IS NOT NULL
    DROP TABLE #IDsToAssign
CREATE TABLE #IDsToAssign (
    IDToAssign VARCHAR(100))
-- 3 IDs example
INSERT INTO #IDsToAssign (
    IDToAssign)
SELECT IDToAssign = NEWID()
UNION ALL
SELECT IDToAssign = NEWID()
UNION ALL
SELECT IDToAssign = NEWID()
-- Number of IDs to spread the rows over; used as the modulus below.
DECLARE @AmountIDsToAssign INT = (SELECT COUNT(1) FROM #IDsToAssign)
IF OBJECT_ID('tempdb..#Account') IS NOT NULL
    DROP TABLE #Account
CREATE TABLE #Account (
    PrimaryKey INT PRIMARY KEY,
    AssignedID VARCHAR(100))
-- 10 Rows example
INSERT INTO #Account (
    PrimaryKey)
VALUES
    (100),
    (200),
    (351),
    (154),
    (194),
    (345),
    (788),
    (127),
    (124),
    (14)
;WITH DataRowNumber AS
(
    -- number the rows to update (arbitrary order)
    SELECT
        A.*,
        RowNumber = ROW_NUMBER() OVER (ORDER BY (SELECT NULL))
    FROM
        #Account AS A
),
IDsRowNumbers AS
(
    -- number the candidate IDs 1..N
    SELECT
        D.IDToAssign,
        RowNumber = ROW_NUMBER() OVER (ORDER BY D.IDToAssign)
    FROM
        #IDsToAssign AS D
),
NewIDAssignation AS
(
    -- map each data row onto an ID number in 1..N, cycling through the IDs
    SELECT
        R.*,
        IDRowNumberAssignation = (R.RowNumber % @AmountIDsToAssign) + 1
    FROM
        DataRowNumber AS R
)
UPDATE A SET
    AssignedID = R.IDToAssign
FROM
    NewIDAssignation AS N
    INNER JOIN IDsRowNumbers AS R ON N.IDRowNumberAssignation = R.RowNumber
    INNER JOIN #Account AS A ON N.PrimaryKey = A.PrimaryKey
SELECT
    *
FROM
    #Account AS A
ORDER BY
    A.AssignedID
/* Results:
PrimaryKey AssignedID
----------- ------------------------------------
124 1CC7F0F1-7EDE-4F7F-B0A3-739D74A62390
194 1CC7F0F1-7EDE-4F7F-B0A3-739D74A62390
351 1CC7F0F1-7EDE-4F7F-B0A3-739D74A62390
788 2A58A573-EDCB-428E-A87A-6BFCED265A9C
200 2A58A573-EDCB-428E-A87A-6BFCED265A9C
127 2A58A573-EDCB-428E-A87A-6BFCED265A9C
14 2A58A573-EDCB-428E-A87A-6BFCED265A9C
100 FD8036DA-0E15-453E-8A59-FA3C2BDB8FB1
154 FD8036DA-0E15-453E-8A59-FA3C2BDB8FB1
345 FD8036DA-0E15-453E-8A59-FA3C2BDB8FB1
*/
The ordering of the ROW_NUMBER() function will determine how ID's are assigned.

You could potentially do this by using the ROW_NUMBER() field in a subquery; for example:
-- Number the matching accounts, then map each row number onto one of the five
-- owner ids in round-robin order so the rows are split evenly between them.
SELECT NEWID() as id, 'OwnerReassign' as name, 1 as TypeId,
    'MyOrganisation' as OrgName,
    '07DA8E53-74BD-459C-AF94-A037897A51E3' as SystemUserId,
    0 as StatusId, GETDATE() as CreatedAt,
    CASE (B.rn - 1) % 5
        WHEN 0 THEN '0C01C994-1205-E511-988E-26EE4189191B'
        WHEN 1 THEN '12345677-1205-E511-988E-26EE4189191B'
        WHEN 2 THEN '66666666-1205-E511-988E-26EE4189191B'
        -- WHEN 3 / WHEN 4: add the remaining two owner ids here; until then the
        -- rows falling in those buckets get NULL
    END AS OwnerId
FROM
(
    -- AccountId is the id column the question's query reads from Account
    SELECT ROW_NUMBER() OVER (ORDER BY A.AccountId) AS rn
    FROM Account A
    WHERE OwnerIdName IN ('John Smith') AND New_AccountType = 1
) AS B
If you want the system to pick those values, then you could put them in their own temporary table, too.

SQL Query - Multiple Table Join With Grouping Functions that Keep Branch Structure

I have exhausted my search for a solution and would like to post my question to see if a solution exists.
I need to write a report to show the debits and credits per branch. The report also needs to show branches that have had no debits (DBs) or credits (CRs).
For simplicity I have scaled down my tables to try highlight my issue.
My first table holds my Branch Data
BranchNo BranchName
1 Main
2 Mgorogoro
3 Arusha
My second table holds all Debit Transactions
txid Narrative Amount Date BranchNo
1 Test 1 50.00 2014/11/26 1
2 Test 2 20.00 2014/11/27 3
I've written a SQL statement that gives me the results I need:-
-- For every distinct debit date, emit one row per branch with that branch's
-- debit total for the date (0 when the branch had no debits that day).
DECLARE @get_Dates CURSOR;
DECLARE @Date VarChar(10);
DECLARE @tbl TABLE
(
    DebitOutCount int,
    BranchCode VarChar(250),
    TxDate VarChar(10)
)
SET @get_Dates = CURSOR FOR
    Select Debits_OUT.Date FROM Debits_OUT GROUP BY Debits_OUT.Date ORDER BY Debits_OUT.Date
OPEN @get_Dates;
FETCH NEXT FROM @get_Dates into @Date;
WHILE (@@FETCH_STATUS = 0)
BEGIN
    -- one row per branch for the current date
    INSERT INTO @tbl (DebitOutCount, BranchCode, TxDate)
    SELECT
        (SELECT ISNULL(SUM(DB_OUT.Amount), 0) FROM Debits_OUT AS DB_OUT WHERE B.BranchNo = DB_OUT.BranchNo AND DB_OUT.Date = @Date) AS DebitOutValue,
        CAST(B.BranchNo As VarChar(10)) + ' ' + B.BranchName As [Branch Names],
        @Date
    From exBranches As B
    FETCH NEXT FROM @get_Dates into @Date
END
CLOSE @get_Dates
DEALLOCATE @get_Dates
SELECT * FROM @tbl
The result is in the format that I need:-
DebitOutCount BranchCode TxDate
50 1 Main 2014/11/26
0 2 Mgorogoro 2014/11/26
0 3 Arusha 2014/11/26
0 1 Main 2014/11/27
0 2 Mgorogoro 2014/11/27
20 3 Arusha 2014/11/27
However, the report tools and Views cannot work with the above. I have tried Left Joins - but the problem is the result set will not keep the branch numbers for dates where there were zero transactions. For Example:-
-- Debit total per branch and date. Branches appear only for dates on which they
-- have debits; a branch with no debits at all gets a single row with a NULL date.
SELECT
    ISNULL(SUM(dbt.Amount), 0),
    CAST(br.BranchNo AS VarChar(10)) + ' ' + br.BranchName AS [Branch Names],
    dbt.Date
FROM exBranches AS br
LEFT JOIN Debits_OUT AS dbt
    ON br.BranchNo = dbt.BranchNo
GROUP BY
    dbt.Date,
    br.BranchNo,
    br.BranchName
ORDER BY
    dbt.Date,
    br.BranchNo,
    br.BranchName
Returns:-
DB_OUT Branch Names Date
0.00 2 Mgorogoro NULL
50.00 1 Main 2014/11/26
20.00 3 Arusha 2014/11/27
In all the JOIN combinations that I try, I cannot get the branches to show ALL the branches for each date that is in the debits table.
Is there a fundamental concept that I have completely missed? I need have a query that can be run in a view that returns the same data as the cursor statement. Is this possible?

The idea is to generate possible combinations of Branches and dates first:
create table exBranches(
    BranchNo int,
    BranchName varchar(20)
)
create table Debits_OUT(
    txId int,
    Narrative varchar(20),
    Amount decimal (6,2),
    [Date] date,
    BranchNo int
)
insert into exBranches (BranchNo, BranchName)
values (1, 'Main'), (2, 'Mgorogoro'), (3, 'Arusha')
insert into Debits_OUT (txId, Narrative, Amount, [Date], BranchNo)
values (1, 'Test 1', 50.00, '20141126', 1), (2, 'Test 2', 20.00, '20141127', 3);
-- BranchDate: every branch paired with every date that has at least one debit.
with BranchDate as(
    select
        b.BranchNo,
        b.BranchName,
        d.Date
    from exBranches b
    cross join (
        select distinct [Date] from Debits_OUT
    )d
)
-- Left join the per-branch, per-date totals so that combinations without debits
-- still appear, with 0. Every output column is named so the query can be used as
-- a view definition (drop the ORDER BY there; a view cannot carry one without TOP).
select
    isnull(d.DebitOutCount,0) as DebitOutCount,
    cast(b.BranchNo as varchar(10)) + ' ' + b.BranchName as BranchName,
    b.Date
from BranchDate b
left join (
    select
        BranchNo,
        [Date],
        sum(Amount) as DebitOutCount
    from Debits_OUT
    group by
        BranchNo, [Date]
)d
    on d.BranchNo = b.BranchNo
    and d.Date = b.Date
order by b.Date, b.BranchNo asc
drop table exBranches
drop table Debits_OUT

Try this; it works:
-- Build every branch/date combination first, using the question's column names
-- (Debits_OUT.[Date], Debits_OUT.BranchNo). Amount is left out of the DISTINCT so
-- each date appears once per branch.
select b.BranchName, b.BranchNo, d.[Date]
into #temp
from exBranches b
cross join (select distinct [Date] from Debits_OUT) d;

-- Left join the debits onto the combinations and total them per branch and date;
-- combinations without debits show 0.
select isnull(sum(t.Amount), 0) as DebitOutCount, a.BranchName, a.[Date]
from #temp a
left join Debits_OUT t on t.BranchNo = a.BranchNo and t.[Date] = a.[Date]
group by a.[Date], a.BranchNo, a.BranchName
order by a.[Date], a.BranchNo;
view here..
http://sqlfiddle.com/#!3/ad815/1

Develop Reference

c reactjs sql-server angularjs arrays wpf database batch-file google-app-engine silverlight

Grouping based on the match percentage - sql-server

Related

How to do this in SQL Server query instead of function?

What is the most efficient way to replace values in a specific column of a table for this specific scenario?

Assign same Group ID to ID with same values - Recursive sql query

Variable within SQL query

SQL Query - Multiple Table Join With Grouping Functions that Keep Branch Structure

Categories

Resources