central attachment/file table design - sql-server

UPDATE:
I have generated the tables below from SQL Server.
Here are the screenshots of my design:
The result shows the data after combining the three tables, giving a total of 12 rows.
My request table has 4 rows and my attachment table has 3 rows.
-- One row per attachment. process_type_id is nullable and no foreign key is
-- declared on it in this script.
CREATE TABLE [dbo].[attachment]
(
    [attach_id]           INT IDENTITY(1,1) NOT NULL,
    [process_type_id]     INT               NULL,
    [attach_content_type] VARCHAR(100)      NULL,
    [attach_name]         VARCHAR(100)      NULL,
    CONSTRAINT [PK_attachment] PRIMARY KEY CLUSTERED ([attach_id] ASC)
        WITH (
            PAD_INDEX = OFF,
            STATISTICS_NORECOMPUTE = OFF,
            IGNORE_DUP_KEY = OFF,
            ALLOW_ROW_LOCKS = ON,
            ALLOW_PAGE_LOCKS = ON
        ) ON [PRIMARY]
) ON [PRIMARY];
-- Lookup table: one row per process type, keyed by an identity column.
CREATE TABLE [dbo].[process_type]
(
    [process_type_id] INT IDENTITY(1,1) NOT NULL,
    [process_type]    VARCHAR(100)      NULL,
    CONSTRAINT [PK_process_type] PRIMARY KEY CLUSTERED ([process_type_id] ASC)
        WITH (
            PAD_INDEX = OFF,
            STATISTICS_NORECOMPUTE = OFF,
            IGNORE_DUP_KEY = OFF,
            ALLOW_ROW_LOCKS = ON,
            ALLOW_PAGE_LOCKS = ON
        ) ON [PRIMARY]
) ON [PRIMARY];
-- Requests; process_type_id is mandatory here but has no declared foreign key.
CREATE TABLE [dbo].[request]
(
    [request_Id]      INT IDENTITY(1,1) NOT NULL,
    [request_desc]    VARCHAR(200)      NULL,
    [process_type_id] INT               NOT NULL,
    CONSTRAINT [PK_request] PRIMARY KEY CLUSTERED ([request_Id] ASC)
        WITH (
            PAD_INDEX = OFF,
            STATISTICS_NORECOMPUTE = OFF,
            IGNORE_DUP_KEY = OFF,
            ALLOW_ROW_LOCKS = ON,
            ALLOW_PAGE_LOCKS = ON
        ) ON [PRIMARY]
) ON [PRIMARY];
-- Request reviews; process_type_id is nullable and has no declared foreign key.
CREATE TABLE [dbo].[request_review]
(
    [review_request_Id] INT IDENTITY(1,1) NOT NULL,
    [review_desc]       VARCHAR(200)      NULL,
    [process_type_id]   INT               NULL,
    CONSTRAINT [PK_request_review] PRIMARY KEY CLUSTERED ([review_request_Id] ASC)
        WITH (
            PAD_INDEX = OFF,
            STATISTICS_NORECOMPUTE = OFF,
            IGNORE_DUP_KEY = OFF,
            ALLOW_ROW_LOCKS = ON,
            ALLOW_PAGE_LOCKS = ON
        ) ON [PRIMARY]
) ON [PRIMARY];
I am busy building a database, and now I have a question about my design.
I have got the following tables:
Customer
Employee
upload_doc
I want to be able to add multiple attachments to Customers, Employees
what is the best design:
design #1
Upload_doc
upload_doc_id (NOT auto generated)
upload_doc_name (uploaded doc name)
upload_content_type
upload_doc_data (blob)
employee
employee_id
first_name
middle_name
last_name
upload_doc_id
customer
customer_id
first_name
last_name
upload_doc_id
design #2
Upload_doc
upload_doc_id (AUTO generated)
upload_doc_name (uploaded doc name)
upload_content_type
upload_doc_data (blob)
employee_id
customer_id
employee
employee_id
first_name
middle_name
last_name
customer
customer_id
first_name
last_name

I would go with design #1, but make upload_doc_id an auto-generated identity column.
If you are using MS SQL Server, I would use a FILETABLE instead of blobs (see: Files in MS SQL).

The info in your question is somewhat limited, but I'll give it a quick attempt. Personally I'd have a common table for personal information like: names and contact info.
Then have a type lookup table that defines what type of person/contact they are: customer / employee.
Then you can link docs to the common person table. Something like the below, which you can run as is:
-- Common information (name, contact info) shared by every kind of person.
CREATE TABLE #temp_person
(
    PersonId     INT,
    FirstName    VARCHAR(20),
    MiddleName   VARCHAR(20),
    LastName     VARCHAR(20),
    PersonTypeId INT            -- lookup value into #temp_person_type
);

-- Lookup table that says whether a person is an employee or a customer.
CREATE TABLE #temp_person_type
(
    PersonTypeId INT,
    PersonType   VARCHAR(10)
);

-- Employee-specific columns would be added here.
CREATE TABLE #temp_employee
(
    EmployeeId INT,
    PersonId   INT              -- links to #temp_person
);

-- Customer-specific columns would be added here.
CREATE TABLE #temp_customer
(
    CustomerId INT,
    PersonId   INT              -- links to #temp_person
);

-- Uploaded documents hang off the common person row, not off employee/customer.
CREATE TABLE #temp_upload_doc
(
    UploadDocId INT,
    PersonId    INT,            -- links to #temp_person
    DocName     VARCHAR(20)
);
-- Person types: 1 = Employee, 2 = Customer.
INSERT INTO #temp_person_type (PersonTypeId, PersonType)
VALUES
    (1, 'Employee'),
    (2, 'Customer');

-- Two employees (type 1) and two customers (type 2).
INSERT INTO #temp_person (PersonId, FirstName, MiddleName, LastName, PersonTypeId)
VALUES
    (1, 'Bob',     'Jon', 'Smith',   1),
    (2, 'David',   '',    'Jones',   1),
    (3, 'Andy',    '',    'Johnson', 2),
    (4, 'Richard', '',    'Branson', 2);

-- Customers are persons 3 and 4.
INSERT INTO #temp_customer (CustomerId, PersonId)
VALUES
    (1, 3),
    (2, 4);

-- Employees are persons 1 and 2.
INSERT INTO #temp_employee (EmployeeId, PersonId)
VALUES
    (1, 1),
    (2, 2);

-- Documents are attached by PersonId, several per person.
INSERT INTO #temp_upload_doc (UploadDocId, PersonId, DocName)
VALUES
    (1, 1, 'Doc 1'),
    (2, 1, 'Doc 2'),
    (3, 2, 'Doc blah'),
    (4, 3, 'Doc dog'),
    (5, 3, 'Doc images'),
    (6, 4, 'Doc another'),
    (7, 4, 'Doc la la la');
-- EMPLOYEES AND DOCS
-- One row per (employee, document); all columns of every joined table are returned.
SELECT *
FROM #temp_person AS p
    INNER JOIN #temp_person_type AS pt
        ON pt.PersonTypeId = p.PersonTypeId
    INNER JOIN #temp_employee AS e
        ON e.PersonId = p.PersonId
    INNER JOIN #temp_upload_doc AS u
        ON u.PersonId = e.PersonId
WHERE p.PersonTypeId = 1;

-- CUSTOMERS AND DOCS
-- Same shape as above, going through the customer table instead.
SELECT *
FROM #temp_person AS p
    INNER JOIN #temp_person_type AS pt
        ON pt.PersonTypeId = p.PersonTypeId
    INNER JOIN #temp_customer AS c
        ON c.PersonId = p.PersonId
    INNER JOIN #temp_upload_doc AS u
        ON u.PersonId = c.PersonId
WHERE p.PersonTypeId = 2;
-- Clean up every temp table created by this demo script.
DROP TABLE
    #temp_customer,
    #temp_employee,
    #temp_person,
    #temp_upload_doc,
    #temp_person_type;

Related

SQL Server XML Query Hierarchy not as awaited

There are five or more database tables, that are related to each other like in the following database schema:
Here is the code for creating them:
-- Table 1: root of the hierarchy.
CREATE TABLE [dbo].[Table1]
(
    [Id]         INT       NOT NULL,
    [Title]      NCHAR(10) NOT NULL,
    [Annotation] NCHAR(10) NOT NULL,
    CONSTRAINT [PK_Table1] PRIMARY KEY CLUSTERED ([Id] ASC)
        WITH (
            PAD_INDEX = OFF,
            STATISTICS_NORECOMPUTE = OFF,
            IGNORE_DUP_KEY = OFF,
            ALLOW_ROW_LOCKS = ON,
            ALLOW_PAGE_LOCKS = ON,
            OPTIMIZE_FOR_SEQUENTIAL_KEY = OFF
        ) ON [PRIMARY]
) ON [PRIMARY]
GO
-- Table 2: child of Table 1 (via Table1_Id).
CREATE TABLE [dbo].[Table2]
(
    [Id]        INT       NOT NULL,
    [Table1_Id] INT       NOT NULL,
    [Title]     NCHAR(10) NOT NULL,
    CONSTRAINT [PK_Table2] PRIMARY KEY CLUSTERED ([Id] ASC)
        WITH (
            PAD_INDEX = OFF,
            STATISTICS_NORECOMPUTE = OFF,
            IGNORE_DUP_KEY = OFF,
            ALLOW_ROW_LOCKS = ON,
            ALLOW_PAGE_LOCKS = ON,
            OPTIMIZE_FOR_SEQUENTIAL_KEY = OFF
        ) ON [PRIMARY]
) ON [PRIMARY]
GO
-- Foreign key Table2.Table1_Id -> Table1.Id, validated against existing rows.
ALTER TABLE [dbo].[Table2] WITH CHECK
    ADD CONSTRAINT [FK_Table2_Table1]
    FOREIGN KEY ([Table1_Id]) REFERENCES [dbo].[Table1] ([Id])
GO
-- Make sure the constraint is enabled.
ALTER TABLE [dbo].[Table2]
    CHECK CONSTRAINT [FK_Table2_Table1]
GO
-- Table 2_1: child of Table 2 (via Table2_Id).
CREATE TABLE [dbo].[Table2_1]
(
    [Id]        INT       NOT NULL,
    [Table2_Id] INT       NOT NULL,
    [Title]     NCHAR(10) NOT NULL,
    CONSTRAINT [PK_Table2_1] PRIMARY KEY CLUSTERED ([Id] ASC)
        WITH (
            PAD_INDEX = OFF,
            STATISTICS_NORECOMPUTE = OFF,
            IGNORE_DUP_KEY = OFF,
            ALLOW_ROW_LOCKS = ON,
            ALLOW_PAGE_LOCKS = ON,
            OPTIMIZE_FOR_SEQUENTIAL_KEY = OFF
        ) ON [PRIMARY]
) ON [PRIMARY]
GO
-- Foreign key Table2_1.Table2_Id -> Table2.Id, validated against existing rows.
ALTER TABLE [dbo].[Table2_1] WITH CHECK
    ADD CONSTRAINT [FK_Table2_1_Table2]
    FOREIGN KEY ([Table2_Id]) REFERENCES [dbo].[Table2] ([Id])
GO
-- Make sure the constraint is enabled.
ALTER TABLE [dbo].[Table2_1]
    CHECK CONSTRAINT [FK_Table2_1_Table2]
GO
-- Table 3: child of Table 1 (via Table1_Id), sibling of Table 2.
CREATE TABLE [dbo].[Table3]
(
    [Id]        INT       NOT NULL,
    [Table1_Id] INT       NOT NULL,
    [Title]     NCHAR(10) NOT NULL,
    CONSTRAINT [PK_Table3] PRIMARY KEY CLUSTERED ([Id] ASC)
        WITH (
            PAD_INDEX = OFF,
            STATISTICS_NORECOMPUTE = OFF,
            IGNORE_DUP_KEY = OFF,
            ALLOW_ROW_LOCKS = ON,
            ALLOW_PAGE_LOCKS = ON,
            OPTIMIZE_FOR_SEQUENTIAL_KEY = OFF
        ) ON [PRIMARY]
) ON [PRIMARY]
GO
-- Foreign key Table3.Table1_Id -> Table1.Id, validated against existing rows.
ALTER TABLE [dbo].[Table3] WITH CHECK
    ADD CONSTRAINT [FK_Table3_Table1]
    FOREIGN KEY ([Table1_Id]) REFERENCES [dbo].[Table1] ([Id])
GO
-- Make sure the constraint is enabled.
ALTER TABLE [dbo].[Table3]
    CHECK CONSTRAINT [FK_Table3_Table1]
GO
-- Table 3_1: child of Table 3 (via Table3_Id).
CREATE TABLE [dbo].[Table3_1]
(
    [Id]        INT       NOT NULL,
    [Table3_Id] INT       NOT NULL,
    [Title]     NCHAR(10) NOT NULL,
    CONSTRAINT [PK_Table3_1] PRIMARY KEY CLUSTERED ([Id] ASC)
        WITH (
            PAD_INDEX = OFF,
            STATISTICS_NORECOMPUTE = OFF,
            IGNORE_DUP_KEY = OFF,
            ALLOW_ROW_LOCKS = ON,
            ALLOW_PAGE_LOCKS = ON,
            OPTIMIZE_FOR_SEQUENTIAL_KEY = OFF
        ) ON [PRIMARY]
) ON [PRIMARY]
GO
-- Foreign key Table3_1.Table3_Id -> Table3.Id, validated against existing rows.
ALTER TABLE [dbo].[Table3_1] WITH CHECK
    ADD CONSTRAINT [FK_Table3_1_Table3]
    FOREIGN KEY ([Table3_Id]) REFERENCES [dbo].[Table3] ([Id])
GO
-- Make sure the constraint is enabled.
ALTER TABLE [dbo].[Table3_1]
    CHECK CONSTRAINT [FK_Table3_1_Table3]
GO
Now I add the following record samples to the table:
-- One sample row per table; parents are inserted before the children that reference them.
INSERT INTO table1 (Id, Title, Annotation)
VALUES (1, 'FirstTitle', 'FirstAnno');

INSERT INTO table2 (Id, Table1_Id, Title)
VALUES (1, 1, 'Tab2Title');

INSERT INTO table3 (Id, Table1_Id, Title)
VALUES (1, 1, 'Tab3Title');

INSERT INTO table2_1 (Id, Table2_Id, Title)
VALUES (1, 1, 'Tab21Sub');

INSERT INTO table3_1 (Id, Table3_Id, Title)
VALUES (1, 1, 'Tab31Sub');
Querying these tables with a JOIN and FOR XML, like
-- Flat five-way join rendered with FOR XML AUTO.
-- FOR XML AUTO nests elements in the order the tables appear in the select
-- list, so this produces one single chain T1 > T2 > T3 > T21 > T31 rather
-- than two sibling branches. The aliases become the XML element names.
SELECT *
FROM Table1 AS T1
    INNER JOIN Table2 AS T2
        ON T1.Id = T2.Table1_Id
    INNER JOIN Table3 AS T3
        ON T1.Id = T3.Table1_Id
    INNER JOIN Table2_1 AS T21
        ON T2.Id = T21.Table2_Id
    -- Table3_1 links to its parent through Table3_Id; it has no Table2_Id column.
    INNER JOIN Table3_1 AS T31
        ON T3.Id = T31.Table3_Id
FOR XML AUTO
will end in this result
<T1 Id="1" Title="FirstTitle" Annotation="FirstAnno ">
<T2 Id="1" Table1_Id="1" Title="Tab2Title ">
<T3 Id="1" Table1_Id="1" Title="Tab3Title ">
<T21 Id="1" Table2_Id="1" Title="Tab21Sub ">
<T31 Id="1" Table3_Id="1" Title="Tab31Sub " />
</T21>
</T3>
</T2>
</T1>
while I'm expecting this
<T1 Id="1" Title="FirstTitle" Annotation="FirstAnno ">
<T2 Id="1" Table1_Id="1" Title="Tab2Title ">
<T21 Id="1" Table2_Id="1" Title="Tab21Sub" />
</T2>
<T3 Id="1" Table1_Id="1" Title="Tab3Title ">
<T31 Id="1" Table3_Id="1" Title="Tab31Sub" />
</T3>
</T1>
So how can I modify the query (perhaps by using subqueries) to get the expected result: placing Table3 at the same level as Table2 rather than beneath it, and nesting the children from Table2_1 and Table3_1 beneath their respective parents?
Given your hierarchical example I've fleshed out the example data to include some more subitems...
-- Extended sample data with several sub-items per parent.
-- Assumes the five tables are empty; the keys below would otherwise collide
-- with rows that are already present.

-- The Table2_1 titles below are 11 characters long, which does not fit the
-- NCHAR(10) column from the original DDL, so the column is widened first.
-- A variable-length type also keeps these titles unpadded in the XML output.
ALTER TABLE dbo.Table2_1 ALTER COLUMN Title NVARCHAR(20) NOT NULL;

INSERT INTO dbo.Table1 (Id, Title, Annotation) VALUES (1, 'FirstTitle', 'FirstAnno');

-- First Table2 row and its three children.
INSERT INTO dbo.Table2 (Id, Table1_Id, Title) VALUES (1, 1, 'Tab2Title1');
INSERT INTO dbo.Table2_1 (Id, Table2_Id, Title) VALUES (1, 1, 'Tab21Sub1.1');
INSERT INTO dbo.Table2_1 (Id, Table2_Id, Title) VALUES (2, 1, 'Tab21Sub1.2');
INSERT INTO dbo.Table2_1 (Id, Table2_Id, Title) VALUES (3, 1, 'Tab21Sub1.3');

-- Second Table2 row and its two children.
INSERT INTO dbo.Table2 (Id, Table1_Id, Title) VALUES (2, 1, 'Tab2Title2');
INSERT INTO dbo.Table2_1 (Id, Table2_Id, Title) VALUES (4, 2, 'Tab21Sub2.1');
INSERT INTO dbo.Table2_1 (Id, Table2_Id, Title) VALUES (5, 2, 'Tab21Sub2.2');

-- First Table3 row with one child; second Table3 row has no children.
INSERT INTO dbo.Table3 (Id, Table1_Id, Title) VALUES (1, 1, 'Tab3Title1');
INSERT INTO dbo.Table3_1 (Id, Table3_Id, Title) VALUES (1, 1, 'Tab31Sub');
INSERT INTO dbo.Table3 (Id, Table1_Id, Title) VALUES (2, 1, 'Tab3Title2');
If you use FOR XML AUTO and correlated subqueries that return FOR XML AUTO, TYPE such as the following:
-- Builds the hierarchy with correlated subqueries instead of one flat join.
-- Each subquery returns its rows as typed XML (FOR XML AUTO, TYPE), so the
-- result is embedded as child elements of the enclosing row. The T2 and T3
-- branches therefore end up as siblings under T1. The aliases (T1, T2, T21,
-- T3, T31) become the XML element names.
SELECT
    T1.*,
    (
        -- Table2 rows belonging to this Table1 row, each with its Table2_1 children.
        SELECT
            T2.*,
            (
                SELECT T21.*
                FROM Table2_1 AS T21
                WHERE T2.Id = T21.Table2_Id
                FOR XML AUTO, TYPE
            )
        FROM Table2 AS T2
        WHERE T1.Id = T2.Table1_Id
        FOR XML AUTO, TYPE
    ),
    (
        -- Table3 rows belonging to this Table1 row, each with its Table3_1 children.
        SELECT
            T3.*,
            (
                SELECT T31.*
                FROM Table3_1 AS T31
                WHERE T3.Id = T31.Table3_Id
                FOR XML AUTO, TYPE
            )
        FROM Table3 AS T3
        WHERE T1.Id = T3.Table1_Id
        FOR XML AUTO, TYPE
    )
FROM Table1 AS T1
FOR XML AUTO;
You can return nested XML data such as the following:
<T1 Id="1" Title="FirstTitle" Annotation="FirstAnno ">
<T2 Id="1" Table1_Id="1" Title="Tab2Title1">
<T21 Id="1" Table2_Id="1" Title="Tab21Sub1.1"/>
<T21 Id="2" Table2_Id="1" Title="Tab21Sub1.2"/>
<T21 Id="3" Table2_Id="1" Title="Tab21Sub1.3"/>
</T2>
<T2 Id="2" Table1_Id="1" Title="Tab2Title2">
<T21 Id="4" Table2_Id="2" Title="Tab21Sub2.1"/>
<T21 Id="5" Table2_Id="2" Title="Tab21Sub2.2"/>
</T2>
<T3 Id="1" Table1_Id="1" Title="Tab3Title1">
<T31 Id="1" Table3_Id="1" Title="Tab31Sub "/>
</T3>
<T3 Id="2" Table1_Id="1" Title="Tab3Title2"/>
</T1>

MS SQL Server: Column name or number of supplied values does not match table definition

My table definition:
-- Audit log of actions; ActionId is generated by the identity column.
CREATE TABLE [dbo].[Action]
(
    [ActionId]   INT IDENTITY(1,1) NOT NULL,
    [ActionType] NVARCHAR(50)      NOT NULL,
    [Initiator]  NVARCHAR(256)     NOT NULL,
    [Date]       DATETIME          NOT NULL,
    CONSTRAINT [PK_Action] PRIMARY KEY CLUSTERED ([ActionId] ASC)
        WITH (
            PAD_INDEX = OFF,
            STATISTICS_NORECOMPUTE = OFF,
            IGNORE_DUP_KEY = OFF,
            ALLOW_ROW_LOCKS = ON,
            ALLOW_PAGE_LOCKS = ON,
            OPTIMIZE_FOR_SEQUENTIAL_KEY = OFF
        ) ON [PRIMARY]
) ON [PRIMARY]
Code I try to execute:
-- After-delete trigger on titles: logs one row in Action, copies the deleted
-- rows into Old_titles, then stamps those copies with the new ActionId.
create trigger delete_on_titles on titles
after delete
as
begin
    -- Log who triggered the delete and when.
    insert into Action (ActionType, Initiator, Date) values ('Add', USER_NAME(), getdate());
    -- SCOPE_IDENTITY() is limited to this scope, so an identity generated by
    -- a nested trigger cannot be picked up here (unlike @@IDENTITY).
    declare @id int = SCOPE_IDENTITY();
    -- NOTE: this is the statement that raises Msg 213. Old_titles has an
    -- ActionId column that "deleted" lacks, so the column counts differ.
    insert into Old_titles select * from deleted
    -- Attach the freshly logged action to the rows that were just archived.
    update Old_titles set ActionId = @id where ActionId is null
end
But I get the error:
Msg 213, Level 16, State 1, Procedure delete_on_titles, Line 15 [Batch Start Line 2]
Column name or number of supplied values does not match table definition.
at the line "insert into Action (ActionType, Initiator, Date) values ('Add', USER_NAME(), getdate());"
What am I doing wrong?
UPDATED:
Thanks to @Larnu. Yes, the problem was in the line:
insert into Old_titles select * from deleted
I changed it to:
-- Archive the deleted rows. Both column lists are spelled out so the insert
-- does not depend on the physical column order of either table.
insert into Old_titles (
    title_id, title, type, pub_id, price,
    advance, royalty, ytd_sales, notes, pubdate)
select
    title_id, title, type, pub_id, price,
    advance, royalty, ytd_sales, notes, pubdate
from deleted
where title_id, title, type, pub_id, price, advance, royalty, ytd_sales, notes, pubdate are columns of the table deleted, and it works.

How to add fields from Items Table to Union all to table ItemOtherCode

Query is supposed to collect original items (from Items table) and alternative items (from ItemOtherCode table):
-- First branch: the original item, 16 columns.
SELECT
    ItemCode,
    SelPrice1Default,
    ItemAraName,
    ItemLatName,
    ItemNotes,
    UnitCode,
    ItemClassCode,
    ItemGroupCode,
    ItemSubGroupCode,
    TaxSet,
    ExpireDate,
    ItemType,
    ItemEquation,
    ItemDim,
    NotActive,
    UnitCode1
FROM dbo.Items
WHERE ItemCode = 10003
UNION ALL
-- Second branch: the alternative codes, only 2 columns.
SELECT
    OtherCode,
    BarcodeUnitPrice
FROM ItemOtherCode
WHERE ItemCode = 10003
But when I make this union all of Items table and ItemOtherCode table, I get an error:
All queries combined using a UNION, INTERSECT or EXCEPT operator must have an
equal number of expressions in their target lists.
How to solve this problem?
DDL
Items Table
-- Item master table, keyed by ItemCode.
-- The TEXTIMAGE_ON clause from the original script is omitted: SQL Server
-- rejects it when the table has no large-value (LOB) columns, and none of the
-- columns listed here is one.
CREATE TABLE [dbo].[Items]
(
    [ItemCode]         NVARCHAR(20)  NOT NULL,
    [ItemAraName]      NVARCHAR(100) NULL,
    [ItemLatName]      NVARCHAR(100) NULL,
    [ItemNotes]        NVARCHAR(100) NULL,
    [UnitCode]         INT           NOT NULL,
    [ItemClassCode]    INT           NULL,
    [ItemGroupCode]    INT           NULL,
    [ItemSubGroupCode] INT           NULL,
    [TaxSet]           FLOAT         NOT NULL,
    [ExpireDate]       BIT           NOT NULL,
    [ItemType]         INT           NULL,
    [ItemEquation]     NVARCHAR(50)  NULL,
    [ItemDim]          INT           NULL,
    [NotActive]        BIT           NOT NULL,
    [UnitCode1]        INT           NULL,
    [BuyPriceDefault]  FLOAT         NOT NULL,
    [PriceTypeCode]    INT           NULL,
    [SelPrice1Default] FLOAT         NOT NULL,
    [SelPrice2Default] FLOAT         NOT NULL,
    CONSTRAINT [PK_Items] PRIMARY KEY CLUSTERED ([ItemCode] ASC)
        WITH (
            PAD_INDEX = OFF,
            STATISTICS_NORECOMPUTE = OFF,
            IGNORE_DUP_KEY = OFF,
            ALLOW_ROW_LOCKS = ON,
            ALLOW_PAGE_LOCKS = ON
        ) ON [PRIMARY]
) ON [PRIMARY]
ItemOtherCode Table
-- Alternative codes per item; the key is the pair (ItemCode, OtherCode).
CREATE TABLE [dbo].[ItemOtherCode]
(
    [ItemCode]         NVARCHAR(20) NOT NULL,
    [OtherCode]        NVARCHAR(20) NOT NULL,
    [BarcodeUnitPrice] FLOAT        NULL,
    CONSTRAINT [PK_ItemOtherCode] PRIMARY KEY CLUSTERED
    (
        [ItemCode]  ASC,
        [OtherCode] ASC
    )
        WITH (
            PAD_INDEX = OFF,
            STATISTICS_NORECOMPUTE = OFF,
            IGNORE_DUP_KEY = OFF,
            ALLOW_ROW_LOCKS = ON,
            ALLOW_PAGE_LOCKS = ON
        ) ON [PRIMARY]
) ON [PRIMARY]
Relation between them:
-- Shows how the two tables relate: ItemOtherCode.ItemCode matches Items.ItemCode.
SELECT *
FROM dbo.Items AS i
    INNER JOIN dbo.ItemOtherCode AS o
        ON o.ItemCode = i.ItemCode
UNION requires the same number of columns, in the same order and with compatible data types, in both queries (the result takes its column names from the first query):
-- Template: both branches return four columns. Where the second table has no
-- matching column, a constant placeholder ('' or 0) fills the slot.
SELECT FIELD1, FIELD2, FIELD3, FIELD4
FROM TABLE1
UNION ALL
SELECT OTHER_FIELD1 AS FIELD1, OTHER_FIELD2 AS FIELD2, '' AS FIELD3, 0 AS FIELD4
FROM TABLE2
I surmise that [OtherCode] also is supposed to refer to parent Items table.
What you have tried:
-- Alternative codes (and their barcode unit price) recorded for item 10003.
SELECT
    OtherCode,
    BarcodeUnitPrice
FROM ItemOtherCode
WHERE ItemCode = 10003
or the same with join as shown in your question for FK explanation:
-- Same lookup, written as a join from the item to its alternative codes.
SELECT *
FROM dbo.Items AS i
    INNER JOIN dbo.ItemOtherCode AS o
        ON o.ItemCode = i.ItemCode
WHERE i.ItemCode = 10003
Now you have found all the alternative items of 10003. But you need info about those items. Where is info about items stored? In Items table of course. So we need to go there once again but with different ItemCode values:
-- Joins back to Items a second time (alias io) using OtherCode, so the result
-- holds the full Items row of every alternative item of 10003.
SELECT io.*
FROM dbo.Items AS i
    INNER JOIN dbo.ItemOtherCode AS o
        ON o.ItemCode = i.ItemCode
    INNER JOIN dbo.Items AS io
        ON io.ItemCode = o.OtherCode --<<<
WHERE i.ItemCode = 10003
now we have all fields from Items table for all alternative items of 10003. Can do union now.
-- Original item plus all of its alternative items, with the same 16 columns
-- from Items in both branches.
-- Every column is qualified: in the second branch ItemCode exists in both
-- ItemOtherCode and Items, so an unqualified reference is ambiguous.
-- ItemCode is NVARCHAR(20), so it is compared to a string literal. An integer
-- literal would force the column to be converted to int for every row.
SELECT
    i.ItemCode, i.SelPrice1Default, i.ItemAraName, i.ItemLatName,
    i.ItemNotes, i.UnitCode, i.ItemClassCode, i.ItemGroupCode,
    i.ItemSubGroupCode, i.TaxSet, i.ExpireDate, i.ItemType, i.ItemEquation,
    i.ItemDim, i.NotActive, i.UnitCode1
FROM dbo.Items AS i
WHERE i.ItemCode = N'10003'
UNION ALL
SELECT
    io.ItemCode, io.SelPrice1Default, io.ItemAraName, io.ItemLatName,
    io.ItemNotes, io.UnitCode, io.ItemClassCode, io.ItemGroupCode,
    io.ItemSubGroupCode, io.TaxSet, io.ExpireDate, io.ItemType, io.ItemEquation,
    io.ItemDim, io.NotActive, io.UnitCode1
FROM dbo.ItemOtherCode AS o
    INNER JOIN dbo.Items AS io
        ON io.ItemCode = o.OtherCode
WHERE o.ItemCode = N'10003'
Don't know what is BarcodeUnitPrice supposed to mean so can't say what to do with it.

SQL Server what indexes to create

I have a simple table:
-- Document modification history, one row per change; ID is the identity key.
-- The opening parenthesis of the column list was missing in the original.
CREATE TABLE DocModHistory
(
    [ID]       INT IDENTITY(1,1) NOT FOR REPLICATION NOT NULL,
    [DocID]    INT      NOT NULL,
    [BranchID] INT      NOT NULL,
    [UserID]   INT      NOT NULL,
    [InsDate]  DATETIME NOT NULL,
    [Type]     INT      NOT NULL,
    CONSTRAINT [PK_DocModHistory] PRIMARY KEY CLUSTERED ([ID] ASC)
        WITH (
            PAD_INDEX = OFF,
            STATISTICS_NORECOMPUTE = OFF,
            IGNORE_DUP_KEY = OFF,
            ALLOW_ROW_LOCKS = ON,
            ALLOW_PAGE_LOCKS = ON
        ) ON [PRIMARY]
) ON [PRIMARY]
and I have two select queries:
-- Latest history ID per branch for one user (@p_UserID is a query parameter).
SELECT dh.BranchID, MAX(dh.ID) AS MaxID
FROM DocModHistory AS dh
WHERE dh.UserID = @p_UserID
GROUP BY dh.BranchID
and
-- Latest history ID per document for one user (@p_UserID is a query parameter).
SELECT dh.DocID, MAX(dh.ID) AS MaxID
FROM DocModHistory AS dh
WHERE dh.UserID = @p_UserID
GROUP BY dh.DocID
Could you tell me please what indexes should I create?
Shall I create individual indexes for UserID, BranchID, DocID, or do I need multi-column indexes?
Thank you!
Create an index for UserID with Included columns BranchID, ID and DocID
So something like
-- Index keyed on UserID that also carries BranchID, ID and DocID, i.e. every
-- column the two queries read.
CREATE INDEX IX_UserID
    ON DocModHistory (UserID)
    INCLUDE (BranchID, ID, DocID);

Display rows when scrolling as twitter, using a stored procedure

I have a site that displays posts. I want the site's scrolling to behave like twitter - scrolling down will display more and more posts, endlessly.
Suppose I have the following tables:
A Post table to hold all the posts. Every post is related to a single person
-- Posts; each post belongs to exactly one person.
-- The TEXTIMAGE_ON clause from the original script is omitted: SQL Server
-- rejects it when the table has no large-value (LOB) columns, and none of the
-- columns listed here is one.
CREATE TABLE [dbo].[Post]
(
    [Id]          BIGINT IDENTITY(1,1) NOT NULL,
    [PersonId]    INT                  NOT NULL,
    [PublishDate] DATETIME             NOT NULL,
    CONSTRAINT [PK_Post] PRIMARY KEY CLUSTERED ([Id] ASC)
        WITH (
            PAD_INDEX = OFF,
            STATISTICS_NORECOMPUTE = OFF,
            IGNORE_DUP_KEY = OFF,
            ALLOW_ROW_LOCKS = ON,
            ALLOW_PAGE_LOCKS = ON
        ) ON [PRIMARY]
) ON [PRIMARY]
A PostTag table to hold all the related tags of each post.
-- Tags attached to posts; the key is the pair (PostId, TagId).
CREATE TABLE [dbo].[PostTag]
(
    [PostId] BIGINT NOT NULL,
    [TagId]  INT    NOT NULL,
    CONSTRAINT [PK_PostTag] PRIMARY KEY CLUSTERED
    (
        [PostId] ASC,
        [TagId]  ASC
    )
        WITH (
            PAD_INDEX = OFF,
            STATISTICS_NORECOMPUTE = OFF,
            IGNORE_DUP_KEY = OFF,
            ALLOW_ROW_LOCKS = ON,
            ALLOW_PAGE_LOCKS = ON
        ) ON [PRIMARY]
) ON [PRIMARY]
For each user of the site, the UserPersonStatistics table holds the number of times he showed interest in a person related post.
-- Per-user interest counter for each person; key is (UserId, PersonId).
CREATE TABLE [dbo].[UserPersonStatistics]
(
    [UserId]   BIGINT NOT NULL,
    [PersonId] INT    NOT NULL,
    [Counter]  BIGINT NOT NULL,
    CONSTRAINT [PK_UserPersonStatistics] PRIMARY KEY CLUSTERED
    (
        [UserId]   ASC,
        [PersonId] ASC
    )
        WITH (
            PAD_INDEX = OFF,
            STATISTICS_NORECOMPUTE = OFF,
            IGNORE_DUP_KEY = OFF,
            ALLOW_ROW_LOCKS = ON,
            ALLOW_PAGE_LOCKS = ON
        ) ON [PRIMARY]
) ON [PRIMARY]
For each user of the site, the UserPostStatistics table holds the number of times he showed interest in a post.
-- Per-user interest counter for each post; key is (UserId, PostId).
CREATE TABLE [dbo].[UserPostStatistics]
(
    [UserId]  BIGINT NOT NULL,
    [PostId]  BIGINT NOT NULL,
    [Counter] BIGINT NOT NULL,
    CONSTRAINT [PK_UserPostStatistics] PRIMARY KEY CLUSTERED
    (
        [UserId] ASC,
        [PostId] ASC
    )
        WITH (
            PAD_INDEX = OFF,
            STATISTICS_NORECOMPUTE = OFF,
            IGNORE_DUP_KEY = OFF,
            ALLOW_ROW_LOCKS = ON,
            ALLOW_PAGE_LOCKS = ON
        ) ON [PRIMARY]
) ON [PRIMARY]
For each user of the site, the UserTagStatistic table holds the number of times he showed interest in a tag related post.
-- Per-user interest counter for each tag; key is (UserId, TagId).
CREATE TABLE [dbo].[UserTagStatistics]
(
    [UserId]  BIGINT NOT NULL,
    [TagId]   INT    NOT NULL,
    [Counter] BIGINT NOT NULL,
    CONSTRAINT [PK_UserTagStatistics] PRIMARY KEY CLUSTERED
    (
        [UserId] ASC,
        [TagId]  ASC
    )
        WITH (
            PAD_INDEX = OFF,
            STATISTICS_NORECOMPUTE = OFF,
            IGNORE_DUP_KEY = OFF,
            ALLOW_ROW_LOCKS = ON,
            ALLOW_PAGE_LOCKS = ON
        ) ON [PRIMARY]
) ON [PRIMARY]
What I need is a stored procedure that returns 35 different posts for a given user on each call, and that "remembers" the last 35 posts so it will not return the same posts again. The 35 posts should consist of:
15 posts for the most popular tag (UserTagStatistics)
15 posts for the most popular person (UserPersonStatistics)
5 most popular posts (UserPostStatistics)
One problem is that the procedure should return 35 different posts each time.
One more problem is that a post can return once as the most popular post, once as a post of the most popular tag, and once as a post for the most popular person. This post should be counted once, not three times.
The performance of the stored procedure is crucial.
I know its a very complicated question.
Any thoughts are appreciated.
kruvi
Add a "LastViewed" datetime field to all tables then use a proc like this. For performance, just make sure to have an index on UserID+LastViewed+Counter and UserID+PersonID for each of the three tables and it should scream. Actually, since UserID+LastViewed+Counter is practically the whole table, if possible I'd recommend you make it the clustered index on each of your tables so that you avoid creating that second index which would basically be the same size as the raw table.
-- Marks the user's next batch of up to 15 most popular persons as viewed and
-- returns that batch.
-- Requires a LastViewed datetime column on UserPersonStatistics (not part of
-- the table definition shown earlier).
--   @UserId: the user whose statistics are read and updated.
create proc GetInfo(@UserId bigint) as
begin
    -- Stamp the top 15 persons by counter, skipping rows that belong to the
    -- most recently returned batch so that batch is not returned again.
    update userpersonstatistics
    set
        lastviewed=getdate()
    where
        userid=@UserId and personid in
        (
            select top 15 personid from userpersonstatistics
            where
                userid=@UserId and
                (
                    lastviewed is null or lastviewed !=
                        (select max(lastviewed) from userpersonstatistics
                         where userid=@UserId)
                )
            order by counter desc
        )

    -- Return the batch just stamped. The maximum is taken from this same
    -- table and for this user only, so other users' and other tables'
    -- timestamps cannot affect the result.
    select * from UserPersonStatistics
    where UserID=@UserId and LastViewed =
        (select max(lastviewed) from UserPersonStatistics
         where userid=@UserId)

    --**Repeat the above code for UserPostStatistics and UserTagStatistics
end
REVISED PROC BASED ON INPUT:
-- Skeleton: gathers up to 35 candidate rows per category (person, post, tag)
-- for one user into a table variable, ready for a final ranking step.
-- Requires a LastViewed datetime column on the three statistics tables (not
-- part of the table definitions shown earlier).
--   @UserId: the user whose statistics are read.
-- The procedure does not return a result set yet; see the closing comments.
create proc GetInfo(@UserId bigint) as
begin
    -- NOTE(review): @lastviewed is assigned but not used below. Presumably it
    -- is meant for stamping LastViewed on the rows finally chosen -- confirm.
    declare @lastviewed datetime

    -- ItemID holds a PersonId, PostId or TagId depending on StatType.
    -- Counter is bigint to match the Counter columns of the source tables.
    declare @results table
    (
        StatType varchar(10),
        Counter bigint,
        ItemID bigint
    )

    set @lastviewed = getdate()

    --Person
    insert into @results(StatType,Counter,ItemID)
    select
        'Person',counter,PersonID
    from
        UserPersonStatistics
    where
        userid=@UserId and personid in
        (
            select top 35 personid from userpersonstatistics
            where
                userid=@UserId and
                (
                    lastviewed is null or lastviewed !=
                        (select max(lastviewed) from userpersonstatistics
                         where userid=@UserId)
                )
            order by counter desc
        )

    --Post
    insert into @results(StatType,Counter,ItemID)
    select
        'Post',counter,PostID
    from
        UserPostStatistics
    where
        userid=@UserId and Postid in
        (
            select top 35 Postid from userPoststatistics
            where
                userid=@UserId and
                (
                    lastviewed is null or lastviewed !=
                        (select max(lastviewed) from userPoststatistics
                         where userid=@UserId)
                )
            order by counter desc
        )

    --Tag
    insert into @results(StatType,Counter,ItemID)
    select
        'Tag',counter,TagID
    from
        UserTagStatistics
    where
        userid=@UserId and Tagid in
        (
            select top 35 Tagid from userTagstatistics
            where
                userid=@UserId and
                (
                    lastviewed is null or lastviewed !=
                        (select max(lastviewed) from userTagstatistics
                         where userid=@UserId)
                )
            order by counter desc
        )

    --At this point you could have 105 rows of the various types (35*3).
    --You can use whatever algorithm you need to decide the top 35.
    --That may include some weighting.
    --You may want to consider using the Rank() function.
end
If your algorithm should consider the #1 top counter from each category before the #2's, take a look at the Rank() function.

Resources