Insert files from folder (OPENROWSET(BULK...)) into table with details - database

I'm trying to copy multiple files from a specific folder and its subfolders into MS SQL Server 2012. I also want to capture each file's details and import them into the database. I want the following file details:
Date Created
Date Modified
Size
This is the query that I have:
-- Loads every file that xp_dirtree lists under @filepath into _table, issuing
-- one OPENROWSET(BULK ..., SINGLE_BLOB) insert per file through dynamic SQL.
-- NOTE(review): @filepath is never assigned in this snippet; set it (with a
-- trailing backslash) before running.
-- NOTE(review): only the name column of xp_dirtree is used, so files found
-- below the top level presumably will not resolve as @filepath + @filename.
DECLARE @sql nvarchar(max)
    , @filename nvarchar(256)
    , @filepath varchar(100)
DECLARE @FileList TABLE (filename nvarchar(256), depth int, fl int)
INSERT INTO @FileList
EXEC xp_dirtree @filepath, 10, 1;
DECLARE curDoc CURSOR FOR
    SELECT filename FROM @FileList WHERE fl = 1 -- fl = 1 keeps files, not folders
OPEN curDoc
FETCH NEXT FROM curDoc INTO @filename
WHILE (@@FETCH_STATUS = 0)
BEGIN
    -- OPENROWSET(BULK ...) needs a literal path, so the statement is built as text.
    -- Single quotes in the path are doubled so a file name cannot break out of the literal.
    SET @sql = 'INSERT INTO _table SELECT * FROM OPENROWSET(BULK '''
        + REPLACE(@filepath + @filename, '''', '''''')
        + ''', SINGLE_BLOB) as tbl'
    PRINT @sql
    EXEC sp_executesql @sql
    FETCH NEXT FROM curDoc
    INTO @filename
END
CLOSE curDoc
DEALLOCATE curDoc

Related

Is there any easy way to import multiple .txt files to SQL server?

I am trying to import 24 .txt files to the server, but I have limited privileges. All file names start with the same characters, but the '*' wildcard cannot be used in this query. I use the script below.
-- Attempted single-statement import from the question. As the question states,
-- the '*' wildcard in the BULK path cannot be used here, so this statement does
-- not load the matching files.
SELECT * into TABLE
FROM OPENROWSET( BULK 'c:\path\files\good*.txt', FORMATFILE = 'c:\path\files\import.xml',FIRSTROW = 2) AS DATA;
You would need to create the table first and then insert each file using a loop.
Try the following script after creating the destination table:
-- Bulk-inserts every file matching @pattern under @filepath into @TableName.
-- The destination table must already exist. File names come from "dir /b"
-- run through xp_cmdshell; each file is loaded with its own BULK INSERT.
declare @filepath varchar(100) = 'c:\path\files\'
    ,@pattern varchar(100) = 'good*.txt'
    ,@TableName varchar(100) = 'TestTable'
    ,@formatfile varchar(200) = 'c:\path\files\import.xml'
DECLARE @cmd varchar(1000)
DECLARE @query varchar(1000)
DECLARE @numfiles int
DECLARE @filename varchar(200) -- same width as @files.SourceFileName
DECLARE @files TABLE (SourceFileName varchar(200) NULL)
-- Pass the command as a parameter instead of embedding it in a double-quoted
-- EXEC string; the path is quoted so folders containing spaces work.
SET @cmd = 'dir "' + @filepath + @pattern + '" /b'
INSERT @files(SourceFileName)
EXEC master.dbo.xp_cmdshell @cmd
DECLARE CUR CURSOR FAST_FORWARD FOR
    SELECT SourceFileName
    FROM @files
    WHERE SourceFileName IS NOT NULL
        AND SourceFileName <> 'File Not Found' -- presumably what dir reports when nothing matches
SET @numfiles = 0
OPEN CUR
FETCH NEXT FROM CUR INTO @filename
WHILE (@@FETCH_STATUS = 0)
BEGIN
    print @filename
    SET @numfiles += 1
    -- BULK INSERT needs literal names, so build the statement as text.
    -- Single quotes in the paths are doubled to keep the literals intact.
    SET @query = ('BULK INSERT ' + @TableName
        + ' FROM ''' + REPLACE(@filepath + @filename, '''', '''''') + ''' WITH(
FORMATFILE = ''' + REPLACE(@formatfile, '''', '''''') + ''',
FIRSTROW = 2
);'
    )
    PRINT @query
    EXEC (@query)
    FETCH NEXT FROM CUR INTO @filename
END
CLOSE CUR
DEALLOCATE CUR
PRINT 'Files processed: ' + CAST(@numfiles AS varchar(10))

Inserting PDFs into an SQL table

So I'm trying to read a bunch of PDFs from a folder into an SQL table, saving them in a varbinary(max) field. This is what I thought would work at first:
-- Attempt 1 from the question: list the folder with xp_cmdshell, then read each
-- file into @pdf and insert it into pdftest together with its name.
CREATE TABLE tempFileName(filnavn VARCHAR(100));
INSERT INTO tempFileName
EXEC xp_cmdshell 'dir /B "C:\temp\Test Folder\"';
--------
DECLARE @path VARCHAR(100) SET @path = 'C:\temp\Test Folder\'
DECLARE @pdf VARBINARY(MAX)
DECLARE @navn varchar(100) -- same width as tempFileName.filnavn so names are not truncated
DECLARE @fullpath nvarchar(max)
DECLARE @sql nvarchar(max)
DECLARE c CURSOR FOR
    SELECT filnavn
    FROM tempFileName
    WHERE filnavn IS NOT NULL -- skip NULL rows that xp_cmdshell output can contain
OPEN c
FETCH NEXT FROM c INTO @navn
WHILE(@@FETCH_STATUS = 0)
BEGIN
    SET @fullpath = @path + @navn
    -- This is the statement the question reports as failing: a variable is
    -- not accepted as the file path of OPENROWSET(BULK ...).
    SELECT @pdf = BulkColumn
    FROM OPENROWSET(BULK @fullpath, SINGLE_BLOB) AS Document;
    --print @sql
    INSERT INTO pdftest VALUES(@navn, @pdf)
    FETCH NEXT FROM c INTO @navn
END
CLOSE c
DEALLOCATE c
But this doesn't work as it won't allow me to use a variable in this line:
FROM OPENROWSET(BULK @fullpath, SINGLE_BLOB) AS Document;
So I'm pretty sure the trick is to make the whole "select @pdf.." line into a string and then execute it, but I'm not sure how to get the output into a table. I've tried something like this:
-- Attempt 2 from the question: run the OPENROWSET read as dynamic SQL.
SET @fullpath = @path + @navn
SET @sql = 'DECLARE @pdf VARBINARY(MAX) SELECT @pdf = BulkColumn
FROM OPENROWSET(BULK ''' + @fullpath + ''' , SINGLE_BLOB) AS Document;'
--print @sql
--SELECT @pdf, DATALENGTH(@pdf)
--INSERT INTO pdftest VALUES(@navn, @pdf)
-- The dynamic batch fills its own local @pdf and never references the @fil
-- output parameter declared here, so nothing is written back to @fil.
EXEC sp_executesql @sql, N'@fil varbinary(max) out', @fil out
But the @fil variable is just empty after this. How do I best go about getting these files into a table?
Why not just skip the variable assignment of the SELECT in your loop and use the OPENROWSET function inside your INSERT? The general idea:
-- General idea: insert the file name and the file contents in one statement.
-- SINGLE_BLOB exposes the file as the single column BulkColumn.
INSERT INTO pdftest SELECT @navn, BulkColumn FROM OPENROWSET(BULK 'C:\thefile.txt', SINGLE_BLOB) AS document
And of course turn above into dynamic SQL. I'll probably get a few single-quotes wrong here, but again the general idea:
-- Dynamic form of the INSERT: the file name and path are embedded as string
-- literals (single quotes doubled) because OPENROWSET(BULK ...) does not take
-- a variable for the path.
SET @sql =
'INSERT INTO pdftest
SELECT ''' + REPLACE(@navn, '''', '''''') + ''', BulkColumn
FROM OPENROWSET(BULK ''' + REPLACE(@fullpath, '''', '''''') + ''', SINGLE_BLOB) AS document
'

Import Multiple Different .txt Files to SQL Server from a Folder

I found many queries online that import multiple files from a folder into one single SQL Server table, like the one below, but nothing on how to import multiple different files as different tables. I have 21 files (21 for now; the number might increase with time), and it's really tedious to import each file from the folder one by one.
-- Loads every .txt file found under @path into the single table test.
-- File names are collected with "dir /b" via xp_cmdshell into ALLFILENAMES,
-- then each file is loaded with its own BULK INSERT.
CREATE TABLE ALLFILENAMES(WHICHPATH VARCHAR(255),WHICHFILE varchar(255))
--some variables
declare @filename varchar(255),
    @path varchar(255),
    @sql varchar(8000),
    @cmd varchar(1000)
--get the list of files to process:
SET @path = 'C:\Users\atp1lip\Desktop\09242017\'
SET @cmd = 'dir ' + @path + '*.txt /b'
INSERT INTO ALLFILENAMES(WHICHFILE)
EXEC Master..xp_cmdShell @cmd
UPDATE ALLFILENAMES SET WHICHPATH = @path where WHICHPATH is null
--cursor loop
declare c1 cursor for
    SELECT WHICHPATH,WHICHFILE FROM ALLFILENAMES where WHICHFILE like '%.txt%'
open c1
fetch next from c1 into @path,@filename
-- Loop only while the fetch succeeded (0); any other status ends the loop.
While @@fetch_status = 0
begin
    -- BULK INSERT needs a literal file name, so build the statement as text;
    -- single quotes in the path are doubled to keep the literal intact.
    set @sql = 'BULK INSERT test FROM ''' + REPLACE(@path + @filename, '''', '''''') + ''' '
        + ' WITH (
FIELDTERMINATOR = '','',
ROWTERMINATOR = ''\n''
) '
    print @sql
    exec (@sql)
    fetch next from c1 into @path,@filename
end
close c1
deallocate c1
I was wondering if its possible to do this? Any help is appreciated. Thanks!
You can also use the xp_dirtree procedure like this:
-- Captures the xp_dirtree listing of @FilePath in a temp table.
-- @FilePath must be declared and set by the surrounding script.
CREATE TABLE #FilesList(ID INT IDENTITY(1,1), FileName VARCHAR(1000),Depth INT,isFile INT)
-- Explicit column list: ID is generated, the other three receive the result set.
INSERT INTO #FilesList (FileName, Depth, isFile)
EXEC xp_dirtree @FilePath, 1, 1

sql cursor insert result into a table

I have created a cursor which iterates through all the databases and displays the 1 record per database.
I would like the records to be inserted into 1 table where I can view it. The query may change which is why I don't want to create the table structure for a specific query and insert it. I wanted to use the "select into" clause but that will fail on the second time the cursor runs
-- Runs the same query in every database listed in #DBNAME and returns one
-- result set per database, prefixed with the database name.
-- NOTE(review): #DBNAME is not created in this snippet; presumably a temp
-- table (or table variable) holding the database names - confirm.
DECLARE @DB_Name sysname
DECLARE @Command nvarchar(max) -- nvarchar(200) could truncate the batch for long names
DECLARE database_cursor CURSOR FOR SELECT name FROM #DBNAME
OPEN database_cursor
FETCH NEXT FROM database_cursor INTO @DB_Name
WHILE @@FETCH_STATUS = 0
BEGIN
    -- QUOTENAME brackets the name safely for USE; the copy used as a string
    -- literal has its single quotes doubled.
    SELECT @Command = 'use ' + QUOTENAME(@DB_Name) + ' Select ''' + REPLACE(@DB_Name, '''', '''''') + ''' ,' +
    --Enter query below
    '* from authentication where username like ''%clair#indicater%'' and password = ''Rohan2410'''
    -- print @Command
    EXEC sp_executesql @Command
    FETCH NEXT FROM database_cursor INTO @DB_Name
END
CLOSE database_cursor
DEALLOCATE database_cursor
It is better to use INSERT INTO ... instead of SELECT INTO, something like this:
-- Runs the same query in every database listed in #DBNAME and collects all
-- rows in the global temp table ##output, which is created (empty, with the
-- structure of the query plus a DB_Name column) on the first iteration.
-- NOTE(review): #DBNAME is not created in this snippet; presumably a temp
-- table (or table variable) holding the database names - confirm.
DECLARE @DB_Name sysname
DECLARE @Command nvarchar(max) -- the batch below is longer than 200 characters
DECLARE database_cursor CURSOR FOR SELECT name FROM #DBNAME
OPEN database_cursor
FETCH NEXT FROM database_cursor INTO @DB_Name
WHILE @@FETCH_STATUS = 0
BEGIN
    -- DB_Name is typed explicitly: an untyped NULL would create an int column
    -- that cannot hold the database names inserted afterwards.
    SELECT @Command = 'use ' + QUOTENAME(@DB_Name) + '
IF OBJECT_ID(''tempdb..##output'') IS NULL
BEGIN
SELECT CAST(NULL AS nvarchar(128)) AS DB_Name,*
INTO ##output
FROM authentication WHERE 1=0
END
INSERT INTO ##output
Select ''' + REPLACE(@DB_Name, '''', '''''') + ''' ,' +
    --Enter query below
    '* from authentication where username like ''%clair#indicater%'' and password = ''Rohan2410'''
    -- print @Command
    EXEC sp_executesql @Command
    FETCH NEXT FROM database_cursor INTO @DB_Name
END
CLOSE database_cursor
DEALLOCATE database_cursor
-- ##output only exists if at least one database was processed.
IF OBJECT_ID('tempdb..##output') IS NOT NULL
BEGIN
    SELECT * FROM ##output
    DROP TABLE ##output
END
Basically, on the first cursor iteration we will create an empty temp table with the correct structure. Then we just insert into that temp table.

Import Multiple CSV Files to SQL Server from a Folder

I have a folder called "Dump." This folder consists of various .CSV Files.
The folder Location is 'C:\Dump'
I want to Import the contents of these files into SQL Server.
I want the rough code along with proper comments so that I understand it.
I have tried a few codes that I found on the Net. But they haven't quite worked out for me for some strange reason.
The steps I would like to have are
Step 1: Copy all the File Names in the folder to a Table
Step 2: Iterate through the table and copy the data from the files using Bulk Insert.
Someone do please help me out on this one. Thanks a lot in advance :)
--BULK INSERT MULTIPLE FILES From a Folder
-- Step 1 lists the .csv files under @path into ALLFILENAMES (via xp_cmdshell).
-- Step 2 walks that list and BULK INSERTs each file into the table Temp,
-- skipping the first (header) row of every file.
--a table to loop thru filenames drop table ALLFILENAMES
CREATE TABLE ALLFILENAMES(WHICHPATH VARCHAR(255),WHICHFILE varchar(255))
--some variables
declare @filename varchar(255),
    @path varchar(255),
    @sql varchar(8000),
    @cmd varchar(1000)
--get the list of files to process:
SET @path = 'C:\Dump\'
SET @cmd = 'dir ' + @path + '*.csv /b'
INSERT INTO ALLFILENAMES(WHICHFILE)
EXEC Master..xp_cmdShell @cmd
UPDATE ALLFILENAMES SET WHICHPATH = @path where WHICHPATH is null
--cursor loop
declare c1 cursor for
    SELECT WHICHPATH,WHICHFILE FROM ALLFILENAMES where WHICHFILE like '%.csv%'
open c1
fetch next from c1 into @path,@filename
-- Loop only while the fetch succeeded (0); any other status ends the loop.
While @@fetch_status = 0
begin
    --bulk insert won't take a variable name, so make a sql and execute it instead
    --(single quotes in the path are doubled to keep the literal intact):
    set @sql = 'BULK INSERT Temp FROM ''' + REPLACE(@path + @filename, '''', '''''') + ''' '
        + ' WITH (
FIELDTERMINATOR = '','',
ROWTERMINATOR = ''\n'',
FIRSTROW = 2
) '
    print @sql
    exec (@sql)
    fetch next from c1 into @path,@filename
end
close c1
deallocate c1
--Extras
--delete from ALLFILENAMES where WHICHFILE is NULL
--select * from ALLFILENAMES
--drop table ALLFILENAMES
This will give you separate tables for each file.
--BULK INSERT MULTIPLE FILES From a Folder
-- Creates one table per .csv file under @path: each file is read through
-- OPENROWSET with the text driver and materialised with SELECT ... INTO a
-- table named after the file (without the .csv extension).
-- Drop the work table only if it exists, so a first run does not fail.
IF OBJECT_ID('ALLFILENAMES', 'U') IS NOT NULL
    drop table allfilenames
--a table to loop thru filenames drop table ALLFILENAMES
CREATE TABLE ALLFILENAMES(WHICHPATH VARCHAR(255),WHICHFILE varchar(255))
--some variables
declare @filename varchar(255),
    @path varchar(255),
    @sql varchar(8000),
    @cmd varchar(1000)
--get the list of files to process:
SET @path = 'D:\Benihana\backup_csv_benihana_20191128032207_part_1\'
SET @cmd = 'dir ' + @path + '*.csv /b'
INSERT INTO ALLFILENAMES(WHICHFILE)
EXEC Master..xp_cmdShell @cmd
UPDATE ALLFILENAMES SET WHICHPATH = @path where WHICHPATH is null
delete from ALLFILENAMES where WHICHFILE is null
--SELECT replace(whichfile,'.csv',''),* FROM dbo.ALLFILENAMES
--cursor loop
declare c1 cursor for
    SELECT WHICHPATH,WHICHFILE FROM ALLFILENAMES where WHICHFILE like '%.csv%' order by WHICHFILE desc
open c1
fetch next from c1 into @path,@filename
-- Loop only while the fetch succeeded (0); any other status ends the loop.
While @@fetch_status = 0
begin
    --the target table name and file path cannot be variables, so make a sql and execute it instead.
    --QUOTENAME keeps file names with spaces or punctuation valid as table names;
    --single quotes in the path are doubled to keep the literal intact.
    set @sql =
'select * into '+ QUOTENAME(Replace(@filename, '.csv',''))+'
from openrowset(''MSDASQL''
,''Driver={Microsoft Access Text Driver (*.txt, *.csv)}''
,''select * from '+REPLACE(@path+@filename, '''', '''''')+''')'
    print @sql
    exec (@sql)
    fetch next from c1 into @path,@filename
end
close c1
deallocate c1
For Step 1 Maybe you can look at:
http://www.sql-server-performance.com/forum/threads/copying-filenames-to-sql-table.11546/
or
How to list files inside a folder with SQL Server
and then Step 2
How to cast variables in T-SQL for bulk insert?
HTH
You might need to enable the xp_cmdshell first:
-- Turn on advanced options first, then switch xp_cmdshell on.
EXEC sp_configure 'show advanced options', '1';
RECONFIGURE;
GO
EXEC sp_configure 'xp_cmdshell', '1';
RECONFIGURE;
GO
And, to enable ad_hoc,
-- Turn on advanced options first, then allow ad hoc distributed queries.
EXECUTE sp_configure @configname = 'show advanced options', @configvalue = 1;
RECONFIGURE;
GO
EXECUTE sp_configure @configname = 'Ad Hoc Distributed Queries', @configvalue = 1;
RECONFIGURE;
GO
To solve step 1, xp_dirtree can also be used to list all files and folders.
Keep in mind that it is an undocumented function. Security precautions must be considered. Intentionally crafted filenames could be an intrusion vector.
In python you can use d6tstack which makes this simple
# Combine every *.csv file in the current directory and write the result to
# the 'tablename' table on the SQL Server given by the connection URI.
import d6tstack
import glob
c = d6tstack.combine_csv.CombinerCSV(glob.glob('*.csv'))
# URI shape: mssql+pymssql://<user>:<password>@<host>/<database>
c.to_mssql_combine('mssql+pymssql://usr:pwd@localhost/db', 'tablename')
See SQL examples. It also deals with data schema changes, creates table and allows you to preprocess data. It leverages BULK INSERT so should be just as fast.
To expand upon the answer by SarangArd, you can replace Temp with the following if your file name matches your table name.
' + Left(@filename, Len(@filename)-4) + '
This code will create a new table per CSV file that is imported.
Best to populate empty database from CSV files.
-- Creates and loads one table per .csv file found under @path.
-- For each file: the header row is read into #Header, a CREATE TABLE with one
-- nvarchar(max) column per header field is built and run, and the remaining
-- rows are loaded with BULK INSERT.
-- NOTE(review): STRING_SPLIT and the FORMAT = 'csv' option are not available
-- on older SQL Server versions - check the target version.
CREATE TABLE ALLFILENAMES
(
    WHICHPATH VARCHAR(255)
    ,WHICHFILE VARCHAR(255)
)
DECLARE @filename VARCHAR(255),
    @path VARCHAR(255),
    @cmd VARCHAR(1000),
    @TableName NVARCHAR(300)
SET @path = 'L:\DATA\SOURCE\CSV\' --PATH TO YOUR CSV FILES (CHANGE TO YOUR PATH)
SET @cmd = 'dir ' + @path + '*.csv /b'
INSERT INTO ALLFILENAMES(WHICHFILE)
EXEC Master..xp_cmdShell @cmd
UPDATE ALLFILENAMES
SET WHICHPATH = @path
WHERE WHICHPATH IS NULL
DECLARE c1 CURSOR
FOR SELECT WHICHPATH
    ,WHICHFILE
FROM ALLFILENAMES
WHERE WHICHFILE LIKE '%.csv%'
OPEN c1
FETCH NEXT FROM c1 INTO @path,
    @filename
-- Loop only while the fetch succeeded (0); any other status ends the loop.
WHILE @@FETCH_STATUS = 0
BEGIN
    CREATE TABLE #Header
    (
        HeadString NVARCHAR(MAX)
    )
    -- The initialisers run on every pass, so each file starts from empty strings.
    DECLARE @Columns NVARCHAR(MAX) = ''
    DECLARE @Query NVARCHAR(MAX) = ''
    DECLARE @QUERY2 NVARCHAR(MAX) = ''
    DECLARE @HeaderQuery NVARCHAR(MAX) = ''
    -- One table name for both CREATE TABLE and BULK INSERT (the two statements
    -- previously derived it differently); QUOTENAME keeps odd file names valid.
    SET @TableName = QUOTENAME(REPLACE(REPLACE(@filename, '.csv', ''), '.TPS', ''))
    -- Read only the first line of the file: the header.
    SELECT @HeaderQuery = @HeaderQuery + 'bulk insert #Header from ''' + REPLACE(@path + @filename, '''', '''''') + '''
with(firstrow=1,lastrow=1)'
    EXEC (@HeaderQuery)
    -- Turn "a,b,c" into "[a] nvarchar(max),[b] nvarchar(max),[c] nvarchar(max),".
    SELECT @Columns = (SELECT QUOTENAME(value) + ' nvarchar(max)' + ','
        FROM #Header
        CROSS APPLY STRING_SPLIT(HeadString,',') FOR xml PATH(''))
    IF ISNULL(@Columns,'') <> ''
    BEGIN
        SET @Columns = LEFT(@Columns,LEN(@Columns) - 1) -- drop the trailing comma
        -- Double quotes around header fields are stripped from the column names.
        SELECT @Query = @Query + 'CREATE TABLE ' + @TableName + ' (' + replace(@Columns,'"','') + ')'
        PRINT @Query
        EXEC (@Query)
    END
    -- Load the data rows (everything after the header).
    SELECT @QUERY2 = @QUERY2 + 'bulk insert ' + @TableName + ' from ''' + REPLACE(@path + @filename, '''', '''''') + '''
with(firstrow=2,FORMAT=''csv'',FIELDTERMINATOR='','',ROWTERMINATOR=''\n'')'
    EXEC (@QUERY2)
    DROP TABLE #Header
    FETCH NEXT FROM c1 INTO @path,
        @filename
END
CLOSE c1
DEALLOCATE c1

Resources