设为首页收藏本站

 找回密码
 注册

QQ登录

只需一步,快速开始

BCM 门户 IT世界 应用开发 查看内容

Read CSV Files with double quotes and comma into SQL DB

2012-6-12 13:04| 发布者: Test| 查看: 3012| 评论: 0|原作者: Brad Schulz|来自: SELECT Blog FROM Brad.Schulz

摘要: In this post, I’d like to talk about Comma-Separated Values in Files (i.e. CSV Files).I had a client with a need to read in a CSV file and do some subsequent processing with it. This was a very small ...
Complete source code

use TempDB  /* Change to database appropriate for you */
go
 
if object_id('usp_ProcessCSVFile','P') is null
begin
  exec('create procedure usp_ProcessCSVFile as select X=1')
end
go
 
/*
==============================================================================================
 
 Procedure usp_ProcessCSVFile
 
 Description/Purpose:
   Read CSV File data in T-SQL.
 
 Parameters:
   @CSVFileName: Fully-qualified filename containing CSV data (visible by the server).
   @HeaderRowExists:  Bit column specifying if Header Row exists in the file (Default=1).
 
 Example:
   declare @t (CustomerID int,CompanyName varchar(40), Street varchar(40))
   insert @t
   exec usp_ProcessCSVFile 'C:\Temp\MyCustomerData.CSV'
 
 For more details:
   http://bradsruminations.blogspot.com/2011/01/so-you-want-to-read-csv-files-huh.html
 
 Jan14,2011-Brad Schulz
   Written
 Feb14,2011-Brad Schulz
   Thanks to Leonid for catching the problem with empty/null columns.  Changed the XPath
   for exracting column data from .value('(col/text())[n]') to .value('(col[n]/text())[1]').
 
==============================================================================================
*/
alter procedure usp_ProcessCSVFile
   @CSVFilename varchar(1000)
  ,@HeaderRowExists bit = 1
as
 
set nocount on
 
/* Acquire the file contents into a variable */
declare @SqlStmt nvarchar(max)
       ,@CSVContents varchar(max)
set @SqlStmt=N'
select @CSVContents=cast(FileContents as varchar(max))
from openrowset(bulk '''+@CSVFilename+''', single_blob) x(FileContents)'
exec sp_executesql @SqlStmt
                  ,N'@CSVContents varchar(max) output'
                  ,@CSVContents output
 
/* Remove any LineFeed characters that may be in the file */
set @CSVContents=replace(@CSVContents
                        ,char(10)
                        ,'') 
 
/*
XML-Encode the contents... For example:
  Converts & to &
  Converts < to &lt;
  Converts > to &gt;
This is done because so that these special characters will be UNencoded later
when the calling program shreds the XML using the .nodes() and .value() functions.
Important Note:
  It will also encode the Carriage Returns (CHAR(13)) separating the rows to &#x0D;
*/
set @CSVContents=(select [*]=@CSVContents
                  for xml path(''))
 
/* Convert all commas to Tab characters */
set @CSVContents=replace(@CSVContents
                        ,','
                        ,char(9))
 
/* Declare some variables */
declare @CurrentPos int                /* Current character pointer */
       ,@DoubleQuotePos1 int           /* Opening Double-Quote Position */
       ,@DoubleQuotePos2 int           /* Closing Double-Quote Position */
       ,@DoubleDoubleQuotePos int      /* Position of any '""' in the string */
       ,@StringColValue varchar(max)   /* Value of a String Column */
       ,@LenStringColValue int         /* and its length */
 
/* Initialize our current character position */
set @CurrentPos=0      
 
/* Loop forever */
while 1=1
begin
 
  /* First find a double-quote beyond our current position */
  set @DoubleQuotePos1=charindex('"'
                                ,@CSVContents
                                ,@CurrentPos+1)
 
  /* If there isn't one to be found, then we're done */
  if @DoubleQuotePos1=0 break
 
  /*
  From that opening double-quote, find the position of another
  double-quote (hopefully the closing one) and, at the same time,
  see if there are any double double-quotes (i.e. '""')
  */
  select @DoubleDoubleQuotePos=charindex('""'
                                        ,@CSVContents
                                        ,@DoubleQuotePos1+1)
        ,@DoubleQuotePos2=charindex('"'
                                   ,@CSVContents
                                   ,@DoubleQuotePos1+1)
 
  /*
  If the positions are the same, that just means the first
  double-quote we found was just a double-double (i.e. '""').
  So we'll continue looking forward until we find our first lone
  double-quote, which will end up being our closing one.
  */
  while @DoubleQuotePos2=@DoubleDoubleQuotePos
  begin
    select @DoubleDoubleQuotePos=charindex('""'
                                          ,@CSVContents
                                          ,@DoubleQuotePos2+2)
          ,@DoubleQuotePos2=charindex('"'
                                     ,@CSVContents
                                     ,@DoubleQuotePos2+2)
  end
 
  /*
  We found the closing double-quote position, so we can
  now pull out string column value.  Let's also save
  the length of that string.
  */
  select @StringColValue=substring(@CSVContents
                                  ,@DoubleQuotePos1
                                  ,@DoubleQuotePos2-@DoubleQuotePos1+1)
        ,@LenStringColValue=@DoubleQuotePos2-@DoubleQuotePos1+1
       
  /*
  Remember we changed all commas to Tab characters earlier?
  Well, if there are any of those Tabs embedded in our
  string column value, we will change them back to commas.
  */
  set @StringColValue=replace(@StringColValue
                             ,char(9)
                             ,',')
 
  /* Get rid of the opening and closing double-quotes */
  set @StringColValue=substring(@StringColValue
                               ,2
                               ,len(@StringColValue)-2)
 
  /* And change all double double-quotes to single double-quotes */
  set @StringColValue=replace(@StringColValue
                             ,'""'
                             ,'"')
 
  /*
  And update our CSV file contents variable so that our
  old string column value is substituted with the new value.
  */
  set @CSVContents=stuff(@CSVContents
                        ,@DoubleQuotePos1
                        ,@LenStringColValue
                        ,@StringColValue)
 
  /*
  Finally, we set our new character position to point to our
  closing double-quote.  Remember that we have to account for our
  substituted string column value being a different size than
  it originally had been.
  */
  set @CurrentPos=@DoubleQuotePos2
                 -@LenStringColValue   /* Subtract Old Length */
                 +len(@StringColValue) /* Add New Length */
 
end
 
/*
Put XML tags into our file contents variable.
Our Tab characters will tell us where to put the column tags, and our Carriage Return
characters (encoded earlier to &#x0D;) will tell us where to put our row tags.
For example (assuming T is a Tab):
  1TMoeTHoward  --->  <row><col>1</col><col>Moe</col><col>Howard</col></row>
*/
set @CSVContents='<row><col>'
               + replace(replace(@CSVContents
                                ,'&#x0D;'
                                ,'</col></row><row><col>')
                        ,char(9)
                        ,'</col><col>')
               + '</col></row>'
 
/* Get rid of any blank rows (most likely at the end of the file */
set @CSVContents=replace(@CSVContents,'<row><col></col></row>','')
 
/*
Pull out the First Row and count the Number of Columns.
If this first row is a Header Row, then remove it from the contents.
*/
declare @FirstRow varchar(max)
       ,@NumColumns int
set @CurrentPos=charindex('</row>',@CSVContents)
set @FirstRow=left(@CSVContents,@CurrentPos+5)
set @NumColumns=len(@FirstRow)
               -len(replace(@FirstRow,'<col>','<xx>'))
if @HeaderRowExists=1
begin
  set @CSVContents=substring(@CSVContents
                            ,@CurrentPos+6
                            ,len(@CSVContents))
end
 
/*
Build the Select List with Column Names (Column1,Column2,etc up to
the appropriate number of columns) and their XML .value extractions.
For example, for 3 columns, @SelectList will contain:
   Column1=XMLNode.value('(col[1]/text())[1]','varchar(100)')
  ,Column2=XMLNode.value('(col[2]/text())[1]','varchar(100)')
  ,Column3=XMLNode.value('(col[3]/text())[1]','varchar(100)')
*/
declare @SelectList nvarchar(max)
;with ColumnCTE as
(
  select ColNumber=cast(1 as int)
  union all
  select ColNumber=cast(ColNumber+1 as int)
  from ColumnCTE
  where ColNumber<@NumColumns
)
select @SelectList=coalesce(@SelectList+nchar(13)+N'      ,',N'')
                  +N'Column'+cast(ColNumber as nvarchar(10))
                  +N'=XMLNode.value(''(col['
                  +cast(ColNumber as nvarchar(10))
                  +N']/text())[1]'',''varchar(100)'')'
from ColumnCTE
 
/* Build the SQL Statement */
set @SqlStmt=N'
select '+@SelectList+N'
from (select XMLData=cast(@CSVContents as xml).query(''.'')) F_XML
cross apply XMLData.nodes(''/row'') F_Node(XMLNode)'
 
/* Execute it */
print @SQLStmt
exec sp_executeSQL @SqlStmt
                  ,N'@CSVContents varchar(max)'
                  ,@CSVContents
 
go


鲜花

握手

雷人

路过

鸡蛋

相关阅读

最新评论

手机版|小黑屋|BC Morning Website ( Best Deal Inc. 001 )  

GMT-8, 2025-12-13 16:15 , Processed in 0.013730 second(s), 18 queries .

Supported by Best Deal Online X3.5

© 2001-2025 Discuz! Team.

返回顶部