How to clean embedded quotes from CSV files with line breaks
Wednesday, March 31, 2010
I built this to use within an SSIS import:
Dim blnLastFullLine As Boolean = False
Dim blnThisFullLine As Boolean = False
Public Sub Main()
oneFile("file.csv")
Dts.TaskResult = Dts.Results.Success
End Sub
Function oneFile(ByVal file As String) As String
Dim sReader As New System.IO.StreamReader(file)
Dim sWriter As New System.IO.StreamWriter(Replace(file, ".csv", "Clean.csv"))
Do
sWriter.WriteLine(fixQuotes(sReader.ReadLine()))
Loop Until sReader.Peek = -1
sWriter.Close()
sReader.Close()
End Function
Function fixQuotes(ByVal str As String) As String
Dim delim As String = """" ' "
Dim qiStr As String = Chr(34) & "," & Chr(34) ' ","
Dim doubleQuote As String = Chr(34) & Chr(34) ' ""
Dim strStart, strEnd As String
strStart = ""
strEnd = ""
If Left(str, delim.Length) = delim And Left(str, qiStr.Length) <> qiStr And blnLastFullLine Then
str = Right(str, str.Length - delim.Length)
strStart = delim
End If
If Right(str, 2) = "," & delim Then
If Right(str, 3) = qiStr Then
blnThisFullLine = False
Else
blnThisFullLine = True
End If
Else
If (Right(str, 2) = doubleQuote And Right(str, 3) <> "," & doubleQuote) And Right(str, 3) <> delim & doubleQuote Then
blnThisFullLine = False
Else
blnThisFullLine = True
End If
End If
If Right(str, delim.Length) = delim And Right(str, qiStr.Length) <> qiStr And blnThisFullLine Then
str = Left(str, str.Length - delim.Length)
strEnd = delim
blnLastFullLine = True
Else
blnLastFullLine = False
End If
If str.Length > 2 Then
str = Replace(str, qiStr, "|&|")
str = Replace(str, Chr(34), """)
fixQuotes = strStart & Replace(str, "|&|", qiStr) & strEnd
End If
End Function
Comment on this article