Tech Talk

Permanent link to How to clean embedded quotes from CSV files with line breaks How to clean embedded quotes from CSV files with line breaks

Wednesday, March 31, 2010

I built this to use within an SSIS import:

    Dim blnLastFullLine As Boolean = False
    Dim blnThisFullLine As Boolean = False

    Public Sub Main()
        oneFile("file.csv")
        Dts.TaskResult = Dts.Results.Success
    End Sub

    Function oneFile(ByVal file As String) As String
        Dim sReader As New System.IO.StreamReader(file)
        Dim sWriter As New System.IO.StreamWriter(Replace(file, ".csv", "Clean.csv"))
        Do
            sWriter.WriteLine(fixQuotes(sReader.ReadLine()))
        Loop Until sReader.Peek = -1
        sWriter.Close()
        sReader.Close()
    End Function

    Function fixQuotes(ByVal str As String) As String
        Dim delim As String = """"                      ' "
        Dim qiStr As String = Chr(34) & "," & Chr(34)   ' ","
        Dim doubleQuote As String = Chr(34) & Chr(34)   ' ""
        Dim strStart, strEnd As String
        strStart = ""
        strEnd = ""
        If Left(str, delim.Length) = delim And Left(str, qiStr.Length) <> qiStr And blnLastFullLine Then
            str = Right(str, str.Length - delim.Length)
            strStart = delim
        End If

        If Right(str, 2) = "," & delim Then
            If Right(str, 3) = qiStr Then
                blnThisFullLine = False
            Else
                blnThisFullLine = True
            End If
        Else
            If (Right(str, 2) = doubleQuote And Right(str, 3) <> "," & doubleQuote) And Right(str, 3) <> delim & doubleQuote Then
                blnThisFullLine = False
            Else
                blnThisFullLine = True
            End If
        End If


        If Right(str, delim.Length) = delim And Right(str, qiStr.Length) <> qiStr And blnThisFullLine Then
            str = Left(str, str.Length - delim.Length)
            strEnd = delim
            blnLastFullLine = True
        Else
            blnLastFullLine = False
        End If

        If str.Length > 2 Then
            str = Replace(str, qiStr, "|&|")
            str = Replace(str, Chr(34), """)
            fixQuotes = strStart & Replace(str, "|&|", qiStr) & strEnd
        End If
    End Function

0 Comments

LaRocque Family