从其中一个字段中有换行符的URL导入CSV [英] Importing CSV from URL that has line breaks within one of the fields

查看:43
本文介绍了从其中一个字段中有换行符的URL导入CSV的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述

我创建了一个脚本,该脚本将从URL导入CSV文件到Google表格中.我过去使用Utilities.parseCsv(csv)都没有问题.但是,此特定的CSV文件包含一个具有多个行返回(ALT + Enter)的字段.这将导致解析的文件在遇到这些字段时创建额外的行.我认为最好的方法是在解析前清理CSV文件,但我不知道该怎么做.

I created a script that imports a CSV file from a URL into a Google Sheet. I have used Utilities.parseCsv(csv) in the past with no problems. However, this particular CSV file contains a field that has multiple line returns (ALT+Enter). This causes the parsed file to create extra rows when it encounters these fields. I think the best approach is to sanitize the CSV file before parsing, but I do not know how to do this.

问题字段为说明"(G列).

The problem field is 'Description' (Column G).

当前脚本:

function importCampaignReport() {
var csvUrl = "https://s3.amazonaws.com/redacted.csv";
var csvContent = UrlFetchApp.fetch(csvUrl).getContentText();
var csvData = Utilities.parseCsv(csvContent);
var sheet = SpreadsheetApp.getActiveSpreadsheet().getSheetByName('Perm URL Campaign Dashboard Report');
sheet.getRange(1, 1, csvData.length, csvData[0].length).setValues(csvData);

}

csvContent示例:

Sample csvContent:

"Campaign Status","Billing Status",ID,Mon,Customer,Campaign,Description,"Customer Type",CID,CPL,Archived,"Start date","End date","Sales rep","Total Days Remaining","Monthly Days Left",Pace,"Month Delivered","Month Accepted","Month Remaining","% Delivered","% Accepted","Campaign Goal","Campaign Delivered","Campaign Remaining","% Campaign delivered","Campaign Undelivered","Campaign Awaiting Verification","Campaign Unverified","% Campaign Possible","CL Goal","CL Delivered","CL Remaining","CL Undelivered","CL Awaiting Verification","CL Unverified","#1 Partner Code","#1 Partner Goal","#1 Partner Cost","#2 Partner Code","#2 Partner Goal","#2 Partner Cost","#3 Partner Code","#3 Partner Goal","#3 Partner Cost","CL Mon Delivered","#1 Partner Mon Delivered","#2 Partner Mon Delivered","#3 Partner Mon Delivered","#4 Partner Mon Delivered","#5 Partner Mon Delivered","#6 Partner Mon Delivered","#4 Partner Code","#4 Partner Goal","#4 Partner Cost","#5 Partner Code","#5 Partner Goal","#5 Partner Cost","#6 Partner Code","#6 Partner Goal","#6 Partner Cost"
Completed,Pending,4607,"Mar 2018",XYZ,"Big Software","IT Staff+, 500+, UK, France, Netherlands, CQs",1,,27.50,no,03/01/2018,03/24/2018,RD,0,0,87,97,87,0,111,89,87,97,0,111,0,0,0,111,0,2,0,0,0,0,STW001,87,13.00,,,,,,,2,95,,,,,,,,,,,,,,
Completed,Pending,4938,"Jan 2018",TestCustomer,TestCampaign,"Description Line 1
Description Line 2
Description Line 3",1,,0.00,no,04/09/2017,04/09/2018,RD,0,0,1,0,0,1,0,0,10,0,10,0,0,0,2,20,0,0,0,0,0,0,LC,10,0.00,,,,,,,0,0,,,,,,,,,,,,,,
Completed,Pending,4938,"Feb 2018",TestCustomer,TestCampaign,"Description Line 1
Description Line 2
Description Line 3",1,,0.00,no,04/09/2017,04/09/2018,RD,0,0,1,0,0,1,0,0,10,0,10,0,0,0,2,20,0,0,0,0,0,0,LC,10,0.00,,,,,,,0,0,,,,,,,,,,,,,,
Completed,Pending,4938,"Mar 2018",TestCustomer,TestCampaign,"Description Line 1
Description Line 2
Description Line 3",1,,0.00,no,04/09/2017,04/09/2018,RD,0,0,1,0,0,1,0,0,10,0,10,0,0,0,2,20,0,0,0,0,0,0,LC,10,0.00,,,,,,,0,0,,,,,,,,,,,,,,
Completed,Pending,4938,"Apr 2018",TestCustomer,TestCampaign,"Description Line 1
Description Line 2
Description Line 3",1,,0.00,no,04/09/2017,04/09/2018,RD,0,0,1,0,0,1,0,0,10,0,10,0,0,0,2,20,0,0,0,0,0,0,LC,10,0.00,,,,,,,0,0,,,,,,,,,,,,,,
Active,Pending,4944,"Jan 2018",TestCustomer1,Aberdeen,"Description Line 1
Description Line 2",1,,0.00,no,04/24/2017,04/24/2018,MN,0,0,9,0,0,9,0,0,100,0,100,0,0,1,9,10,0,0,0,0,0,0,ABD,100,0.00,,,,,,,0,0,,,,,,,,,,,,,,
Active,Pending,4944,"Feb 2018",TestCustomer1,Aberdeen,"Description Line 1
Description Line 2",1,,0.00,no,04/24/2017,04/24/2018,MN,0,0,8,0,0,8,0,0,100,0,100,0,0,1,9,10,0,0,0,0,0,0,ABD,100,0.00,,,,,,,0,0,,,,,,,,,,,,,,
Active,Pending,4944,"Mar 2018",TestCustomer1,Aberdeen,"Description Line 1
Description Line 2",1,,0.00,no,04/24/2017,04/24/2018,MN,0,0,9,0,0,9,0,0,100,0,100,0,0,1,9,10,0,0,0,0,0,0,ABD,100,0.00,,,,,,,0,0,,,,,,,,,,,,,,
Active,Pending,4944,"Apr 2018",TestCustomer1,Aberdeen,"Description Line 1
Description Line 2",1,,0.00,no,04/24/2017,04/24/2018,MN,0,0,7,0,0,7,0,0,100,0,100,0,0,1,9,10,0,0,0,0,0,0,ABD,100,0.00,,,,,,,0,0,,,,,,,,,,,,,,
Active,Pending,5035,"Jan 2018",TestCustomer,TestCampaign,"Description Line 1",1,,0.00,no,05/04/2017,05/04/2020,MN,259,0,254,0,0,254,0,0,9002,0,9002,0,0,0,0,0,9000,0,9000,0,0,0,Redacted,1,0.00,B2M001,1,0.00,,,,0,0,0,,,,,,,,,,,,,
Active,Pending,5035,"Feb 2018",TestCustomer,TestCampaign,"Description Line 1",1,,0.00,no,05/04/2017,05/04/2020,MN,259,0,229,0,0,229,0,0,9002,0,9002,0,0,0,0,0,9000,0,9000,0,0,0,Redacted,1,0.00,B2M001,1,0.00,,,,0,0,0,,,,,,,,,,,,,
Active,Pending,5035,"Mar 2018",TestCustomer,TestCampaign,"Description Line 1",1,,0.00,no,05/04/2017,05/04/2020,MN,259,0,254,0,0,254,0,0,9002,0,9002,0,0,0,0,0,9000,0,9000,0,0,0,Redacted,1,0.00,B2M001,1,0.00,,,,0,0,0,,,,,,,,,,,,,
Active,Pending,5035,"Apr 2018",TestCustomer,TestCampaign,"Description Line 1",1,,0.00,no,05/04/2017,05/04/2020,MN,259,0,246,0,0,246,0,0,9002,0,9002,0,0,0,0,0,9000,0,9000,0,0,0,Redacted,1,0.00,B2M001,1,0.00,,,,0,0,0,,,,,,,,,,,,,
Active,Pending,5035,"May 2018",TestCustomer,TestCampaign,"Description Line 1",1,,0.00,no,05/04/2017,05/04/2020,MN,259,0,254,0,0,254,0,0,9002,0,9002,0,0,0,0,0,9000,0,9000,0,0,0,Redacted,1,0.00,B2M001,1,0.00,,,,0,0,0,,,,,,,,,,,,,
Active,Pending,5035,"Jun 2018",TestCustomer,TestCampaign,"Description Line 1",1,,0.00,no,05/04/2017,05/04/2020,MN,259,0,246,0,0,246,0,0,9002,0,9002,0,0,0,0,0,9000,0,9000,0,0,0,Redacted,1,0.00,B2M001,1,0.00,,,,0,0,0,,,,,,,,,,,,,
Active,Pending,5035,"Jul 2018",TestCustomer,TestCampaign,"Description Line 1",1,,0.00,no,05/04/2017,05/04/2020,MN,259,0,254,0,0,254,0,0,9002,0,9002,0,0,0,0,0,9000,0,9000,0,0,0,Redacted,1,0.00,B2M001,1,0.00,,,,0,0,0,,,,,,,,,,,,,
Active,Pending,5035,"Aug 2018",TestCustomer,TestCampaign,"Description Line 1",1,,0.00,no,05/04/2017,05/04/2020,MN,259,0,254,0,0,254,0,0,9002,0,9002,0,0,0,0,0,9000,0,9000,0,0,0,Redacted,1,0.00,B2M001,1,0.00,,,,0,0,0,,,,,,,,,,,,,
Active,Pending,5035,"Sep 2018",TestCustomer,TestCampaign,"Description Line 1",1,,0.00,no,05/04/2017,05/04/2020,MN,259,0,247,0,0,247,0,0,9002,0,9002,0,0,0,0,0,9000,0,9000,0,0,0,Redacted,1,0.00,B2M001,1,0.00,,,,0,0,0,,,,,,,,,,,,,
Active,Pending,5035,"Oct 2018",TestCustomer,TestCampaign,"Description Line 1",1,,0.00,no,05/04/2017,05/04/2020,MN,259,0,255,0,0,255,0,0,9002,0,9002,0,0,0,0,0,9000,0,9000,0,0,0,Redacted,1,0.00,B2M001,1,0.00,,,,0,0,0,,,,,,,,,,,,,
Active,Pending,5035,"Nov 2018",TestCustomer,TestCampaign,"Description Line 1",1,,0.00,no,05/04/2017,05/04/2020,MN,259,0,247,0,0,247,0,0,9002,0,9002,0,0,0,0,0,9000,0,9000,0,0,0,Redacted,1,0.00,B2M001,1,0.00,,,,0,0,0,,,,,,,,,,,,,
Active,Pending,5035,"Dec 2018",TestCustomer,TestCampaign,"Description Line 1",1,,0.00,no,05/04/2017,05/04/2020,MN,259,0,255,0,0,255,0,0,9002,0,9002,0,0,0,0,0,9000,0,9000,0,0,0,Redacted,1,0.00,B2M001,1,0.00,,,,0,0,0,,,,,,,,,,,,,
Active,Pending,5035,"Jan 2019",TestCustomer,TestCampaign,"Description Line 1",1,,0.00,no,05/04/2017,05/04/2020,MN,259,0,255,0,0,255,0,0,9002,0,9002,0,0,0,0,0,9000,0,9000,0,0,0,Redacted,1,0.00,B2M001,1,0.00,,,,0,0,0,,,,,,,,,,,,,
Active,Pending,5035,"Feb 2019",TestCustomer,TestCampaign,"Description Line 1",1,,0.00,no,05/04/2017,05/04/2020,MN,259,0,230,0,0,230,0,0,9002,0,9002,0,0,0,0,0,9000,0,9000,0,0,0,Redacted,1,0.00,B2M001,1,0.00,,,,0,0,0,,,,,,,,,,,,,
Active,Pending,5035,"Mar 2019",TestCustomer,TestCampaign,"Description Line 1",1,,0.00,no,05/04/2017,05/04/2020,MN,259,0,255,0,0,255,0,0,9002,0,9002,0,0,0,0,0,9000,0,9000,0,0,0,Redacted,1,0.00,B2M001,1,0.00,,,,0,0,0,,,,,,,,,,,,,
Active,Pending,5035,"Apr 2019",TestCustomer,TestCampaign,"Description Line 1",1,,0.00,no,05/04/2017,05/04/2020,MN,259,0,247,0,0,247,0,0,9002,0,9002,0,0,0,0,0,9000,0,9000,0,0,0,Redacted,1,0.00,B2M001,1,0.00,,,,0,0,0,,,,,,,,,,,,,
Active,Pending,5035,"May 2019",TestCustomer,TestCampaign,"Description Line 1",1,,0.00,no,05/04/2017,05/04/2020,MN,259,0,255,0,0,255,0,0,9002,0,9002,0,0,0,0,0,9000,0,9000,0,0,0,Redacted,1,0.00,B2M001,1,0.00,,,,0,0,0,,,,,,,,,,,,,
Active,Pending,5035,"Jun 2019",TestCustomer,TestCampaign,"Description Line 1",1,,0.00,no,05/04/2017,05/04/2020,MN,259,0,247,0,0,247,0,0,9002,0,9002,0,0,0,0,0,9000,0,9000,0,0,0,Redacted,1,0.00,B2M001,1,0.00,,,,0,0,0,,,,,,,,,,,,,
Active,Pending,5035,"Jul 2019",TestCustomer,TestCampaign,"Description Line 1",1,,0.00,no,05/04/2017,05/04/2020,MN,259,0,255,0,0,255,0,0,9002,0,9002,0,0,0,0,0,9000,0,9000,0,0,0,Redacted,1,0.00,B2M001,1,0.00,,,,0,0,0,,,,,,,,,,,,,
Active,Pending,5035,"Aug 2019",TestCustomer,TestCampaign,"Description Line 1",1,,0.00,no,05/04/2017,05/04/2020,MN,259,12,255,0,0,255,0,0,9002,0,9002,0,0,0,0,0,9000,0,9000,0,0,0,Redacted,1,0.00,B2M001,1,0.00,,,,0,0,0,,,,,,,,,,,,,
Active,Pending,5035,"Sep 2019",TestCustomer,TestCampaign,"Description Line 1",1,,0.00,no,05/04/2017,05/04/2020,MN,259,30,247,0,0,247,0,0,9002,0,9002,0,0,0,0,0,9000,0,9000,0,0,0,Redacted,1,0.00,B2M001,1,0.00,,,,0,0,0,,,,,,,,,,,,,
Active,Pending,5035,"Oct 2019",TestCustomer,TestCampaign,"Description Line 1",1,,0.00,no,05/04/2017,05/04/2020,MN,259,31,255,0,0,255,0,0,9002,0,9002,0,0,0,0,0,9000,0,9000,0,0,0,Redacted,1,0.00,B2M001,1,0.00,,,,0,0,0,,,,,,,,,,,,,

推荐答案

  • 您要将CSV数据放入电子表格中.
  • CSV数据包括具有换行符的值.
    • 具有换行符的值用双引号引起来.
    • 可以在"G"列看到这些值.
    • 如果我的理解是正确的,那么这个答案如何?请认为这只是几个答案之一.

      If my understanding is correct, how about this answer? Please think of this as just one of several answers.

      不幸的是,在当前阶段,看来 Utilities.parseCsv()不能直接用于包含单元格值(包括换行符)的数据.为了避免这个问题,我想提出以下两种模式.

      Unfortunately, in the current stage, it seems that Utilities.parseCsv() cannot be directly used for the data which has the cell values including the line breaks. In order to avoid this issue, I would like to propose the following 2 patterns.

      在此模式下,使用以下流程.

      In this pattern, the following flow is used.

      1. 为了通过 Utilities.parseCsv()解析数据,首先,将双引号引起来的值替换为时间字符串.
      2. 这些值由 Utilities.parseCsv()解析.
      3. 将时间字符串替换为原始值.
      4. 将数据放入电子表格.
      1. In order to parse the data by Utilities.parseCsv(), at first, The values enclosed by the double quotes are replaced by the temporal string.
      2. The values are parsed by Utilities.parseCsv().
      3. Replace the temporal strings to the original values.
      4. Put the data to Spreadsheet.

      修改后的脚本:

      修改脚本后,请进行以下修改.

      Modified script:

      When your script is modified, please modify as follows.

      var csvData = Utilities.parseCsv(csvContent);
      

      至:

      // The values enclosed by the double quotes are replaced by the temporal string.
      var temp = [];
      var tempStr = "###temp###";
      var t = csvContent.replace(/\"[\w\s\S]+?\"/g, function(m) {
        temp.push(m.replace(/\"/g, ""));
        return tempStr;
      });
      
      // Parse data as CSV data.
      var csvData = Utilities.parseCsv(t);
      
      // eplace the temporal strings to the original values.
      var h = 0;
      csvData.forEach(function(e, i) {
        var j = e.indexOf(tempStr);
        if (j > -1) {
          csvData[i][j] = temp[h];
          h++;
        }
      });
      

      模式2:

      在此模式下,使用以下流程.

      Pattern 2:

      In this pattern, the following flow is used.

      1. 将CSV文件转换为Google Spreadsheet.
        • 此转换后的电子表格用作临时电子表格.

      示例脚本:

      在使用此脚本之前,请在高级Google上启用Drive API服务.

      function importCampaignReport() {
        var csvUrl = "https://s3.amazonaws.com/redacted.csv";
        var blob = UrlFetchApp.fetch(csvUrl).getBlob(); // Modified
      
      
        // I added below script.
        var id = Drive.Files.insert({title: "temporalSpreadsheet", mimeType: MimeType.GOOGLE_SHEETS}, blob).id;
        var csvData = SpreadsheetApp.openById(id).getSheets()[0].getDataRange().getValues();
        Drive.Files.remove(id);
      
      
        var sheet = SpreadsheetApp.getActiveSpreadsheet().getSheetByName('Perm URL Campaign Dashboard Report');
        sheet.getRange(1, 1, csvData.length, csvData[0].length).setValues(csvData);
      }
      

      注意:

      • 在此修改后的脚本中,我可以使用共享的CSV数据确认该脚本正常工作.
      • 将CSV文件手动导入到电子表格后,我可以确认结果与上述修改后的脚本相同.
      • 我会注意到,当使用Sheets API时,将从URL检索的值直接解析并放入Spreadsheet.因此,作为另一种模式,我想提出这一建议.示例脚本如下.在这种情况下,不使用任何临时文件,并且处理成本将能够低于上述两种模式的成本.

        I could notice that when Sheets API is used, the values retrieved from the URL are directly parsed and put to the Spreadsheet. So as one more pattern, I would like to propose this. The sample script is as follows. In this case, no temporal file is used and the process cost will be able to be lower than those of above 2 patterns.

        在使用此脚本之前,请请在Advanced Google启用Sheets API服务.

        Before you use this script, please enable Sheets API at Advanced Google services.

        function importCampaignReport() {
          var csvUrl = "https://s3.amazonaws.com/redacted.csv";
          var csvContent = UrlFetchApp.fetch(csvUrl).getContentText();
        
          var spreadsheet = SpreadsheetApp.getActiveSpreadsheet();
          var sheet = spreadsheet.getSheetByName('Perm URL Campaign Dashboard Report');
          var resource = {requests: [{pasteData: {data: csvContent, coordinate: {sheetId: sheet.getSheetId()}, delimiter: ","}}]};
          Sheets.Spreadsheets.batchUpdate(resource, spreadsheet.getId());
        }
        

        参考:

        • PasteDataRequest
          • 将值放入电子表格后,您要从值中删除换行符.

          我可以像上面那样理解.如果我的理解是正确的,那么下面的示例脚本怎么样?

          I could understand like above. If my understanding is correct, how about the following sample script?

          在使用此脚本之前,请启用表格API在高级Google服务中.

          Before you use this script, please enable Sheets API at Advanced Google services.

          function importCampaignReport() {
            var csvUrl = "https://s3.amazonaws.com/redacted.csv";
            var data = UrlFetchApp.fetch(csvUrl).getContentText();
          
            var spreadsheet = SpreadsheetApp.getActiveSpreadsheet();
            var sheet = spreadsheet.getSheetByName('Perm URL Campaign Dashboard Report');
            var sheetId = sheet.getSheetId();
            var resource = {requests: [
              {pasteData: {data: data, coordinate: {sheetId: sheetId}, delimiter: ","}},
              {findReplace: {range: {sheetId: sheetId, startColumnIndex: 6, endColumnIndex: 7}, find: "\n", replacement: " ", searchByRegex: true}}
            ]};
            Sheets.Spreadsheets.batchUpdate(resource, spreadsheet.getId());
          }
          

          • 在您的评论中,关于第5列(G),我认为第5列是"E"列.但是您说的是"G"列.我对此感到困惑.因此,在当前脚本中,可以处理列"G".如果要对列"E"执行此操作,请设置 startColumnIndex:4,endColumnIndex:5 .
          • 如果发生错误,请提供脚本以复制问题.由此,我想确认一下.
            • In your comment, about the 5th column (G), I think that the 5th column is the column "E". But you say the column "G". I confuse about this. So in the current script, the column "G" can be processed. If you want to do this for the column "E", please set startColumnIndex: 4, endColumnIndex: 5.
            • If an error occurs, please provide your script for replicating the issue. By this, I would like to confirm it.
            • 这篇关于从其中一个字段中有换行符的URL导入CSV的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!

查看全文
登录 关闭
扫码关注1秒登录
发送“验证码”获取 | 15天全站免登陆