当读取为 readAsArrayBuffer 时,Filereader 使用正确的编码读取文件 [英] Filereader read file using correct encoding when read as readAsArrayBuffer

查看:48
本文介绍了当读取为 readAsArrayBuffer 时,Filereader 使用正确的编码读取文件的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述

我正在使用 JavaScript 读取上传的 .csv/.xlsx 文件,并将结果作为包含每一行的数组返回。我能够使用 FileReader 和 SheetJS 读取文件并获取数据。对于大多数文件,这段代码都能正常工作,但当文件包含 Windows-1255 编码的希伯来语文本时,得到的却是乱码。

寻找更多选项,我尝试使用 reader.readAsText 以文本形式读取文件并根据需要更改编码,请检查以下代码:

function is_Hebrew(data){var position = data.search(/[\u0590-\u05FF]/);return position >= 0;}$scope.do_file = function(files){var fullResult = [];var file = files[0];var reader = new FileReader();reader.onload = function(e){var data = e.target.result;if(!is_Hebrew(data.toString())){reader.readAsText(file,'ISO-8859-8');}};reader.readAsText(file);reader.onloadend = function(){var lines = reader.result.split('\r\n');console.log(lines);lines.forEach(element => {var cell = element.split(',');fullResult.push(cell);});console.log(reader);};};

但是上面的代码并不合适,因为它无法按行读取文件并正确识别每个单元格。如果某个单元格包含带逗号分隔值的字符串(例如单元格的值为 "25,28,29"),输出的数组就会得到错误的数据,因为其中的每个值都被当成了单独的单元格。

所以我决定坚持使用第一种方法,但我无法更改编码.在我使用 readAsArrayBuffer 读取文件的第一个代码中,是否有可能更改编码数据?

解决方案

经过很多可能的解决方案后,我发现上述问题的答案是将上述两种方法结合起来.第一种读取xlsx 文件的方法和第二种读取csv 文件的方法.此外,我在第二种方法中使用了一个名为 papaparse 的附加 javaScript 库来解决每个单元格读取数据的问题

$scope.is_Hebrew = function($data){var position = $data.search(/[\u0590-\u05FF]/);return position >= 0;};/* 新的 excel 读取器的代码 */$scope.do_file = function(files){var config = {delimiter: "",/* 自动检测 */newline: "",/* 自动检测 */quoteChar: '"',escapeChar: '"',header: false,trimHeader: false,dynamicTyping: false,preview: 0,encoding: "",worker: false,comments: false,step: undefined,complete: undefined,error: undefined,download: false,skipEmptyLines: false,chunk: undefined,fastMode: undefined,beforeFirstChunk: undefined,withCredentials: undefined};$scope.fileContent = [];var f = files[0];var fileExtension = f.name.replace(/^.*\./, '');if(fileExtension == 'xlsx'){var X = XLSX;var global_wb;var reader = new FileReader();reader.onload = function(e){var data = e.target.result;global_wb = X.read(data, {type: 'array'});var result = {};global_wb.SheetNames.forEach(function(sheetName) {var roa = X.utils.sheet_to_json(global_wb.Sheets[sheetName], {header:1});if(roa.length) result[sheetName] = roa;});$scope.fileContent = result["Sheet1"];if(!result["Sheet1"]){$scope.fileContent = result["contacts"].filter(function(el) { return typeof el != "object" || Array.isArray(el) || Object.keys(el).length > 0; });}};reader.readAsArrayBuffer(f);}else if(fileExtension == 'csv'){var reader = new FileReader();reader.onload = function(e){var data = e.target.result;console.log(f);console.log($scope.is_Hebrew(data.toString()));if(!$scope.is_Hebrew(data.toString())){reader.readAsText(f,'ISO-8859-8');}};reader.readAsText(f);reader.onloadend = function(e){var c = Papa.parse(reader.result,[ config]);console.log(c);$scope.fileContent = c["data"].filter(function(el) { return typeof el != "object" || Array.isArray(el) || Object.keys(el).length > 0; });};}else{alert("File Not supported!");}$scope.fileContent.push([]);};

I am working on reading an uploaded .csv/.xlsx file with JavaScript and getting the result as an array containing each row. I was able to read the file and get the data using FileReader and SheetJS with the following code.

// Excel reader: loads the first selected file as a workbook with SheetJS and
// stores its rows (one array per row) in $scope.fileContent.
//
// files: array-like of File objects; only files[0] is read.
//
// Sheet selection: "Sheet1" is used as-is when it has rows; otherwise
// "contacts", otherwise the first sheet that has rows, otherwise an empty
// array. Rows taken from any sheet other than "Sheet1" are passed through the
// empty-object filter.
$scope.do_file =  function(files)
{
    $scope.fileContent  = [];

    var f = files && files[0];
    if (!f)
    {
        // Nothing selected: leave fileContent empty instead of throwing.
        return;
    }

    var reader = new FileReader();
    reader.onload = function(e)
    {
        var workbook = XLSX.read(e.target.result, {type: 'array'});

        // Only sheets that produced at least one row are recorded.
        var rowsBySheet = {};
        workbook.SheetNames.forEach(function(sheetName) {
            var rows = XLSX.utils.sheet_to_json(workbook.Sheets[sheetName], {header: 1});
            if (rows.length) rowsBySheet[sheetName] = rows;
        });

        if (rowsBySheet["Sheet1"])
        {
            $scope.fileContent = rowsBySheet["Sheet1"];
            return;
        }

        // The fallback used to assume a "contacts" sheet always exists and
        // crashed on undefined.filter(...) for any other workbook.
        var fallbackRows = rowsBySheet["contacts"];
        for (var i = 0; !fallbackRows && i < workbook.SheetNames.length; i++)
        {
            fallbackRows = rowsBySheet[workbook.SheetNames[i]];
        }

        $scope.fileContent = (fallbackRows || []).filter(function(el) {
            return typeof el != "object" || Array.isArray(el) || Object.keys(el).length > 0;
        });
    };
    reader.onerror = function()
    {
        console.error(reader.error);
    };
    reader.readAsArrayBuffer(f);
};

The code works for most files, but when a file contains Hebrew text in Windows-1255 encoding I get gibberish data.

Looking for more options, I tried reading the file as text using reader.readAsText and changing the encoding as necessary; see the following code:

/**
 * Tells whether a string contains at least one character in the range
 * U+0590..U+05FF.
 *
 * @param {string} data - Text to inspect.
 * @returns {boolean} true when such a character is found, false otherwise.
 */
function is_Hebrew(data)
{
    var hebrewRange = /[\u0590-\u05FF]/;
    // search() yields -1 when nothing matches.
    return data.search(hebrewRange) !== -1;
}

 /**
  * Reads the first selected file as text and splits it into rows of cells.
  *
  * The file is first read with FileReader's default decoding. If that text
  * contains no Hebrew characters it is read once more as 'ISO-8859-8'; the
  * second result is used only when it does contain Hebrew, otherwise the
  * first result is kept.
  *
  * Cells are produced by splitting each line on every comma, so a quoted
  * value that itself contains commas is still broken into several cells.
  *
  * @param {FileList|File[]} files - Selected files; only files[0] is read.
  */
 $scope.do_file =  function(files)
 {
    var fullResult = [];
    var file = files[0];
    var reader = new FileReader();
    // Text from the first pass; stays null until that pass has completed.
    var firstPassText = null;

    function splitRows(text)
    {
        // Accept both Windows (\r\n) and Unix (\n) line endings.
        var lines = text.split(/\r?\n/);
        console.log(lines);
        lines.forEach(function(line) {
            fullResult.push(line.split(','));
        });
        console.log(reader);
    }

    reader.onload = function(e)
    {
        var text = String(e.target.result);

        if (firstPassText === null)
        {
            if (is_Hebrew(text))
            {
                splitRows(text);
                return;
            }
            // Retry exactly once. Restarting the read on every Hebrew-less
            // result never terminated for files with no Hebrew at all, and
            // loadend is not fired for a load whose handler starts a new read.
            firstPassText = text;
            reader.readAsText(file, 'ISO-8859-8');
            return;
        }

        splitRows(is_Hebrew(text) ? text : firstPassText);
    };
    reader.onerror = function()
    {
        console.error(reader.error);
    };
    reader.readAsText(file);
 };

But the above code is not suitable, because it does not read the file row by row while identifying each cell correctly. If any cell contains a string with comma-separated values (for example, a cell containing the string "25,28,29"), the output array holds wrong data, because each value is treated as a separate cell.

So I decided to stick with the first method, but I am not able to change the encoding. Is there a way to change the encoding in the first code, where I used readAsArrayBuffer to read the file data?

解决方案

After going through a lot of possible solutions, I found that the answer to the above question was to combine the two methods above: the first method for reading xlsx files and the second method for reading csv files. I also used an additional JavaScript library called Papa Parse in the second method to solve the problem of reading the data in each cell.

// Returns true when $data contains at least one character in the range
// U+0590..U+05FF, false otherwise.
$scope.is_Hebrew = function($data){
    return $data.search(/[\u0590-\u05FF]/) >= 0;
};

/**
 * Excel/CSV reader: loads the first selected file into $scope.fileContent as
 * an array of rows.
 *
 * - .xlsx files are read as an ArrayBuffer and parsed with SheetJS. "Sheet1"
 *   is used as-is when it has rows; otherwise "contacts", otherwise the first
 *   sheet that has rows, otherwise an empty array. Rows taken from any sheet
 *   other than "Sheet1" go through the empty-object filter.
 * - .csv files are read as text and parsed with Papa Parse. The text is read
 *   with the default decoding first; if it contains no Hebrew characters it is
 *   read once more as 'ISO-8859-8', and that second result is used only when
 *   it does contain Hebrew.
 * - Any other extension triggers an alert.
 *
 * The extension comparison is case-insensitive.
 *
 * NOTE(review): the reader callbacks assign to $scope asynchronously; if this
 * is AngularJS and the view does not refresh, the assignments may need to run
 * inside $scope.$apply. Confirm against the caller.
 *
 * @param {FileList|File[]} files - Selected files; only files[0] is read.
 */
$scope.do_file =  function(files)
{
    // Papa Parse options, passed to Papa.parse as the config object.
    var config = {
    delimiter: "",  // auto-detect
    newline: "",    // auto-detect
    quoteChar: '"',
    escapeChar: '"',
    header: false,
    trimHeader: false,
    dynamicTyping: false,
    preview: 0,
    encoding: "",
    worker: false,
    comments: false,
    step: undefined,
    complete: undefined,
    error: undefined,
    download: false,
    skipEmptyLines: false,
    chunk: undefined,
    fastMode: undefined,
    beforeFirstChunk: undefined,
    withCredentials: undefined
    };

    // Keeps non-objects, arrays, and objects that have at least one own key.
    function keepRow(el)
    {
        return typeof el !== "object" || Array.isArray(el) || Object.keys(el).length > 0;
    }

    // Chooses which sheet's rows to expose (see the header comment).
    function pickSheetRows(sheetNames, rowsBySheet)
    {
        if (rowsBySheet["Sheet1"])
        {
            return rowsBySheet["Sheet1"];
        }
        // The fallback used to assume a "contacts" sheet always exists and
        // crashed on undefined.filter(...) for any other workbook.
        var rows = rowsBySheet["contacts"];
        for (var i = 0; !rows && i < sheetNames.length; i++)
        {
            rows = rowsBySheet[sheetNames[i]];
        }
        return (rows || []).filter(keepRow);
    }

    function logReadError(reader)
    {
        return function() { console.error(reader.error); };
    }

    $scope.fileContent  = [];
    var f = files && files[0];
    if (!f)
    {
        // Nothing selected: leave fileContent empty instead of throwing.
        return;
    }

    var fileExtension = f.name.replace(/^.*\./, '').toLowerCase();
    if(fileExtension === 'xlsx')
    {
        var workbookReader = new FileReader();
        workbookReader.onload = function(e)
        {
            var workbook = XLSX.read(e.target.result, {type: 'array'});
            // Only sheets that produced at least one row are recorded.
            var rowsBySheet = {};
            workbook.SheetNames.forEach(function(sheetName) {
               var rows = XLSX.utils.sheet_to_json(workbook.Sheets[sheetName], {header:1});
               if(rows.length) rowsBySheet[sheetName] = rows;
            });
            $scope.fileContent = pickSheetRows(workbook.SheetNames, rowsBySheet);
        };
        workbookReader.onerror = logReadError(workbookReader);
        workbookReader.readAsArrayBuffer(f);
    }
    else if(fileExtension === 'csv')
    {
        var csvReader = new FileReader();
        // Text from the first pass; stays null until that pass has completed.
        var firstPassText = null;
        csvReader.onload = function(e)
        {
            var text = String(e.target.result);

            if (firstPassText === null)
            {
                if (!$scope.is_Hebrew(text))
                {
                    // Retry exactly once. Restarting the read on every
                    // Hebrew-less result never terminated for files with no
                    // Hebrew at all, and loadend is not fired for a load
                    // whose handler starts a new read.
                    firstPassText = text;
                    csvReader.readAsText(f,'ISO-8859-8');
                    return;
                }
            }
            else if (!$scope.is_Hebrew(text))
            {
                // The re-decode did not help; keep the first result.
                text = firstPassText;
            }

            // Pass the config object itself, not an array wrapping it.
            var parsed = Papa.parse(text, config);
            $scope.fileContent = parsed["data"].filter(keepRow);
        };
        csvReader.onerror = logReadError(csvReader);
        csvReader.readAsText(f);
    }
    else
    {
       alert("File Not supported!");
    }

    // Runs synchronously, before either reader has finished. The load
    // callbacks above replace $scope.fileContent, so this placeholder row only
    // survives when nothing was loaded.
    // NOTE(review): confirm whether a trailing empty row on the loaded data was intended.
    $scope.fileContent.push([]);
};

这篇关于当读取为 readAsArrayBuffer 时,Filereader 使用正确的编码读取文件的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!

查看全文
登录 关闭
扫码关注1秒登录
发送“验证码”获取 | 15天全站免登陆