当读取为readAsArrayBuffer时,Filereader使用正确的编码读取文件 [英] Filereader read file using correct encoding when read as readAsArrayBuffer

查看:491
本文介绍了当读取为readAsArrayBuffer时,Filereader使用正确的编码读取文件的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述

我正在用 JavaScript 读取上传的 .csv/.xlsx 文件,并希望得到一个包含每一行的数组作为结果。我能够使用 FileReader 和 SheetJS 读取文件并获取数据(代码见下方英文原文)。对于大多数文件,这段代码都能正常工作,但当文件包含采用 Windows-1255 编码的希伯来语文本时,我得到的是乱码。



为了寻找更多办法,我尝试使用 reader.readAsText 将文件作为文本读取,并根据需要更改编码,请查看以下代码:

  /**
   * Reports whether a string contains at least one character from the
   * Unicode Hebrew block (U+0590 to U+05FF).
   *
   * @param {string} data - Text to inspect.
   * @returns {boolean} True when a Hebrew character is present.
   */
  function is_Hebrew(data)
  {
      // search() yields the index of the first match, or -1 when there is none.
      var position = data.search(/[\u0590-\u05FF]/);
      return position >= 0;
  }

/**
 * Reads the first selected file as text and splits it into rows of cells.
 *
 * The file is first decoded with the reader's default encoding. When the
 * decoded text contains no Hebrew characters it is read once more as
 * ISO-8859-8. Cells are split naively on commas, so quoted cells that
 * contain commas are NOT handled.
 *
 * @param {FileList|File[]} files - Selected files; only files[0] is read.
 */
$scope.do_file = function(files)
{
    var fullResult = [];
    var file = files[0];
    if (!file) {
        return;
    }

    var reader = new FileReader();
    // Limits the fallback to a single retry. Without this flag a file with
    // no Hebrew text restarts the read from every load event and never
    // reaches loadend.
    var triedHebrewEncoding = false;

    reader.onload = function(e) {
        var data = e.target.result;
        if (!triedHebrewEncoding && !is_Hebrew(String(data))) {
            triedHebrewEncoding = true;
            // Starting a new read here suppresses loadend for the current one.
            reader.readAsText(file, 'ISO-8859-8');
        }
    };

    reader.onloadend = function() {
        // loadend also fires after an error or abort, where result is null.
        if (typeof reader.result !== 'string') {
            return;
        }
        // Accept both Windows (\r\n) and Unix (\n) line endings.
        var lines = reader.result.split(/\r?\n/);
        console.log(lines);
        lines.forEach(function(element) {
            var cell = element.split(',');
            fullResult.push(cell);
        });

        console.log(reader);
    };

    reader.readAsText(file);
};

但上述代码并不合适,因为它不能在每一行中正确识别出各个单元格。如果某个单元格包含带逗号分隔值的字符串(例如单元格内容为 "25,28,29"),输出的数组就会包含错误的数据,因为它把其中的每个值都当成了一个单独的单元格。



所以我决定坚持使用第一种方法,但我无法更改编码。在第一段使用 readAsArrayBuffer 读取文件数据的代码中,有没有办法更改编码?

解决方案

在尝试了许多可能的解决方案之后,我发现上述问题的答案是把上面两种方法结合起来:第一种方法用于读取 xlsx 文件,第二种方法用于读取 csv 文件。另外,我在第二种方法中使用了一个名为 papaparse 的 JavaScript 库,以解决按单元格读取数据的问题。

  /**
   * Reports whether a string contains at least one character from the
   * Unicode Hebrew block (U+0590 to U+05FF).
   *
   * @param {string} $data - Text to inspect.
   * @returns {boolean} True when a Hebrew character is present.
   */
  $scope.is_Hebrew = function($data){
      // search() yields the index of the first match, or -1 when there is none.
      var position = $data.search(/[\u0590-\u05FF]/);
      return position >= 0;
  };

// code for the new excel reader
/**
 * Loads the first selected .xlsx or .csv file into $scope.fileContent as an
 * array of rows.
 *
 * - .xlsx: parsed with SheetJS; rows come from the sheet named "Sheet1",
 *   otherwise from "contacts", otherwise the result is an empty array.
 * - .csv: read as text and parsed with Papa Parse. When the decoded text has
 *   no Hebrew characters the file is read once more as ISO-8859-8.
 * - anything else: an alert is shown.
 *
 * @param {FileList|File[]} files - Selected files; only files[0] is read.
 */
$scope.do_file = function(files)
{
    // Options handed to Papa.parse.
    var config = {
        delimiter: "",  // auto-detect
        newline: "",    // auto-detect
        quoteChar: '"',
        escapeChar: '"',
        header: false,
        trimHeader: false,
        dynamicTyping: false,
        preview: 0,
        encoding: "",
        worker: false,
        comments: false,
        step: undefined,
        complete: undefined,
        error: undefined,
        download: false,
        skipEmptyLines: false,
        chunk: undefined,
        fastMode: undefined,
        beforeFirstChunk: undefined,
        withCredentials: undefined
    };

    // Keeps every row except plain objects that have no keys.
    var isNonEmptyRow = function(el) {
        return typeof el != "object" || Array.isArray(el) || Object.keys(el).length > 0;
    };

    $scope.fileContent = [];
    var f = files[0];
    if (!f) {
        return;
    }

    // Lower-cased so that "DATA.XLSX" and "data.csv" are both recognised.
    var fileExtension = f.name.replace(/^.*\./, '').toLowerCase();
    var reader = new FileReader();

    if (fileExtension === 'xlsx')
    {
        reader.onload = function(e)
        {
            var workbook = XLSX.read(e.target.result, {type: 'array'});
            var result = {};
            workbook.SheetNames.forEach(function(sheetName) {
                var roa = XLSX.utils.sheet_to_json(workbook.Sheets[sheetName], {header: 1});
                if (roa.length) result[sheetName] = roa;
            });
            if (result["Sheet1"])
            {
                $scope.fileContent = result["Sheet1"];
                return;
            }
            // Fall back to "contacts"; use [] when that sheet is missing too,
            // rather than calling filter on undefined.
            $scope.fileContent = (result["contacts"] || []).filter(isNonEmptyRow);
        };
        reader.readAsArrayBuffer(f);
    }
    else if (fileExtension === 'csv')
    {
        // Limits the fallback to a single retry. Without this flag a file
        // with no Hebrew text restarts the read from every load event and
        // never reaches loadend.
        var triedHebrewEncoding = false;

        reader.onload = function(e)
        {
            if (!triedHebrewEncoding && !$scope.is_Hebrew(String(e.target.result)))
            {
                triedHebrewEncoding = true;
                // Starting a new read here suppresses loadend for the current one.
                reader.readAsText(f, 'ISO-8859-8');
            }
        };

        reader.onloadend = function()
        {
            // loadend also fires after an error or abort, where result is null.
            if (typeof reader.result !== 'string')
            {
                return;
            }
            // Pass the config object itself, not an array wrapping it.
            var c = Papa.parse(reader.result, config);
            $scope.fileContent = c["data"].filter(isNonEmptyRow);
        };

        reader.readAsText(f);
    }
    else
    {
        alert("File Not supported!");
    }

    // NOTE(review): this runs synchronously, before any asynchronous read
    // completes, so the load handlers above later replace this array.
    // Confirm whether a trailing empty row is really wanted.
    $scope.fileContent.push([]);
};


I am working on reading an uploaded .csv/.xlsx file in JavaScript and getting the result as an array containing each row. I was able to read the file and get the data using FileReader and SheetJS with the following code.

// code for the new excel reader
/**
 * Loads the first selected workbook with SheetJS and stores its rows in
 * $scope.fileContent.
 *
 * Every non-empty sheet is converted to an array of row arrays. The rows of
 * "Sheet1" are used when present, otherwise the rows of "contacts",
 * otherwise an empty array.
 *
 * @param {FileList|File[]} files - Selected files; only files[0] is read.
 */
$scope.do_file =  function(files)
{
    $scope.fileContent  = [];
    var f = files[0];
    if (!f)
    {
        return;
    }

    var reader = new FileReader();
    reader.onload = function(e)
    {
        var workbook = XLSX.read(e.target.result, {type: 'array'});
        var result = {};
        workbook.SheetNames.forEach(function(sheetName) {
            // header: 1 makes sheet_to_json return each row as an array.
            var roa = XLSX.utils.sheet_to_json(workbook.Sheets[sheetName], {header: 1});
            if (roa.length) result[sheetName] = roa;
        });

        if (result["Sheet1"])
        {
            $scope.fileContent = result["Sheet1"];
            return;
        }

        // Fall back to "contacts"; use [] when that sheet is missing too,
        // rather than calling filter on undefined.
        $scope.fileContent = (result["contacts"] || []).filter(function(el) {
            return typeof el != "object" || Array.isArray(el) || Object.keys(el).length > 0;
        });
    };
    reader.readAsArrayBuffer(f);
};

The code works for most files, but when a file contains Hebrew text in Windows-1255 encoding I get gibberish data.

Looking for more options, I tried to read the file as text using reader.readAsText and change the encoding as necessary; see the following code:

/**
 * Tells whether the given text holds any character from the Unicode Hebrew
 * block (U+0590 to U+05FF).
 *
 * @param {string} data - Text to inspect.
 * @returns {boolean} True if at least one Hebrew character occurs.
 */
function is_Hebrew(data) {
    const hebrewBlock = /[\u0590-\u05FF]/;
    // search() gives -1 when nothing matches, otherwise the match index.
    return data.search(hebrewBlock) > -1;
}

 /**
  * Reads the first selected file as text and splits it into rows of cells.
  *
  * The file is first decoded with the reader's default encoding. When the
  * decoded text contains no Hebrew characters it is read once more as
  * ISO-8859-8. Cells are split naively on commas, so quoted cells that
  * contain commas are NOT handled.
  *
  * @param {FileList|File[]} files - Selected files; only files[0] is read.
  */
 $scope.do_file =  function(files)
 {
     var fullResult = [];
     var file = files[0];
     if (!file) {
         return;
     }

     var reader = new FileReader();
     // Limits the fallback to a single retry. Without this flag a file with
     // no Hebrew text restarts the read from every load event and never
     // reaches loadend.
     var triedHebrewEncoding = false;

     reader.onload = function(e) {
         var data = e.target.result;
         if (!triedHebrewEncoding && !is_Hebrew(String(data))) {
             triedHebrewEncoding = true;
             // Starting a new read here suppresses loadend for the current one.
             reader.readAsText(file, 'ISO-8859-8');
         }
     };

     reader.onloadend = function() {
         // loadend also fires after an error or abort, where result is null.
         if (typeof reader.result !== 'string') {
             return;
         }
         // Accept both Windows (\r\n) and Unix (\n) line endings.
         var lines = reader.result.split(/\r?\n/);
         console.log(lines);
         lines.forEach(element => {
             var cell = element.split(',');
             fullResult.push(cell);
         });

         console.log(reader);
     };

     reader.readAsText(file);
 };

But the above code is not suitable, because it does not correctly identify each cell within a row. If any cell contains a string with comma-separated values (for example, a cell containing "25,28,29"), the output array is wrong, because each value is treated as a separate cell.

So I decided to stick with the first method, but I am not able to change the encoding. Is there a way to change the encoding in the first code sample, where I used readAsArrayBuffer to read the file data?

解决方案

After going through a lot of possible solutions, I found that the answer to the above question was to combine the two methods above: the first method for reading xlsx files and the second method for reading csv files. I also used an additional JavaScript library called Papa Parse in the second method to solve the problem of reading the data in each cell.

/**
 * Tells whether the given text holds any character from the Unicode Hebrew
 * block (U+0590 to U+05FF).
 *
 * @param {string} $data - Text to inspect.
 * @returns {boolean} True if at least one Hebrew character occurs.
 */
$scope.is_Hebrew = function($data){
    const hebrewBlock = /[\u0590-\u05FF]/;
    // search() gives -1 when nothing matches, otherwise the match index.
    return $data.search(hebrewBlock) > -1;
}

// code for the new excel reader
/**
 * Loads the first selected .xlsx or .csv file into $scope.fileContent as an
 * array of rows.
 *
 * - .xlsx: parsed with SheetJS; rows come from the sheet named "Sheet1",
 *   otherwise from "contacts", otherwise the result is an empty array.
 * - .csv: read as text and parsed with Papa Parse. When the decoded text has
 *   no Hebrew characters the file is read once more as ISO-8859-8.
 * - anything else: an alert is shown.
 *
 * @param {FileList|File[]} files - Selected files; only files[0] is read.
 */
$scope.do_file =  function(files)
{
    // Options handed to Papa.parse.
    var config = {
        delimiter: "",  // auto-detect
        newline: "",    // auto-detect
        quoteChar: '"',
        escapeChar: '"',
        header: false,
        trimHeader: false,
        dynamicTyping: false,
        preview: 0,
        encoding: "",
        worker: false,
        comments: false,
        step: undefined,
        complete: undefined,
        error: undefined,
        download: false,
        skipEmptyLines: false,
        chunk: undefined,
        fastMode: undefined,
        beforeFirstChunk: undefined,
        withCredentials: undefined
    };

    // Keeps every row except plain objects that have no keys.
    var isNonEmptyRow = function(el) {
        return typeof el != "object" || Array.isArray(el) || Object.keys(el).length > 0;
    };

    $scope.fileContent  = [];
    var f = files[0];
    if (!f)
    {
        return;
    }

    // Lower-cased so that "DATA.XLSX" and "data.csv" are both recognised.
    var fileExtension = f.name.replace(/^.*\./, '').toLowerCase();
    var reader = new FileReader();

    if (fileExtension === 'xlsx')
    {
        reader.onload = function(e)
        {
            var workbook = XLSX.read(e.target.result, {type: 'array'});
            var result = {};
            workbook.SheetNames.forEach(function(sheetName) {
                // header: 1 makes sheet_to_json return each row as an array.
                var roa = XLSX.utils.sheet_to_json(workbook.Sheets[sheetName], {header: 1});
                if (roa.length) result[sheetName] = roa;
            });
            if (result["Sheet1"])
            {
                $scope.fileContent = result["Sheet1"];
                return;
            }
            // Fall back to "contacts"; use [] when that sheet is missing too,
            // rather than calling filter on undefined.
            $scope.fileContent = (result["contacts"] || []).filter(isNonEmptyRow);
        };
        reader.readAsArrayBuffer(f);
    }
    else if (fileExtension === 'csv')
    {
        // Limits the fallback to a single retry. Without this flag a file
        // with no Hebrew text restarts the read from every load event and
        // never reaches loadend.
        var triedHebrewEncoding = false;

        reader.onload = function(e)
        {
            if (!triedHebrewEncoding && !$scope.is_Hebrew(String(e.target.result)))
            {
                triedHebrewEncoding = true;
                // Starting a new read here suppresses loadend for the current one.
                reader.readAsText(f, 'ISO-8859-8');
            }
        };

        reader.onloadend = function()
        {
            // loadend also fires after an error or abort, where result is null.
            if (typeof reader.result !== 'string')
            {
                return;
            }
            // Pass the config object itself, not an array wrapping it.
            var c = Papa.parse(reader.result, config);
            $scope.fileContent = c["data"].filter(isNonEmptyRow);
        };

        reader.readAsText(f);
    }
    else
    {
        alert("File Not supported!");
    }

    // NOTE(review): this runs synchronously, before any asynchronous read
    // completes, so the load handlers above later replace this array.
    // Confirm whether a trailing empty row is really wanted.
    $scope.fileContent.push([]);
};

这篇关于当读取为readAsArrayBuffer时,Filereader使用正确的编码读取文件的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!

查看全文
登录 关闭
扫码关注1秒登录
发送“验证码”获取 | 15天全站免登陆