使用 readAsArrayBuffer 读取时,如何让 FileReader 以正确的编码读取文件 [英] FileReader: reading a file with the correct encoding when using readAsArrayBuffer
问题描述
我正在用 JavaScript 读取上传的 .csv / .xlsx 文件,并希望得到一个包含每一行的数组(array)。我能够使用 FileReader 和 SheetJS 读取文件并获取数据。
大多数文件都能正常读取,但当文件包含 Windows-1255 编码的希伯来语文本时,得到的是乱码。
为了寻找其他办法,我尝试使用 reader.readAsText 将文件作为文本读取,并根据需要更改编码,请看以下代码:
/**
 * Reports whether a string contains at least one character from the
 * Unicode Hebrew block (U+0590-U+05FF).
 *
 * @param {string} data - Text to inspect.
 * @returns {boolean} true when a Hebrew character is present.
 */
function is_Hebrew(data)
{
    var position = data.search(/[\u0590-\u05FF]/);
    return position >= 0;
}
/**
 * Reads the first selected file as text and splits it into rows of cells.
 *
 * The file is decoded with the browser's default text decoding first. When
 * that text contains no Hebrew characters it is decoded exactly once more as
 * ISO-8859-8; if the retry finds no Hebrew either, the first decoding is used.
 *
 * @param {FileList|File[]} files - Selected files; only files[0] is read.
 */
$scope.do_file = function(files)
{
    // NOTE(review): the surrounding text describes the files as Windows-1255 —
    // confirm whether 'windows-1255' is the label that should be used here.
    const FALLBACK_ENCODING = 'ISO-8859-8';

    const file = files && files[0];
    if (!file) {
        return;
    }

    const fullResult = [];
    const reader = new FileReader();
    // Text of the first pass; non-null once the fallback read has been started.
    let defaultDecoded = null;

    reader.onload = function(e) {
        let text = String(e.target.result);

        // Retry at most once: without this guard a file that has no Hebrew
        // at all would restart the read from every load event, forever.
        if (defaultDecoded === null && !is_Hebrew(text)) {
            defaultDecoded = text;
            reader.readAsText(file, FALLBACK_ENCODING);
            return;
        }
        if (defaultDecoded !== null && !is_Hebrew(text)) {
            text = defaultDecoded;
        }

        // Accept CRLF, CR and LF line endings.
        const lines = text.split(/\r\n|\r|\n/);
        console.log(lines);
        // Plain comma split: quoted cells such as "25,28,29" are not honoured.
        lines.forEach((line) => {
            fullResult.push(line.split(','));
        });
        console.log(reader);
    };
    reader.onerror = function() {
        console.error(reader.error);
    };
    reader.readAsText(file);
};
但上述代码并不合适,因为它不能按行读取文件并正确识别每个单元格。如果某个单元格中的字符串本身包含逗号分隔的值(例如单元格的值为 "25,28,29"),输出的数组就会出错,因为每个值都被当成了一个单独的单元格。
所以我决定继续使用第一种方法,但我无法更改编码。在第一段使用 readAsArrayBuffer 读取文件数据的代码中,有没有办法更改编码?
在尝试了许多可能的解决方案之后,我发现上述问题的答案是把这两种方法结合起来:用第一种方法读取 xlsx 文件,用第二种方法读取 csv 文件。此外,我在第二种方法中还使用了一个名为 papaparse 的 JavaScript 库,以解决逐单元格读取数据的问题。
/**
 * Reports whether a string contains at least one character from the
 * Unicode Hebrew block (U+0590-U+05FF).
 *
 * @param {string} $data - Text to inspect.
 * @returns {boolean} true when a Hebrew character is present.
 */
$scope.is_Hebrew = function($data){
    var position = $data.search(/[\u0590-\u05FF]/);
    return position >= 0;
};
//新excel阅读器的代码
/**
 * Loads the first selected file into $scope.fileContent as an array of rows.
 *
 * - .xlsx files are read as an ArrayBuffer and parsed with SheetJS (XLSX).
 * - .csv files are read as text and parsed with Papa Parse. The text is
 *   decoded with the browser's default decoding first and, when it contains
 *   no Hebrew characters, exactly once more as ISO-8859-8.
 * - Any other extension triggers an alert.
 *
 * Reading is asynchronous: $scope.fileContent is replaced from the
 * FileReader callbacks after this function has returned.
 * NOTE(review): those callbacks are not started by Angular — confirm whether
 * the caller needs to trigger a digest for the view to refresh.
 *
 * @param {FileList|File[]} files - Selected files; only files[0] is read.
 */
$scope.do_file = function(files)
{
    // Papa Parse settings, passed to Papa.parse as its second argument.
    const config = {
        delimiter: "", // auto-detect
        newline: "", // auto-detect
        quoteChar: '"',
        escapeChar: '"',
        header: false,
        trimHeader: false,
        dynamicTyping: false,
        preview: 0,
        encoding: "",
        worker: false,
        comments: false,
        step: undefined,
        complete: undefined,
        error: undefined,
        download: false,
        skipEmptyLines: false,
        chunk: undefined,
        fastMode: undefined,
        beforeFirstChunk: undefined,
        withCredentials: undefined
    };
    // NOTE(review): the surrounding text describes the files as Windows-1255 —
    // confirm whether 'windows-1255' is the label that should be used here.
    const FALLBACK_ENCODING = 'ISO-8859-8';

    // Keeps primitives, arrays and non-empty objects; drops empty plain objects.
    const dropEmptyObjects = function(rows) {
        return rows.filter(function(el) {
            return typeof el !== "object" || Array.isArray(el) || Object.keys(el).length > 0;
        });
    };

    $scope.fileContent = [];
    const f = files && files[0];
    if (!f) {
        return;
    }

    // Text after the last dot, lower-cased so ".XLSX" / ".CSV" are accepted too.
    const fileExtension = f.name.replace(/^.*\./, '').toLowerCase();

    if (fileExtension === 'xlsx')
    {
        const reader = new FileReader();
        reader.onload = function(e)
        {
            const workbook = XLSX.read(e.target.result, {type: 'array'});
            const result = {};
            workbook.SheetNames.forEach(function(sheetName) {
                const roa = XLSX.utils.sheet_to_json(workbook.Sheets[sheetName], {header: 1});
                if (roa.length) result[sheetName] = roa;
            });
            if (result["Sheet1"])
            {
                $scope.fileContent = result["Sheet1"];
                return;
            }
            // Fall back to the "contacts" sheet; an absent or empty sheet
            // yields an empty list instead of a TypeError.
            $scope.fileContent = dropEmptyObjects(result["contacts"] || []);
        };
        reader.onerror = function() {
            console.error(reader.error);
        };
        reader.readAsArrayBuffer(f);
    }
    else if (fileExtension === 'csv')
    {
        const reader = new FileReader();
        // Text of the first pass; non-null once the fallback read has been started.
        let defaultDecoded = null;
        reader.onload = function(e)
        {
            let text = String(e.target.result);

            // Retry at most once: without this guard a CSV that has no Hebrew
            // at all would restart the read from every load event, forever.
            if (defaultDecoded === null && !$scope.is_Hebrew(text))
            {
                defaultDecoded = text;
                reader.readAsText(f, FALLBACK_ENCODING);
                return;
            }
            if (defaultDecoded !== null && !$scope.is_Hebrew(text))
            {
                text = defaultDecoded;
            }

            // The config object itself is passed, not an array wrapping it.
            const parsed = Papa.parse(text, config);
            $scope.fileContent = dropEmptyObjects(parsed["data"]);
        };
        reader.onerror = function() {
            console.error(reader.error);
        };
        reader.readAsText(f);
    }
    else
    {
        alert("File Not supported!");
    }

    // Runs synchronously, before any read completes: the placeholder row is
    // replaced once a FileReader callback assigns the parsed rows.
    $scope.fileContent.push([]);
};
I am reading .csv/.xlsx files uploaded via JavaScript and want the result as an array
containing each row. I was able to read the file and get the data using FileReader
and SheetJS with the following code.
// code for the new excel reader
/**
 * Reads the first selected file as an ArrayBuffer, parses it with SheetJS
 * (XLSX) and stores the rows in $scope.fileContent.
 *
 * The rows of the sheet named "Sheet1" are used when that sheet has any;
 * otherwise the rows of the sheet named "contacts" are used, and an empty
 * list when neither sheet has rows.
 *
 * Reading is asynchronous: $scope.fileContent is [] when this function
 * returns and is replaced from the FileReader load callback.
 *
 * @param {FileList|File[]} files - Selected files; only files[0] is read.
 */
$scope.do_file = function(files)
{
    $scope.fileContent = [];
    const f = files && files[0];
    if (!f) {
        return;
    }

    const reader = new FileReader();
    reader.onload = function(e)
    {
        const workbook = XLSX.read(e.target.result, {type: 'array'});

        // Collect the rows of every non-empty sheet, keyed by sheet name.
        const result = {};
        workbook.SheetNames.forEach(function(sheetName) {
            const roa = XLSX.utils.sheet_to_json(workbook.Sheets[sheetName], {header: 1});
            if (roa.length) result[sheetName] = roa;
        });

        if (result["Sheet1"])
        {
            $scope.fileContent = result["Sheet1"];
            return;
        }
        // An absent or empty "contacts" sheet yields an empty list instead of
        // a TypeError on undefined.
        const fallbackRows = result["contacts"] || [];
        $scope.fileContent = fallbackRows.filter(function(el) {
            return typeof el !== "object" || Array.isArray(el) || Object.keys(el).length > 0;
        });
    };
    reader.onerror = function() {
        console.error(reader.error);
    };
    reader.readAsArrayBuffer(f);
};
For most files the code works, but when a file contains Hebrew text in Windows-1255 encoding I get gibberish data.
Looking for more options, I tried to read the file as text using reader.readAsText
and change the encoding as necessary; see the following code:
/**
 * Reports whether a string contains at least one character from the
 * Unicode Hebrew block (U+0590-U+05FF).
 *
 * @param {string} data - Text to inspect.
 * @returns {boolean} true when a Hebrew character is present.
 */
function is_Hebrew(data)
{
    const hebrewBlock = /[\u0590-\u05FF]/;
    // search() yields -1 when nothing matches, otherwise the match index.
    return data.search(hebrewBlock) !== -1;
}
/**
 * Reads the first selected file as text and splits it into rows of cells.
 *
 * The file is decoded with the browser's default text decoding first. When
 * that text contains no Hebrew characters it is decoded exactly once more as
 * ISO-8859-8; if the retry finds no Hebrew either, the first decoding is used.
 *
 * @param {FileList|File[]} files - Selected files; only files[0] is read.
 */
$scope.do_file = function(files)
{
    // NOTE(review): the surrounding text describes the files as Windows-1255 —
    // confirm whether 'windows-1255' is the label that should be used here.
    const FALLBACK_ENCODING = 'ISO-8859-8';

    const file = files && files[0];
    if (!file) {
        return;
    }

    const fullResult = [];
    const reader = new FileReader();
    // Text of the first pass; non-null once the fallback read has been started.
    let defaultDecoded = null;

    reader.onload = function(e) {
        let text = String(e.target.result);

        // Retry at most once: without this guard a file that has no Hebrew
        // at all would restart the read from every load event, forever.
        if (defaultDecoded === null && !is_Hebrew(text)) {
            defaultDecoded = text;
            reader.readAsText(file, FALLBACK_ENCODING);
            return;
        }
        if (defaultDecoded !== null && !is_Hebrew(text)) {
            text = defaultDecoded;
        }

        // Accept CRLF, CR and LF line endings.
        const lines = text.split(/\r\n|\r|\n/);
        console.log(lines);
        // Plain comma split: quoted cells such as "25,28,29" are not honoured.
        lines.forEach((line) => {
            fullResult.push(line.split(','));
        });
        console.log(reader);
    };
    reader.onerror = function() {
        console.error(reader.error);
    };
    reader.readAsText(file);
};
But the above code is not suitable, as it does not read the file row by row while correctly identifying each cell. If any cell contains a string with comma-separated values (for example, a cell containing the string "25,28,29"), the array output is wrong because each value is treated as a separate cell.
So I decided to stick with the first method, but I am not able to change the encoding. Is there a way to change the encoding in the first code sample, where I used readAsArrayBuffer
to read the file data?
After going through a lot of possible solutions, I found that the answer to the above question was to combine the two methods: the first method for reading xlsx
files and the second method for reading csv
files. I also used an additional JavaScript
library called Papa Parse in the second method to solve the problem of reading the data in each cell.
/**
 * Reports whether a string contains at least one character from the
 * Unicode Hebrew block (U+0590-U+05FF).
 *
 * @param {string} $data - Text to inspect.
 * @returns {boolean} true when a Hebrew character is present.
 */
$scope.is_Hebrew = function($data){
    const hebrewBlock = /[\u0590-\u05FF]/;
    // search() yields -1 when nothing matches, otherwise the match index.
    return $data.search(hebrewBlock) !== -1;
};
// code for the new excel reader
/**
 * Loads the first selected file into $scope.fileContent as an array of rows.
 *
 * - .xlsx files are read as an ArrayBuffer and parsed with SheetJS (XLSX).
 * - .csv files are read as text and parsed with Papa Parse. The text is
 *   decoded with the browser's default decoding first and, when it contains
 *   no Hebrew characters, exactly once more as ISO-8859-8.
 * - Any other extension triggers an alert.
 *
 * Reading is asynchronous: $scope.fileContent is replaced from the
 * FileReader callbacks after this function has returned.
 * NOTE(review): those callbacks are not started by Angular — confirm whether
 * the caller needs to trigger a digest for the view to refresh.
 *
 * @param {FileList|File[]} files - Selected files; only files[0] is read.
 */
$scope.do_file = function(files)
{
    // Papa Parse settings, passed to Papa.parse as its second argument.
    const config = {
        delimiter: "", // auto-detect
        newline: "", // auto-detect
        quoteChar: '"',
        escapeChar: '"',
        header: false,
        trimHeader: false,
        dynamicTyping: false,
        preview: 0,
        encoding: "",
        worker: false,
        comments: false,
        step: undefined,
        complete: undefined,
        error: undefined,
        download: false,
        skipEmptyLines: false,
        chunk: undefined,
        fastMode: undefined,
        beforeFirstChunk: undefined,
        withCredentials: undefined
    };
    // NOTE(review): the surrounding text describes the files as Windows-1255 —
    // confirm whether 'windows-1255' is the label that should be used here.
    const FALLBACK_ENCODING = 'ISO-8859-8';

    // Keeps primitives, arrays and non-empty objects; drops empty plain objects.
    const dropEmptyObjects = function(rows) {
        return rows.filter(function(el) {
            return typeof el !== "object" || Array.isArray(el) || Object.keys(el).length > 0;
        });
    };

    $scope.fileContent = [];
    const f = files && files[0];
    if (!f) {
        return;
    }

    // Text after the last dot, lower-cased so ".XLSX" / ".CSV" are accepted too.
    const fileExtension = f.name.replace(/^.*\./, '').toLowerCase();

    if (fileExtension === 'xlsx')
    {
        const reader = new FileReader();
        reader.onload = function(e)
        {
            const workbook = XLSX.read(e.target.result, {type: 'array'});
            const result = {};
            workbook.SheetNames.forEach(function(sheetName) {
                const roa = XLSX.utils.sheet_to_json(workbook.Sheets[sheetName], {header: 1});
                if (roa.length) result[sheetName] = roa;
            });
            if (result["Sheet1"])
            {
                $scope.fileContent = result["Sheet1"];
                return;
            }
            // Fall back to the "contacts" sheet; an absent or empty sheet
            // yields an empty list instead of a TypeError.
            $scope.fileContent = dropEmptyObjects(result["contacts"] || []);
        };
        reader.onerror = function() {
            console.error(reader.error);
        };
        reader.readAsArrayBuffer(f);
    }
    else if (fileExtension === 'csv')
    {
        const reader = new FileReader();
        // Text of the first pass; non-null once the fallback read has been started.
        let defaultDecoded = null;
        reader.onload = function(e)
        {
            let text = String(e.target.result);

            // Retry at most once: without this guard a CSV that has no Hebrew
            // at all would restart the read from every load event, forever.
            if (defaultDecoded === null && !$scope.is_Hebrew(text))
            {
                defaultDecoded = text;
                reader.readAsText(f, FALLBACK_ENCODING);
                return;
            }
            if (defaultDecoded !== null && !$scope.is_Hebrew(text))
            {
                text = defaultDecoded;
            }

            // The config object itself is passed, not an array wrapping it.
            const parsed = Papa.parse(text, config);
            $scope.fileContent = dropEmptyObjects(parsed["data"]);
        };
        reader.onerror = function() {
            console.error(reader.error);
        };
        reader.readAsText(f);
    }
    else
    {
        alert("File Not supported!");
    }

    // Runs synchronously, before any read completes: the placeholder row is
    // replaced once a FileReader callback assigns the parsed rows.
    $scope.fileContent.push([]);
};
这篇关于当读取为readAsArrayBuffer时,Filereader使用正确的编码读取文件的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!