当读取为 readAsArrayBuffer 时,Filereader 使用正确的编码读取文件 [英] Filereader read file using correct encoding when read as readAsArrayBuffer
问题描述
我正在使用 JavaScript 读取上传的 .csv/.xlsx 文件,并希望得到一个包含每一行的数组(array)
.我能够使用 FileReader
和 SheetJS 读取文件并获取数据.大多数文件都能正常读取,但当文件包含采用 Windows-1255 编码的希伯来语文本时,得到的是乱码.
为了寻找其他办法,我尝试使用 reader.readAsText
以文本形式读取文件并根据需要更改编码,请看以下代码:
/**
 * Reports whether a string contains at least one character from the
 * Unicode Hebrew block (U+0590-U+05FF).
 *
 * @param {string} data - text to inspect.
 * @returns {boolean} true when a Hebrew-block character is present.
 */
function is_Hebrew(data)
{
    var position = data.search(/[\u0590-\u05FF]/);
    return position >= 0;
}

/**
 * Reads the first selected file as text, retrying once with the
 * 'ISO-8859-8' label when the default decode contains no Hebrew, then
 * splits the text into lines and each line on commas.
 *
 * NOTE(review): fullResult is local and only the console output is
 * observable; cells that contain commas are still split apart.
 *
 * @param {FileList|File[]} files - selected files; only files[0] is read.
 */
$scope.do_file = function(files)
{
    var fullResult = [];
    var file = files[0];
    var reader = new FileReader();
    // Text from the default decode; null until the first pass has finished.
    var defaultText = null;

    reader.onload = function(e){
        var text = String(e.target.result);
        var hasHebrew = is_Hebrew(text);
        if(defaultText === null && !hasHebrew)
        {
            // First pass found no Hebrew: retry exactly once with the
            // Hebrew code page. Without this guard a file that has no
            // Hebrew in either decoding would be re-read forever.
            defaultText = text;
            reader.readAsText(file,'ISO-8859-8');
            return;
        }
        if(!hasHebrew)
        {
            // The retry did not reveal Hebrew either, so keep the default decode.
            text = defaultText;
        }
        // Accept both CRLF and LF line endings.
        var lines = text.split(/\r?\n/);
        console.log(lines);
        lines.forEach(function(line) {
            fullResult.push(line.split(','));
        });
        console.log(reader);
    };
    reader.onerror = function(){
        // A failed read leaves reader.result null; report it instead of parsing.
        console.error(reader.error);
    };
    reader.readAsText(file);
};
但是上面的代码并不合适,因为它只是按逗号拆分文本,无法正确识别每一行中的各个单元格.如果某个单元格包含带逗号的字符串(例如单元格的值为 "25,28,29"),输出的数组就会出错,因为每个逗号分隔的值都会被当作一个单独的单元格.
所以我决定坚持使用第一种方法,但我无法更改编码.在我使用 readAsArrayBuffer
读取文件的第一个代码中,是否有可能更改编码数据?
经过很多可能的解决方案后,我发现上述问题的答案是将上述两种方法结合起来.第一种读取xlsx
文件的方法和第二种读取csv
文件的方法.此外,我在第二种方法中使用了一个名为 papaparse 的附加 javaScript
库来解决每个单元格读取数据的问题
/**
 * Reports whether a string contains at least one character from the
 * Unicode Hebrew block (U+0590-U+05FF).
 *
 * @param {string} $data - text to inspect.
 * @returns {boolean} true when a Hebrew-block character is present.
 */
$scope.is_Hebrew = function($data){
    var position = $data.search(/[\u0590-\u05FF]/);
    return position >= 0;
};

// code for the new excel reader
/**
 * Loads the first selected file into $scope.fileContent as an array of rows.
 * .xlsx files are read as an ArrayBuffer and parsed with SheetJS; .csv files
 * are read as text (retrying once with the 'ISO-8859-8' label when the
 * default decode contains no Hebrew) and parsed with Papa Parse.
 *
 * @param {FileList|File[]} files - selected files; only files[0] is read.
 */
$scope.do_file = function(files)
{
    // Papa Parse options, passed to Papa.parse below.
    var config = {
        delimiter: "", // auto-detect
        newline: "", // auto-detect
        quoteChar: '"',
        escapeChar: '"',
        header: false,
        trimHeader: false,
        dynamicTyping: false,
        preview: 0,
        encoding: "",
        worker: false,
        comments: false,
        step: undefined,
        complete: undefined,
        error: undefined,
        download: false,
        skipEmptyLines: false,
        chunk: undefined,
        fastMode: undefined,
        beforeFirstChunk: undefined,
        withCredentials: undefined
    };
    // Keeps every row except empty plain objects.
    var isNotEmptyObject = function(el) { return typeof el != "object" || Array.isArray(el) || Object.keys(el).length > 0; };

    $scope.fileContent = [];
    var f = files && files[0];
    if(!f)
    {
        // Nothing was selected (e.g. the file dialog was cancelled).
        return;
    }
    // Compare case-insensitively so "DATA.XLSX" / "data.CSV" are accepted too.
    var fileExtension = f.name.replace(/^.*\./, '').toLowerCase();
    if(fileExtension == 'xlsx')
    {
        var xlsxReader = new FileReader();
        xlsxReader.onload = function(e)
        {
            var workbook = XLSX.read(e.target.result, {type: 'array'});
            var result = {};
            workbook.SheetNames.forEach(function(sheetName) {
                var roa = XLSX.utils.sheet_to_json(workbook.Sheets[sheetName], {header:1});
                if(roa.length) result[sheetName] = roa;
            });
            if(result["Sheet1"])
            {
                $scope.fileContent = result["Sheet1"];
            }
            else
            {
                // "contacts" may be missing or empty as well; use an empty
                // list instead of calling .filter on undefined.
                $scope.fileContent = (result["contacts"] || []).filter(isNotEmptyObject);
            }
        };
        xlsxReader.readAsArrayBuffer(f);
    }
    else if(fileExtension == 'csv')
    {
        var csvReader = new FileReader();
        // Text from the default decode; null until the first pass has finished.
        var defaultText = null;
        csvReader.onload = function(e)
        {
            var text = String(e.target.result);
            var hasHebrew = $scope.is_Hebrew(text);
            console.log(f);
            console.log(hasHebrew);
            if(defaultText === null && !hasHebrew)
            {
                // Retry exactly once with the Hebrew code page. Without this
                // guard a file that has no Hebrew in either decoding would
                // be re-read forever.
                defaultText = text;
                csvReader.readAsText(f,'ISO-8859-8');
                return;
            }
            if(!hasHebrew)
            {
                // The retry did not reveal Hebrew either, so keep the default decode.
                text = defaultText;
            }
            // Pass the config object itself; wrapping it in an array makes
            // Papa Parse ignore every option.
            var c = Papa.parse(text, config);
            console.log(c);
            $scope.fileContent = c["data"].filter(isNotEmptyObject);
        };
        csvReader.onerror = function(){
            console.error(csvReader.error);
        };
        csvReader.readAsText(f);
    }
    else
    {
        alert("File Not supported!");
    }
    // NOTE(review): this runs synchronously, before either reader has
    // finished, so the placeholder row lands on the initial empty array and
    // is replaced once a read completes - confirm that this is intended.
    $scope.fileContent.push([]);
};
I am working on reading an uploaded .csv/.xlsx file with JavaScript and getting the result as an array
containing each row. I was able to read the file and get the data using FileReader
and SheetJS with the following code.
// code for the new excel reader
/**
 * Reads the first selected file as an ArrayBuffer, parses it with SheetJS,
 * and stores the rows of the "Sheet1" worksheet (or, failing that, the
 * "contacts" worksheet) in $scope.fileContent as an array of rows.
 *
 * @param {FileList|File[]} files - selected files; only files[0] is read.
 */
$scope.do_file = function(files)
{
    $scope.fileContent = [];
    var f = files && files[0];
    if(!f)
    {
        // Nothing was selected (e.g. the file dialog was cancelled).
        return;
    }
    var reader = new FileReader();
    reader.onload = function(e)
    {
        var data = e.target.result;console.log(data);
        var workbook = XLSX.read(data, {type: 'array'});
        var result = {};
        // Collect every non-empty sheet, keyed by sheet name.
        workbook.SheetNames.forEach(function(sheetName) {
            var roa = XLSX.utils.sheet_to_json(workbook.Sheets[sheetName], {header:1});
            if(roa.length) result[sheetName] = roa;
        });
        if(result["Sheet1"])
        {
            $scope.fileContent = result["Sheet1"];
        }
        else
        {
            // "contacts" may be missing or empty as well; use an empty list
            // instead of calling .filter on undefined.
            $scope.fileContent = (result["contacts"] || []).filter(function(el) { return typeof el != "object" || Array.isArray(el) || Object.keys(el).length > 0; });
        }
    };
    reader.readAsArrayBuffer(f);
};
The code works for most files, but when a file contains Hebrew text in Windows-1255 encoding I get gibberish data.
Looking for more options, I tried to read the file as text using reader.readAsText
and change the encoding as necessary; see the following code:
/**
 * Reports whether a string contains at least one character from the
 * Unicode Hebrew block (U+0590-U+05FF).
 *
 * @param {string} data - text to inspect.
 * @returns {boolean} true when a Hebrew-block character is present.
 */
function is_Hebrew(data)
{
    var hebrewBlock = /[\u0590-\u05FF]/;
    // String#search yields -1 when the pattern never matches.
    return data.search(hebrewBlock) !== -1;
}
/**
 * Reads the first selected file as text, retrying once with the
 * 'ISO-8859-8' label when the default decode contains no Hebrew, then
 * splits the text into lines and each line on commas.
 *
 * NOTE(review): fullResult is local and only the console output is
 * observable; cells that contain commas are still split apart.
 *
 * @param {FileList|File[]} files - selected files; only files[0] is read.
 */
$scope.do_file = function(files)
{
    var fullResult = [];
    var file = files[0];
    var reader = new FileReader();
    // Text from the default decode; null until the first pass has finished.
    var defaultText = null;

    reader.onload = function(e){
        var text = String(e.target.result);
        var hasHebrew = is_Hebrew(text);
        if(defaultText === null && !hasHebrew)
        {
            // First pass found no Hebrew: retry exactly once with the
            // Hebrew code page. Without this guard a file that has no
            // Hebrew in either decoding would be re-read forever.
            defaultText = text;
            reader.readAsText(file,'ISO-8859-8');
            return;
        }
        if(!hasHebrew)
        {
            // The retry did not reveal Hebrew either, so keep the default decode.
            text = defaultText;
        }
        // Accept both CRLF and LF line endings.
        var lines = text.split(/\r?\n/);
        console.log(lines);
        lines.forEach(function(line) {
            fullResult.push(line.split(','));
        });
        console.log(reader);
    };
    reader.onerror = function(){
        // A failed read leaves reader.result null; report it instead of parsing.
        console.error(reader.error);
    };
    reader.readAsText(file);
};
But the above code is not suitable, because it splits the text on every comma instead of recognizing the cells of each row. If a cell contains a string with comma-separated values (for example, the string value "25,28,29"), the output array is wrong, because each value is treated as a separate cell.
So I decided to stick with the first method, but I am not able to change the encoding. Is there a way to change the encoding in the first code, where I use readAsArrayBuffer
to read the file data?
After going through a lot of possible solutions, I found that the answer to the above question was to combine the two methods: the first method for reading xlsx
files and the second method for reading csv
files. I also used an additional JavaScript
library called Papa Parse in the second method to solve the problem of reading the data in each cell.
/**
 * Reports whether a string contains at least one character from the
 * Unicode Hebrew block (U+0590-U+05FF).
 *
 * @param {string} $data - text to inspect.
 * @returns {boolean} true when a Hebrew-block character is present.
 */
$scope.is_Hebrew = function($data){
    var hebrewBlock = /[\u0590-\u05FF]/;
    // String#search yields -1 when the pattern never matches.
    return $data.search(hebrewBlock) !== -1;
};
// code for the new excel reader
/**
 * Loads the first selected file into $scope.fileContent as an array of rows.
 * .xlsx files are read as an ArrayBuffer and parsed with SheetJS; .csv files
 * are read as text (retrying once with the 'ISO-8859-8' label when the
 * default decode contains no Hebrew) and parsed with Papa Parse.
 *
 * @param {FileList|File[]} files - selected files; only files[0] is read.
 */
$scope.do_file = function(files)
{
    // Papa Parse options, passed to Papa.parse below.
    var config = {
        delimiter: "", // auto-detect
        newline: "", // auto-detect
        quoteChar: '"',
        escapeChar: '"',
        header: false,
        trimHeader: false,
        dynamicTyping: false,
        preview: 0,
        encoding: "",
        worker: false,
        comments: false,
        step: undefined,
        complete: undefined,
        error: undefined,
        download: false,
        skipEmptyLines: false,
        chunk: undefined,
        fastMode: undefined,
        beforeFirstChunk: undefined,
        withCredentials: undefined
    };
    // Keeps every row except empty plain objects.
    var isNotEmptyObject = function(el) { return typeof el != "object" || Array.isArray(el) || Object.keys(el).length > 0; };

    $scope.fileContent = [];
    var f = files && files[0];
    if(!f)
    {
        // Nothing was selected (e.g. the file dialog was cancelled).
        return;
    }
    // Compare case-insensitively so "DATA.XLSX" / "data.CSV" are accepted too.
    var fileExtension = f.name.replace(/^.*\./, '').toLowerCase();
    if(fileExtension == 'xlsx')
    {
        var xlsxReader = new FileReader();
        xlsxReader.onload = function(e)
        {
            var workbook = XLSX.read(e.target.result, {type: 'array'});
            var result = {};
            workbook.SheetNames.forEach(function(sheetName) {
                var roa = XLSX.utils.sheet_to_json(workbook.Sheets[sheetName], {header:1});
                if(roa.length) result[sheetName] = roa;
            });
            if(result["Sheet1"])
            {
                $scope.fileContent = result["Sheet1"];
            }
            else
            {
                // "contacts" may be missing or empty as well; use an empty
                // list instead of calling .filter on undefined.
                $scope.fileContent = (result["contacts"] || []).filter(isNotEmptyObject);
            }
        };
        xlsxReader.readAsArrayBuffer(f);
    }
    else if(fileExtension == 'csv')
    {
        var csvReader = new FileReader();
        // Text from the default decode; null until the first pass has finished.
        var defaultText = null;
        csvReader.onload = function(e)
        {
            var text = String(e.target.result);
            var hasHebrew = $scope.is_Hebrew(text);
            console.log(f);
            console.log(hasHebrew);
            if(defaultText === null && !hasHebrew)
            {
                // Retry exactly once with the Hebrew code page. Without this
                // guard a file that has no Hebrew in either decoding would
                // be re-read forever.
                defaultText = text;
                csvReader.readAsText(f,'ISO-8859-8');
                return;
            }
            if(!hasHebrew)
            {
                // The retry did not reveal Hebrew either, so keep the default decode.
                text = defaultText;
            }
            // Pass the config object itself; wrapping it in an array makes
            // Papa Parse ignore every option.
            var c = Papa.parse(text, config);
            console.log(c);
            $scope.fileContent = c["data"].filter(isNotEmptyObject);
        };
        csvReader.onerror = function(){
            console.error(csvReader.error);
        };
        csvReader.readAsText(f);
    }
    else
    {
        alert("File Not supported!");
    }
    // NOTE(review): this runs synchronously, before either reader has
    // finished, so the placeholder row lands on the initial empty array and
    // is replaced once a read completes - confirm that this is intended.
    $scope.fileContent.push([]);
};
这篇关于当读取为 readAsArrayBuffer 时,Filereader 使用正确的编码读取文件的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!