JavaScript文件哈希值生成部分文件 [英] JavaScript File Hash Value Generate with Part of the file

查看:123
本文介绍了JavaScript文件哈希值生成部分文件的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述

我正在使用 JavaScript 为文件生成哈希值,用作文件的唯一标识。请查看下面这段哈希生成代码,它目前运行良好。

 < script type =text / javascript> 
//参考:https://code.google.com/p/crypto-js/#MD5
函数handleFileSelect(evt)
{
var files = evt.target .files; // FileList对象
//循环遍历FileList并以缩略图形式呈现图像文件。
for(var i = 0,f; f = files [i]; i ++)
{
var reader = new FileReader();
//关闭以捕获文件信息。
reader.onload =(function(theFile)
{
return function(e)
{
var span = document.createElement('span');
var test = e.target.result;
// var hash = hex_md5(test);
var hash = CryptoJS.MD5(test);
var elem = document.getElementById( hashValue);
elem.value = hash;
};
})(f);
//读取图像文件中的数据URL。
reader.readAsBinaryString(f);

$ b $ document.getElementById('videoupload')。addEventListener('change',handleFileSelect,false);
< / script>

然而,在为大文件生成哈希值时我遇到了问题:客户端的浏览器会崩溃。

文件在 30MB 以内时哈希计算运行良好,但如果我尝试上传更大的文件,系统就会崩溃。



我的问题是:



  1. 我可以只为文件的一部分生成哈希值,而不是读取整个大文件并导致崩溃吗?如果可以,如何用 'FileReader' 实现?


  2. 我可以指定任意数量的字节(例如文件的 2000 个字符)来生成哈希值,而不是对整个大文件生成吗?

  3. 两种解决方案将适用于大文件和小文件。还有其他的选择吗?



    我的小提琴演示

    解决方案


    1. 我可以只为文件的一部分生成哈希值,而不是读取整个大文件并导致崩溃吗?如果可以,如何用 'FileReader' 实现?


是的,您可以做到这一点,它被称为渐进式哈希

var md5 = CryptoJS.algo.MD5.create();

md5.update("file part 1");
md5.update("file part 2");
md5.update("file part 3");

var hash = md5.finalize();





  1. 指定任意数量的字节(例如文件的2000字符)以生成HASH值,然后为大文件生成。


有一篇 HTML5Rocks 文章介绍了如何使用 File.slice 将文件切片传递给 FileReader:

  var blob = file.slice( startingByte,endindByte); 
reader.readAsArrayBuffer(blob);



完整解决方案



我把两者结合了起来。棘手的部分是让文件读取按顺序进行,因为 FileReader.readAsArrayBuffer() 是异步的。我写了一个小的 series 函数,它仿照 async.js 的 series 函数。各个分块必须一个接一个地处理,因为无法访问 CryptoJS 哈希函数的内部状态。



另外,CryptoJS不会了解 ArrayBuffer 是什么,因此必须将其转换为其本机数据表示形式,即所谓的WordArray:

function arrayBufferToWordArray(ab) {
  var i8a = new Uint8Array(ab);
  var a = [];
  for (var i = 0; i < i8a.length; i += 4) {
    a.push(i8a[i] << 24 | i8a[i + 1] << 16 | i8a[i + 2] << 8 | i8a[i + 3]);
  }
  return CryptoJS.lib.WordArray.create(a, i8a.length);
}

另一点是,哈希计算是同步操作,执行期间没有 yield 让出控制权去执行其他代码。由于 JavaScript 是单线程的,浏览器会因此冻结。解决方案是使用 Web Worker 将哈希计算转移到另一个线程,使 UI 线程保持响应。

Web Worker 的构造函数需要一个脚本文件,所以我使用了 Rob W 的这个解决方案来实现内联脚本。

function series(tasks, done){
    if(!tasks || tasks.length === 0) {
        done();
    } else {
        tasks[0](function(){
            series(tasks.slice(1), done);
        });
    }
}


函数webWorkerOnMessage(e){
if(e.data.type ===create){
md5 = CryptoJS .algo.MD5.create();
postMessage({type:create});
} else if(e.data.type ===update){
function arrayBufferToWordArray(ab){
var i8a = new Uint8Array(ab);
var a = [];对于(var i = 0; i< i8a.length; i + = 4){
.p.p(i8a [i] <24 | i8a [i + 1] < 16 | i8a [i + 2]<< 8 | i8a [i + 3]);
}
返回CryptoJS.lib.WordArray.create(a,i8a.length);
}
md5.update(arrayBufferToWordArray(e.data.chunk));
postMessage({type:update});
} else if(e.data.type ===finish){
postMessage({type:finish,hash:+ md5.finalize()});
}
}

// URL.createObjectURL
window.URL = window.URL || window.webkitURL;

//所有示例中使用的服务器响应
var response =
importScripts('https://cdn.rawgit.com/CryptoStore/crypto-js/ 3.1.2 / build / rollups / md5.js');+
var md5;+
self.onmessage =+ webWorkerOnMessage.toString();

var blob;
尝试{
blob = new Blob([response],{type:'application / javascript'});
} catch(e){//向后兼容
window.BlobBuilder = window.BlobBuilder || window.WebKitBlobBuilder || window.MozBlobBuilder;
blob = new BlobBuilder();
blob.append(response);
blob = blob.getBlob();
}
var worker = new Worker(URL.createObjectURL(blob));


var files = evt.target.files; // FileList对象
var chunksize = 1000000; //块大小没有区别
var i = 0,
f = files [i],
chunks = Math.ceil(f.size / chunksize),
chunkTasks = [],
startTime =(new Date())。getTime();
worker.onmessage = function(e){
//创建回调

(var j = 0; j (function (j,f){
chunkTasks.push(function(next){
var blob = f.slice(j * chunksize,Math.min((j + 1)* chunksize,f.size) );
var reader = new FileReader();

reader.onload = function(e){
var chunk = e.target.result;
worker。 onmessage = function(e){
//更新回调
document.getElementById('num')。innerHTML =+(j + 1)+/+ chunks;
next ();
};
worker.postMessage({type:update,chunk:chunk});
};
reader.readAsArrayBuffer(blob);
});
})(j,f);
}
series(chunkTasks,function(){
var elem = document.getElementById(hashValueSplit);
var telem = document.getElementById(time);
worker.onmessage = function(e){
//完成回调
elem.value = e.data.hash;
telem.innerHTML =in+ Math.ceil( ((new Date()).getTime() - startTime)/ 1000)+seconds;
};
worker.postMessage({type:finish});
} );

//阻止前进...
if(document.getElementById(singleHash)。checked){
var reader = new FileReader();

//关闭捕获文件信息。
reader.onloadend =(函数(theFile){
函数arrayBufferToWordArray(ab){
var i8a = new Uint8Array(ab);
var a = []; $ b $对于(var i = 0; i a.push(i8a [i] <24 | i8a [i + 1] <<16 | i8a [i + 2]<< 8 | i8a [i + 3]);
}
返回CryptoJS.lib.WordArray.create(a,i8a.length);
}
返回函数(e){
var test = e.target.result;
var hash = CryptoJS.MD5(arrayBufferToWordArray(test));
// var hash = none;
var elem = document.getElementById(hashValue);
elem.value = hash;
};
})(f);

//将图像文件读入数据URL。
reader.readAsArrayBuffer(f);
}
};
worker.postMessage({type:create});

DEMO 似乎适用于大文件,但需要相当长的时间。也许这可以通过使用更快的MD5实现来改进。花了大约23分钟来散列3 GB文件。

我的这个回答展示了一个不使用 Web Worker 的 SHA-256 示例。


I am using JavaScript to generate a hash value for each file so that files can be uniquely identified. Please see the code below for the hash generation mechanism, which works well.

<script type="text/javascript">
// Reference: https://code.google.com/p/crypto-js/#MD5
/**
 * Change-event handler: computes the MD5 hash of every selected file and
 * writes it into the element with id "hashValue".
 *
 * Each file is read completely into memory, so this is only suitable for
 * small files. When several files are selected, each finished read overwrites
 * the same "hashValue" element.
 *
 * @param {Event} evt - change event whose target exposes a `files` list.
 */
function handleFileSelect(evt) 
{   
    var files = evt.target.files; // FileList object
    for (var i = 0; i < files.length; i++) 
    {
        var reader = new FileReader();
        reader.onload = function(e) 
        {
            // readAsBinaryString yields one character per byte. CryptoJS
            // UTF-8-encodes plain string input, which would turn every byte
            // above 0x7F into two bytes and produce the wrong digest, so
            // convert the binary string to a WordArray via Latin1 first.
            var bytes = CryptoJS.enc.Latin1.parse(e.target.result);
            var hash = CryptoJS.MD5(bytes);
            var elem = document.getElementById("hashValue");
            elem.value = hash;
        };
        // Read the whole file as a binary string (one char per byte).
        reader.readAsBinaryString(files[i]);
    }
}
// Run handleFileSelect whenever the 'videoupload' element fires a 'change' event.
document.getElementById('videoupload').addEventListener('change', handleFileSelect, false);
</script>

However, I am facing a problem when generating the hash value for large files: the browser crashes on the client side.

Up to 30 MB the hashing works well, but if I try to upload a larger file the system crashes.

My Question is:

  1. Can I generate the hash value from part of a file rather than reading the whole LARGE file and crashing? If yes, how can I do that with 'FileReader'?

  2. Can I specify any amount of Byte such as 2000 Character of a file to generate HASH Value then generating for large files.

I hope the above two solution will work for larger and small files. Is there any other options?

My Fiddle Demo

解决方案

  1. Can I generate HASH Value for part of file than reading the LARGE files and getting crashes? If yes, Can I know how to do that width 'FileReader';

Yes, you can do that and it is called Progressive Hashing.

// Create an incremental (progressive) MD5 hasher.
var md5 = CryptoJS.algo.MD5.create();

// Feed the data piece by piece; the strings here are placeholders for the
// successive parts of the file.
md5.update("file part 1");
md5.update("file part 2");
md5.update("file part 3");

// Finish hashing and obtain the digest of everything passed to update().
var hash = md5.finalize();

  1. Can I specify any amount of Byte such as 2000 Character of a file to generate HASH Value then generating for large files.

There's an HTML5Rocks article on how one can use File.slice to pass a sliced file to the FileReader:

// Take only a byte range of the file and read just that slice.
// `file`, `reader`, `startingByte` and `endindByte` are placeholders that the
// surrounding code is expected to provide.
var blob = file.slice(startingByte, endindByte);
reader.readAsArrayBuffer(blob);

Full solution

I have combined both. The tricky part was to synchronize the file reading, because FileReader.readAsArrayBuffer() is asynchronous. I've written a small series function which is modeled after the series function of async.js. The chunks have to be processed one after the other, because there is no way to get to the internal state of the hashing function of CryptoJS.

Additionally, CryptoJS doesn't understand what an ArrayBuffer is, so it has to be converted to its native data representation, which is the so-called WordArray:

/**
 * Converts an ArrayBuffer into a CryptoJS WordArray.
 *
 * Bytes are packed big-endian, four per 32-bit word. A trailing partial word
 * is zero-padded; the real byte count is passed as the significant-bytes
 * argument.
 *
 * @param {ArrayBuffer} ab - raw bytes to convert.
 * @returns {*} the value returned by CryptoJS.lib.WordArray.create.
 */
function arrayBufferToWordArray(ab) {
  var bytes = new Uint8Array(ab);
  var byteCount = bytes.length;
  var words = [];
  for (var pos = 0; pos < byteCount; pos++) {
    // Word index is pos / 4; the byte's shift within the word is 24, 16, 8, 0.
    words[pos >>> 2] |= bytes[pos] << (24 - (pos % 4) * 8);
  }
  return CryptoJS.lib.WordArray.create(words, byteCount);
}

The other thing is that hashing is a synchronous operation where there is no yield to continue execution elsewhere. Because of this, the browser will freeze since JavaScript is single threaded. The solution is to use Web Workers to off-load the hashing to a different thread so that the UI thread keeps responsive.
Web workers expect the script file in their constructors, so I used this solution by Rob W to have an inline script.

/**
 * Runs asynchronous tasks strictly one after another, then calls `done`.
 *
 * Each task receives a single `next` callback and must invoke it once it has
 * finished; only then is the following task started. The task list is copied
 * once up front and walked with an index, instead of re-slicing the remaining
 * array on every step (which copied O(n^2) elements for n tasks).
 *
 * @param {Array<function(function())>} tasks - tasks to run in order; may be
 *     empty, null or undefined.
 * @param {function()} done - called once after the last task has finished,
 *     or immediately when there are no tasks.
 */
function series(tasks, done){
    var queue = tasks ? tasks.slice() : [];
    var position = 0;

    function runNext() {
        if (position >= queue.length) {
            done();
            return;
        }
        var task = queue[position];
        position += 1;
        task(runNext);
    }

    runNext();
}

/**
 * Message handler that runs inside the Web Worker and drives an incremental
 * MD5 computation.
 *
 * Handled message types (e.data.type):
 *   "create" - start a new hasher, stored in the free variable `md5`; replies {type: "create"}.
 *   "update" - feed the ArrayBuffer in e.data.chunk to the hasher; replies {type: "update"}.
 *   "finish" - finalize the hasher; replies {type: "finish", hash: <digest as string>}.
 * Any other type is ignored.
 *
 * The helper is declared inside this function so that the function stays
 * self-contained when its source text is obtained via toString().
 *
 * @param {MessageEvent} e - message posted to the worker.
 */
function webWorkerOnMessage(e){
    // Packs the buffer's bytes big-endian into 32-bit words for CryptoJS.
    var toWordArray = function (buffer) {
        var bytes = new Uint8Array(buffer);
        var words = [];
        var pos = 0;
        while (pos < bytes.length) {
            words.push(
                (bytes[pos] << 24) |
                (bytes[pos + 1] << 16) |
                (bytes[pos + 2] << 8) |
                bytes[pos + 3]
            );
            pos += 4;
        }
        return CryptoJS.lib.WordArray.create(words, bytes.length);
    };

    switch (e.data.type) {
        case "create":
            md5 = CryptoJS.algo.MD5.create();
            postMessage({type: "create"});
            break;
        case "update":
            md5.update(toWordArray(e.data.chunk));
            postMessage({type: "update"});
            break;
        case "finish":
            // String concatenation turns the digest object into its text form.
            postMessage({type: "finish", hash: "" + md5.finalize()});
            break;
    }
}

// Make URL.createObjectURL reachable under the unprefixed name.
window.URL = window.URL || window.webkitURL;

// Source text of the worker script: load the CryptoJS MD5 rollup, declare the
// hasher variable shared between messages, and install the serialized
// webWorkerOnMessage function as the worker's message handler.
var response = [
    "importScripts('https://cdn.rawgit.com/CryptoStore/crypto-js/3.1.2/build/rollups/md5.js');",
    "var md5;",
    "self.onmessage = " + webWorkerOnMessage.toString()
].join("");

// Wrap the script text in a Blob so it can be handed to the Worker
// constructor through an object URL.
var blob;
try {
    blob = new Blob([response], {type: 'application/javascript'});
} catch (e) {
    // The Blob constructor threw: fall back to the (vendor-prefixed) BlobBuilder API.
    window.BlobBuilder = window.BlobBuilder || window.WebKitBlobBuilder || window.MozBlobBuilder;
    blob = new BlobBuilder();
    blob.append(response);
    blob = blob.getBlob();
}

// Start the worker from the in-memory script.
var worker = new Worker(URL.createObjectURL(blob));


// Driver for the chunked, worker-based MD5 computation.
// NOTE(review): `evt` is not defined in this snippet; presumably this code runs
// inside a file-input change handler — confirm against the demo.
var files = evt.target.files; // FileList object    
var chunksize = 1000000; // the chunk size doesn't make a difference
// Only the first selected file (index 0) is hashed.
var i = 0, 
    f = files[i],
    chunks = Math.ceil(f.size / chunksize), // number of slices needed to cover the file
    chunkTasks = [],                        // one task per slice, run in order by series()
    startTime = (new Date()).getTime();     // used to report the elapsed time at the end
// The worker's single onmessage handler is reassigned for each phase
// (create -> update ... -> finish), so the order of these assignments matters.
worker.onmessage = function(e) {
    // create callback: the worker has set up its hasher, start feeding it

    // Build one task per chunk. The IIFE pins `j` and `f` for each task.
    for(var j = 0; j < chunks; j++){
        (function(j, f){
            chunkTasks.push(function(next){
                // Slice out chunk j; the last chunk is clamped to the file size.
                var blob = f.slice(j * chunksize, Math.min((j+1) * chunksize, f.size));
                var reader = new FileReader();

                reader.onload = function(e) {
                    var chunk = e.target.result;
                    // Wait for the worker's reply to this chunk before moving on.
                    worker.onmessage = function(e) {
                        // update callback: show progress as "current/total", then run the next task
                        document.getElementById('num').innerHTML = ""+(j+1)+"/"+chunks;
                        next();
                    };
                    worker.postMessage({type: "update", chunk: chunk});
                };
                reader.readAsArrayBuffer(blob);
            });
        })(j, f);
    }
    // Run the chunk tasks sequentially; once all are done, ask for the digest.
    series(chunkTasks, function(){
        var elem = document.getElementById("hashValueSplit");
        var telem = document.getElementById("time");
        worker.onmessage = function(e) {
            // finish callback: display the hash and the elapsed time in whole seconds
            elem.value = e.data.hash;
            telem.innerHTML = "in " + Math.ceil(((new Date()).getTime() - startTime) / 1000) + " seconds";
        };
        worker.postMessage({type: "finish"});
    });

    // blocking way ahead...
    // When the "singleHash" element is checked, additionally hash the whole
    // file in one go in this thread; the result goes to "hashValue".
    if (document.getElementById("singleHash").checked) {
        var reader = new FileReader();

        // The IIFE returns the actual onloadend handler; its `theFile`
        // parameter is not used by the handler.
        reader.onloadend = (function(theFile) {
            // Packs the buffer's bytes big-endian into 32-bit words for CryptoJS.
            function arrayBufferToWordArray(ab) {
                var i8a = new Uint8Array(ab);
                var a = [];
                for (var i = 0; i < i8a.length; i += 4) {
                    a.push(i8a[i] << 24 | i8a[i + 1] << 16 | i8a[i + 2] << 8 | i8a[i + 3]);
                }
                return CryptoJS.lib.WordArray.create(a, i8a.length);
            }
            return function(e) {
                var test = e.target.result;
                var hash = CryptoJS.MD5(arrayBufferToWordArray(test));
                //var hash = "none";
                var elem = document.getElementById("hashValue");
                elem.value = hash;
            };
        })(f);

        // Read the entire file into a single ArrayBuffer.
        reader.readAsArrayBuffer(f);
    }
};
// Kick everything off: ask the worker to create its hasher.
worker.postMessage({type: "create"});

DEMO seems to work for big files, but it takes quite a lot of time. Maybe this can be improved using a faster MD5 implementation. It took around 23 minutes to hash a 3 GB file.

This answer of mine shows an example without webworkers for SHA-256.

这篇关于JavaScript文件哈希值生成部分文件的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!

查看全文
登录 关闭
扫码关注1秒登录
发送“验证码”获取 | 15天全站免登陆