持久化 hashlib 状态 [英] Persisting hashlib state

查看:109
本文介绍了持久化 hashlib 状态的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!

问题描述

我想创建一个 hashlib 实例,对它调用 update(),然后以某种方式持久化它的状态。之后,我想用这份状态数据重新创建该对象,并继续对它调用 update()。最后,我希望得到全部累计数据的 hexdigest()。例如:

 持久化的状态必须能够跨多次运行保留下来。

import hashlib
m = hashlib.sha1()
m.update('one')
m.update('two')
#以某种方式,在这里坚持m的状态

#later,可能在另一个进程中
#从持久状态重新创建m
m.update('three')
m.update('four')
print m.hexdigest()
#此时,m.hexdigest() 应该等于 hashlib.sha1('onetwothreefour').hexdigest()

编辑:

2010 年时我没有找到用 Python 做这件事的好方法,最终用 C 编写了一个小的辅助程序来完成这个任务。不过,下面有一些很好的答案,是我当时还没有看到或不知道的。

你可以像下面这样使用 ctypes 来实现,不需要用 C 编写辅助程序:

> rehash.py

 #! / usr / bin / env python 
$ b $'''使用ctypes和OpenSSL加密库的SHA-256的可恢复实现

由PM 2Ring 2014.11.13编写
'''
$ b $ c from ctypes import *

SHA_LBLOCK = 16
SHA256_DIGEST_LENGTH = 32

class SHA256_CTX(Structure):
    _fields_ = [
        ("h", c_long * 8),
        ("Nl", c_long),
        ("Nh", c_long),
        ("data", c_long * SHA_LBLOCK),
        ("num", c_uint),
        ("md_len", c_uint)
    ]

HashBuffType = c_ubyte * SHA256_DIGEST_LENGTH

#crypto = cdll.LoadLibrary(libcrypto.so)
crypto = cdll.LoadLibrary("libeay32.dll" if os.name == "nt" else "libssl.so")

class sha256(object):
digest_size = SHA256_DIGEST_LENGTH
$ b $ def __init __(self,datastr = None):
self.ctx = SHA256_CTX()
crypto.SHA256_Init(byref(self.ctx))
如果datastr:
self.upd ate(datastr)

def update(self,datastr):
crypto.SHA256_Update(byref(self.ctx),datastr,c_int(len(datastr)))

#克隆当前上下文
def _copy_ctx(self):
ctx = SHA256_CTX()
指针(ctx)[0] = self.ctx
返回ctx

def copy(self):
other = sha256()
other.ctx = self._copy_ctx()
返回其他

def摘要(self):
#Preserve上下文以防在散列之前调用
#真的完成,因为SHA256_Final()清除SHA256_CTX
ctx = self._copy_ctx()
hashbuff = HashBuffType()
crypto.SHA256_Final(hashbuff,byref(self.ctx))
self.ctx = ctx
return str(bytearray(hashbuff))

def hexdigest(self):
return self.digest()。encode('hex')

#Tests
def main():
import cPickle
导入hashlib

data =(没有人期待,垃圾虫,拼版!)

printrehash\\\


shaA = sha256(''。join (数据))
print shaA.hexdigest()
print repr(shaA.digest())
printdigest size =,shaA.digest_size
print

shaB = sha256()
shaB.update(data [0])
print shaB.hexdigest()

#Test酸洗
sha_pickle = cPickle .dumps(shaB,-1)
printPickle length:,len(sha_pickle)
shaC = cPickle.loads(sha_pickle)

shaC.update(data [1 ])
print shaC.hexdigest()

#Test拷贝。请注意,可以复制副本
shaD = shaC.copy()

shaC.update(data [2])
print shaC.hexdigest()


#Verify hashlib.sha256()
print\\\
hashlib\\\


shaD = hashlib.sha256(''。join(data))
print shaD.hexdigest()
print repr(shaD.digest())
printdigest size =,shaD.digest_size
print

shaE = hashlib.sha256(data [0])
print shaE.hexdigest()

shaE.update(data [1])$ ​​b $ b print shaE.hexdigest()

#测试复制。请注意hashlib副本不能被腌制
shaF = shaE.copy()
shaF.update(data [2])
print shaF.hexdigest()


if __name__ =='__main__':
main()

resumable_SHA-256.py

 #! / usr / bin / env python 

'''使用OpenSSL加密库的大文件的可恢复SHA-256散列

散列过程可能被Control-C SIGINT)或SIGTERM。
当接收到一个信号时,散列一直持续到
当前块的结尾,然后当前文件位置,总文件大小和$​​ b $ b sha对象被保存到一个文件中。该文件的名称由
形成,将'.hash'添加到被哈希的文件的名称。

只需重新运行该程序即可恢复散列。散列完成后,'.hash'文件将被删除


作者PM 2Ring 2014.11.14
'''

导入cPickle作为pickle
导入os
导入信号
import sys

import rehash

quit = False

blocksize = 1 <16#64kB
blocksperchunk = 1 < 8

chunksize = blocksize * blocksperchunk

def处理程序(signum,frame):
全局退出
打印\\\
Got信号%d,清理向上。 %signum
quit = True


def do_hash(fname,filesize):
hashname = fname +'.hash'
如果os.path。存在(hashname):
with open(hashname,'rb')as f:
pos,fsize,sha = pickle.load(f)
if fsize!= filesize:
打印错误:'%s'的文件大小与'%s'中记录的大小不符'%(fname,hashname)
打印%d!=%d。中止%(fsize, )
exit(1)
else:
pos,fsize,sha = 0,filesize,rehash.sha256()

finished = False
with打开(fname,'rb')作为f:
f.seek(pos)
而不是(退出或完成):
for xrange(blocksperchunk):
block = f.read(blocksize)
if block =='':
finished = True
break
sha.update(block)

pos + =大块
sys.stderr.write(%6.2f %%%d \r%(100.0 * pos / fsize,fsize))
完成或退出:
break

如果退出:
打开(hashname,'wb')作为f:
pickle.dump((pos,fsize,sha),f,-1)
elif os.path.exists(hashname):
os.remove(hashname)

return(not quit),pos,sha.hexdigest()


def main():
如果len(sys.argv)!= 2:
print文件的可恢复的SHA-256哈希。
print用法:\ npython%s filename\\\
%sys.argv [0]
exit(1)

fname = sys.argv [1]
filesize = os.path.getsize(fname)

signal.signal(signal.SIGINT,handler)
signal.signal(signal.SIGTERM,handler)

已完成,pos,hexdigest = do_hash(fname,filesize)
如果完成:
print%s%s%(十六进制,fname)
其他:
print %s'不完整的%s%
打印%s%十六进制
打印%d /%d字节已处理的sha-256哈希值。 %(pos,filesize)


if __name__ =='__main__':
main()

演示

  import rehash
import pickle
sha = rehash.sha256("Hello ")
s = pickle.dumps(sha.ctx)
sha = rehash.sha256()
sha.ctx = pickle.loads(s)
sha.update("World")
print sha.hexdigest()

输出

  a591a6d40bf420404a011733cfb7b190d62c65bf0bcda32b57b277d9ad9f146e 

注意:我要感谢PM2Ring提供的精彩代码。


I'd like to create a hashlib instance, update() it, then persist its state in some way. Later, I'd like to recreate the object using this state data, and continue to update() it. Finally, I'd like to get the hexdigest() of the total cumulative run of data. State persistence has to survive across multiple runs.

Example:

import hashlib

# Feed the first two pieces of data into a running SHA-1.
m = hashlib.sha1()
m.update(b'one')
m.update(b'two')
# somehow, persist the state of m here

# later, possibly in another process
# recreate m from the persisted state
m.update(b'three')
m.update(b'four')
# Parenthesised single-argument print behaves the same on Python 2 and 3.
print(m.hexdigest())
# at this point, m.hexdigest() should be equal to
# hashlib.sha1(b'onetwothreefour').hexdigest()
# (update() returns None, so it cannot be chained into hexdigest()).

EDIT:

I did not find a good way to do this with python in 2010 and ended up writing a small helper app in C to accomplish this. However, there are some great answers below that were not available or known to me at the time.

解决方案

You can do it this way using ctypes, no helper app in C is needed:-

rehash.py

#! /usr/bin/env python

''' A resumable implementation of SHA-256 using ctypes with the OpenSSL crypto library

    Written by PM 2Ring 2014.11.13
'''

import ctypes.util
import os
from ctypes import *

# Constants mirroring OpenSSL's <openssl/sha.h>.
SHA_LBLOCK = 16             # number of SHA_LONG words in the block buffer
SHA256_DIGEST_LENGTH = 32   # size of a SHA-256 digest in bytes


class SHA256_CTX(Structure):
    '''ctypes mirror of OpenSSL's SHA256_CTX hashing state.

    OpenSSL declares every word of this struct as SHA_LONG, which is
    "unsigned int" (32 bits), so c_uint is used throughout.  Declaring the
    words as c_long makes the mirror 216 bytes instead of 112 on LP64
    platforms (64-bit Linux/macOS): hashing still works because the buffer
    is merely oversized, but the field offsets are wrong and pickled
    contexts carry 104 bytes of padding.

    NOTE(review): contexts pickled with the old c_long layout on an LP64
    platform have a different size and cannot be loaded into this layout.
    '''
    _fields_ = [
        ("h", c_uint * 8),               # SHA_LONG h[8]
        ("Nl", c_uint),                  # SHA_LONG Nl
        ("Nh", c_uint),                  # SHA_LONG Nh
        ("data", c_uint * SHA_LBLOCK),   # SHA_LONG data[SHA_LBLOCK]
        ("num", c_uint),                 # unsigned int num
        ("md_len", c_uint),              # unsigned int md_len
    ]

# Output buffer type handed to SHA256_Final(): SHA256_DIGEST_LENGTH raw bytes.
HashBuffType = c_ubyte * SHA256_DIGEST_LENGTH


def _load_crypto():
    '''Load and return the OpenSSL library that provides the SHA256_* calls.

    On Windows the legacy "libeay32.dll" name is used, as before.  Elsewhere
    libcrypto (the library that actually exports SHA256_Init/Update/Final)
    is located with ctypes.util.find_library, which also finds versioned
    names such as "libcrypto.so.3".  The unversioned "libssl.so" used
    previously is kept as a fallback; it normally exists only when the
    OpenSSL development package is installed.

    Raises OSError if no candidate library can be loaded.
    '''
    if os.name == "nt":
        return cdll.LoadLibrary("libeay32.dll")

    located = ctypes.util.find_library("crypto")
    if located:
        try:
            return cdll.LoadLibrary(located)
        except OSError:
            # Fall through to the historical library name below.
            pass
    return cdll.LoadLibrary("libssl.so")


crypto = _load_crypto()

class sha256(object):
    '''SHA-256 hash object backed by OpenSSL whose state can be pickled.

    The whole hashing state lives in ``self.ctx`` (a SHA256_CTX structure),
    so pickling the object, or just ``ctx``, captures a hash in progress and
    lets it be resumed later.  The method names follow hashlib's
    (update / copy / digest / hexdigest).
    '''
    digest_size = SHA256_DIGEST_LENGTH

    def __init__(self, datastr=None):
        '''Start a new hash, optionally feeding it the byte string datastr.'''
        self.ctx = SHA256_CTX()
        crypto.SHA256_Init(byref(self.ctx))
        if datastr:
            self.update(datastr)

    def update(self, datastr):
        '''Feed the byte string datastr into the running hash.

        Raises TypeError for text (unicode) strings: ctypes would pass their
        wide-character buffer, silently hashing the wrong bytes.
        '''
        if isinstance(datastr, type(u'')):
            raise TypeError("Unicode-objects must be encoded before hashing")
        # The length parameter of SHA256_Update is a size_t; c_int would
        # truncate inputs of 2 GiB or more.
        crypto.SHA256_Update(byref(self.ctx), datastr, c_size_t(len(datastr)))

    #Clone the current context
    def _copy_ctx(self):
        '''Return an independent copy of the current SHA256_CTX.'''
        ctx = SHA256_CTX()
        pointer(ctx)[0] = self.ctx
        return ctx

    def copy(self):
        '''Return a new sha256 object holding a copy of the current state.'''
        other = sha256()
        other.ctx = self._copy_ctx()
        return other

    def digest(self):
        '''Return the digest of the data fed so far as a byte string.

        SHA256_Final() clears the context it is given, so it is run on a
        scratch copy; self.ctx stays untouched and update() may still be
        called afterwards.
        '''
        scratch = self._copy_ctx()
        hashbuff = HashBuffType()
        crypto.SHA256_Final(hashbuff, byref(scratch))
        # bytes is str on Python 2, so the return type there is unchanged.
        return bytes(bytearray(hashbuff))

    def hexdigest(self):
        '''Return the digest as a lowercase hexadecimal native string.'''
        # str.encode('hex') exists only on Python 2; format each byte instead.
        return ''.join('%02x' % byte for byte in bytearray(self.digest()))

#Tests
def main():
    '''Self-test: print digests from this module's sha256, then from hashlib.

    The same three byte strings are hashed with both implementations so the
    printed values can be compared by eye.  A partially fed hash is pickled
    and restored, and a pickled copy is checked against the object it was
    copied from.

    Raises AssertionError if the restored copy produces a different digest.
    '''
    import hashlib
    try:
        import cPickle as pickle    # Python 2
    except ImportError:
        import pickle               # Python 3

    # Byte literals: identical to the plain literals on Python 2, and the
    # only input both hash implementations accept on Python 3.
    data = (b"Nobody expects ", b"the spammish ", b"imposition!")

    print("rehash\n")

    shaA = sha256(b''.join(data))
    print(shaA.hexdigest())
    print(repr(shaA.digest()))
    print("digest size = %d" % shaA.digest_size)
    print("")

    shaB = sha256()
    shaB.update(data[0])
    print(shaB.hexdigest())

    #Test pickling
    sha_pickle = pickle.dumps(shaB, -1)
    print("Pickle length: %d" % len(sha_pickle))
    shaC = pickle.loads(sha_pickle)

    shaC.update(data[1])
    print(shaC.hexdigest())

    #Test copying. Note that copy can be pickled
    sha_copy = pickle.loads(pickle.dumps(shaC.copy(), -1))

    shaC.update(data[2])
    print(shaC.hexdigest())

    # Previously the copy was overwritten without being used; feed it the
    # same data and make sure it agrees with the object it came from.
    sha_copy.update(data[2])
    if sha_copy.hexdigest() != shaC.hexdigest():
        raise AssertionError("pickled copy diverged from the original hash")


    #Verify against hashlib.sha256()
    print("\nhashlib\n")

    shaD = hashlib.sha256(b''.join(data))
    print(shaD.hexdigest())
    print(repr(shaD.digest()))
    print("digest size = %d" % shaD.digest_size)
    print("")

    shaE = hashlib.sha256(data[0])
    print(shaE.hexdigest())

    shaE.update(data[1])
    print(shaE.hexdigest())

    #Test copying. Note that hashlib copy can NOT be pickled
    shaF = shaE.copy()
    shaF.update(data[2])
    print(shaF.hexdigest())


if __name__ == '__main__':
    main()

resumable_SHA-256.py

#! /usr/bin/env python

''' Resumable SHA-256 hash for large files using the OpenSSL crypto library

    The hashing process may be interrupted by Control-C (SIGINT) or SIGTERM.
    When a signal is received, hashing continues until the end of the
    current chunk, then the current file position, total file size, and
    the sha object is saved to a file. The name of this file is formed by
    appending '.hash' to the name of the file being hashed.

    Just re-run the program to resume hashing. The '.hash' file will be deleted
    once hashing is completed.

    Written by PM 2Ring 2014.11.14
'''

import cPickle as pickle
import os
import signal
import sys

import rehash

# Flag set to True by the signal handler; the hashing loop checks it between
# chunks.  (The name shadows the interactive builtin ``quit``.)
quit = False

# Data is read in blocks of 64kB ...
blocksize = 64 * 1024
# ... and the quit flag is examined once every 256 blocks.
blocksperchunk = 256

# Number of bytes in one chunk.
chunksize = blocksperchunk * blocksize

def handler(signum, frame):
    '''Signal handler: announce the signal and raise the global quit flag.

    signum is the number of the received signal; frame is unused.
    '''
    global quit
    # Parenthesised single-argument print prints the same text on Python 2
    # and is valid syntax on Python 3.
    print("\nGot signal %d, cleaning up." % signum)
    quit = True


def do_hash(fname, filesize):
    '''Hash the file fname, resuming from a saved '.hash' state if present.

    fname is the path of the file to hash and filesize its current size in
    bytes.  If fname + '.hash' exists, the position, recorded size and sha
    object are loaded from it and hashing continues from that position;
    otherwise hashing starts at the beginning.  When the global quit flag
    stops the loop before the end of the file, the state is written to the
    '.hash' file; once the whole file has been hashed that file is removed.

    Returns (finished, pos, hexdigest): whether the end of the file was
    reached, the number of bytes hashed so far, and the digest of those
    bytes.  Exits with status 1 if the recorded size differs from filesize.
    '''
    hashname = fname + '.hash'
    if os.path.exists(hashname):
        # NOTE(security): unpickling can execute arbitrary code -- only
        # resume from '.hash' files this program wrote itself.
        with open(hashname, 'rb') as f:
            pos, fsize, sha = pickle.load(f)
        if fsize != filesize:
            print("Error: file size of '%s' doesn't match size recorded in '%s'" % (fname, hashname))
            print("%d != %d. Aborting" % (fsize, filesize))
            sys.exit(1)
    else:
        pos, fsize, sha = 0, filesize, rehash.sha256()

    finished = False
    with open(fname, 'rb') as f:
        f.seek(pos)
        while not (quit or finished):
            for _ in range(blocksperchunk):
                block = f.read(blocksize)
                # An empty read means end of file; testing truthiness works
                # for Python 2 str and Python 3 bytes alike.
                if not block:
                    finished = True
                    break
                sha.update(block)
                # Count the bytes actually hashed so pos never overshoots
                # the end of the file.
                pos += len(block)

            # An empty file has nothing left to do; avoid dividing by zero.
            percent = 100.0 * pos / fsize if fsize else 100.0
            sys.stderr.write(" %6.2f%% of %d\r" % (percent, fsize))

    if finished:
        # The hash is complete even if a signal arrived during the last
        # chunk, so any saved state is obsolete.
        if os.path.exists(hashname):
            os.remove(hashname)
    else:
        with open(hashname, 'wb') as f:
            pickle.dump((pos, fsize, sha), f, -1)

    return finished, pos, sha.hexdigest()


def main():
    '''Command-line entry point: hash the file named by the only argument.

    Prints a usage message and exits with status 1 unless exactly one
    argument is given.  Otherwise installs handler for SIGINT and SIGTERM,
    runs do_hash() and prints either the finished digest followed by the
    file name, or the partial digest with the number of bytes processed.
    '''
    if len(sys.argv) != 2:
        print("Resumable SHA-256 hash of a file.")
        print("Usage:\npython %s filename\n" % sys.argv[0])
        # sys.exit is always available; the bare exit() helper is injected
        # by the site module and is missing under "python -S".
        sys.exit(1)

    fname = sys.argv[1]
    filesize = os.path.getsize(fname)

    signal.signal(signal.SIGINT, handler)
    signal.signal(signal.SIGTERM, handler)

    finished, pos, hexdigest = do_hash(fname, filesize)
    if finished:
        print("%s  %s" % (hexdigest, fname))
    else:
        print("sha-256 hash of '%s' incomplete" % fname)
        print("%s" % hexdigest)
        print("%d / %d bytes processed." % (pos, filesize))


if __name__ == '__main__':
    main()

demo

import rehash
import pickle

# Hash the first part of the message, then pickle only the raw context.
sha = rehash.sha256(b"Hello ")
s = pickle.dumps(sha.ctx)

# Start from a fresh object, restore the saved context and carry on hashing.
sha = rehash.sha256()
sha.ctx = pickle.loads(s)
sha.update(b"World")
# Parenthesised single-argument print works on Python 2 and 3 alike.
print(sha.hexdigest())

output

a591a6d40bf420404a011733cfb7b190d62c65bf0bcda32b57b277d9ad9f146e

Note: I would like to thank PM2Ring for his wonderful code.

这篇关于持久化 hashlib 状态的文章就介绍到这了,希望我们推荐的答案对大家有所帮助,也希望大家多多支持IT屋!

查看全文
登录 关闭
扫码关注1秒登录
发送“验证码”获取 | 15天全站免登陆