"""File hashing functionsUses hashlib to perform either md5 or sha1 hashing in a memory controlled manner,with a progress bar for larger files."""importhashlibfrompathlibimportPathimportnumpyasnpfromtqdmimporttqdmBUF_SIZE=2**28# 256 megs
def blake2b(file_path, *args, **kwargs):
    """
    Return the BLAKE2b hex digest of a file, reading it chunk by chunk.

    Any extra positional or keyword arguments are forwarded unchanged to
    ``_hash_file``.

    Example::

        blake2b_hash = hashfile.blake2b(file_path)
    """
    hasher = hashlib.blake2b()
    return _hash_file(file_path, hasher, *args, **kwargs)
def md5(file_path, *args, **kwargs):
    """
    Return the MD5 hex digest of a file, reading it chunk by chunk.

    Any extra positional or keyword arguments are forwarded unchanged to
    ``_hash_file``.

    Example::

        md5_hash = hashfile.md5(file_path)
    """
    hasher = hashlib.md5()
    return _hash_file(file_path, hasher, *args, **kwargs)
def sha1(file_path, *args, **kwargs):
    """
    Return the SHA-1 hex digest of a file, reading it chunk by chunk.

    Any extra positional or keyword arguments are forwarded unchanged to
    ``_hash_file``.

    Example::

        sha1_hash = hashfile.sha1(file_path)
    """
    hasher = hashlib.sha1()
    return _hash_file(file_path, hasher, *args, **kwargs)
def _hash_file(file_path, hash_obj, progress_bar=None):
    """
    Feed a file into ``hash_obj`` chunk by chunk and return the hex digest.

    The file is read unbuffered into a single reusable buffer of at most
    ``BUF_SIZE`` bytes, so memory use stays bounded whatever the file size.

    :param file_path: path of the file to hash (anything ``pathlib.Path`` accepts)
    :param hash_obj: hash object exposing ``update`` and ``hexdigest``
        (e.g. ``hashlib.md5()``); it is updated in place
    :param progress_bar: True / False forces the tqdm progress bar on / off;
        None (default) shows it only for files larger than 512 MiB
    :return: the hexadecimal digest string from ``hash_obj.hexdigest()``
    """
    file_path = Path(file_path)
    file_size = file_path.stat().st_size
    # by default prints a progress bar only for files above 512 Mb
    if progress_bar is None:
        progress_bar = file_size > (512 * 1024 * 1024)

    # Size the buffer to the file instead of always zero-filling BUF_SIZE
    # bytes: small files no longer cost a 256 MiB allocation each. The floor
    # keeps reads reasonably sized when the reported size is 0 or tiny.
    min_buf_size = 64 * 1024
    buf_size = min(BUF_SIZE, max(file_size, min_buf_size))
    mv = memoryview(bytearray(buf_size))

    # Integer ceiling division: expected number of full-buffer reads.
    n_chunks = -(-file_size // buf_size)

    # Context managers guarantee the bar and the file are both closed even if
    # opening, reading or hashing raises.
    with tqdm(total=n_chunks, disable=not progress_bar) as pbar:
        with open(file_path, 'rb', buffering=0) as f:
            # readinto returns the number of bytes read; 0 signals end of file
            for n in iter(lambda: f.readinto(mv), 0):
                hash_obj.update(mv[:n])
                pbar.update(1)
    return hash_obj.hexdigest()