md5sum 好像做不到這件事,所以寫了一個 Python script:
#!/usr/bin/env python3
# $Id: hashdir.py 24 2005-11-08 09:17:44Z yungyuc $
"""Record the MD5 digest of every regular file found under a directory.

Usage: hashdir.py <path to hash> <log file>

One line is written per file, both to the log file and to standard output,
in the form ``<hex digest> <file path>``.
"""

import hashlib
import os
import subprocess
import sys

# Placeholder written in the digest column when a file cannot be read.
_FAILED_DIGEST = " " * 16


def getDigestPython(fn, chunklen=1024 * 4):
    """Return the MD5 digest of the file *fn* as a lowercase hex string.

    The file is read in binary mode, *chunklen* bytes at a time, so that
    arbitrarily large files are hashed without being loaded into memory.

    Raises:
        OSError: if the file cannot be opened or read.
    """
    m = hashlib.md5()
    # The context manager closes the file even when a read fails part-way.
    with open(fn, "rb") as f:
        while True:
            data = f.read(chunklen)
            # Only an empty read means end-of-file; a short read does not.
            if not data:
                break
            m.update(data)
    return m.hexdigest()


def getDigestMd5sum(fn):
    """Return the MD5 digest of *fn* as reported by the external ``md5sum``.

    The command is run without a shell and with ``--`` before the file name,
    so quotes, spaces or a leading dash in *fn* are passed through verbatim.

    Raises:
        OSError: if the ``md5sum`` executable cannot be started.
        subprocess.CalledProcessError: if ``md5sum`` exits with an error.
    """
    result = subprocess.run(
        ["md5sum", "-b", "--", fn],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        check=True,
    )
    digest = result.stdout.split()[0].decode("ascii")
    # md5sum prefixes the line with a backslash when it had to escape
    # characters in the file name; that marker is not part of the digest.
    return digest.lstrip("\\")


def _echo(line):
    """Write *line* to standard output, escaping characters it cannot encode."""
    try:
        sys.stdout.write("%s\n" % line)
    except UnicodeEncodeError:
        safe = line.encode("ascii", "backslashreplace").decode("ascii")
        sys.stdout.write("%s\n" % safe)


def main():
    """Hash every file under ``sys.argv[1]`` and log the results to ``sys.argv[2]``.

    Exit status: 0 when arguments are missing (usage is printed), 1 when the
    log file cannot be opened for writing, 2 when the path to hash is not a
    directory.
    """
    try:
        findpath = sys.argv[1]
        logfn = sys.argv[2]
    except IndexError:
        sys.stdout.write("Usage: %s <path to hash> <log file>\n" %
                         os.path.basename(sys.argv[0]))
        sys.exit(0)

    try:
        # File names are written with the file-system encoding so that names
        # which are not valid text still round-trip into the log unchanged.
        log = open(logfn, "w", encoding=sys.getfilesystemencoding(),
                   errors="surrogateescape")
    except OSError:
        sys.stdout.write("Unable to open logfile: %s\n" % logfn)
        sys.exit(1)

    with log:
        if not os.path.isdir(findpath):
            sys.stdout.write("Is not a directory: %s\n" % findpath)
            sys.exit(2)

        logreal = os.path.realpath(logfn)
        for root, dirs, files in os.walk(findpath):
            # Sorting in place makes the traversal (and the log) reproducible.
            dirs.sort()
            for name in sorted(files):
                thisfn = os.path.join(root, name)
                # The log is still being written, so any digest recorded for
                # it would already be stale; leave it out.
                if os.path.realpath(thisfn) == logreal:
                    continue
                try:
                    digest = getDigestPython(thisfn)
                except OSError:
                    # Best effort: keep going and leave the digest blank.
                    digest = _FAILED_DIGEST
                logmsg = "%s %s" % (digest, thisfn)
                log.write("%s\n" % logmsg)
                # Flush per entry so the log can be followed while running.
                log.flush()
                _echo(logmsg)


if __name__ == '__main__':
    main()

# vim: cino=>4 et nu ts=4 sw=4:
給兩個參數:第一個是要 digest 的目錄,第二個是 digest 結果要存的紀錄檔。這個 script 跑過一遍以後,將來就可以用 md5sum -b -c logfile 來核對檔案的 md5 checksum。
Python 裡的 md5 模組跑得和 md5sum 差不多快。然而一次從檔案讀取的區塊不要太多,4kB 差不多是 optimum。