md5sum 好像做不到這件事(遞迴計算整個目錄下所有檔案的 md5 checksum),所以寫了一個 Python script:
#!/usr/bin/env python
# $Id: hashdir.py 24 2005-11-08 09:17:44Z yungyuc $
import sys
import os
def getDigestPython( fn ):
    """Return the MD5 digest of the file *fn* as a lowercase hex string.

    The file is opened in binary mode and read in 4 KiB chunks, so large
    files are hashed without being loaded into memory at once.

    fn -- path of the file to hash.

    Raises IOError/OSError when the file cannot be opened or read.
    """
    # hashlib supersedes the standalone ``md5`` module, which was removed
    # in Python 3.
    import hashlib
    chunklen = 1024*4
    m = hashlib.md5()
    # The ``with`` block closes the file even if a read fails part-way.
    with open( fn, 'rb' ) as f:
        # Stop only on an empty read; a short read by itself is not a
        # reliable end-of-file indicator.
        for data in iter( lambda: f.read( chunklen ), b"" ):
            m.update( data )
    return m.hexdigest()
def getDigestMd5sum( fn ):
    """Return the MD5 digest of the file *fn* using the external ``md5sum``.

    Runs ``md5sum -b`` on the file and returns the first whitespace-separated
    field of its output, i.e. the hex digest, as a string.

    fn -- path of the file to hash.

    Raises OSError when ``md5sum`` cannot be started and RuntimeError when
    it exits with a non-zero status (for example, unreadable file).
    """
    # subprocess replaces the ``popen2`` module, which was removed in
    # Python 3.
    import subprocess
    # An argument list bypasses the shell, so quotes, ``$`` or backticks in
    # the file name cannot break or inject into the command.  ``--`` stops
    # a name starting with ``-`` from being parsed as an option.
    proc = subprocess.Popen( ["md5sum", "-b", "--", fn],
                             stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE )
    # communicate() drains and closes both pipes and waits for the child.
    out, err = proc.communicate()
    if proc.returncode != 0:
        if not isinstance( err, str ):
            err = err.decode( "utf-8", "replace" )
        # Fail loudly instead of returning a word of the error message as
        # if it were a digest.
        raise RuntimeError( "md5sum failed for %s: %s" % (fn, err.strip()) )
    digest = out.split()[0]
    if not isinstance( digest, str ):
        digest = digest.decode( "ascii" )
    # GNU md5sum prefixes the line with a backslash when it had to escape
    # characters in the file name; that marker is not part of the digest.
    return digest.lstrip( "\\" )
def main():
    """Hash every file below a directory and record the digests in a log.

    Command line: ``<path to hash> <log file>``.  For each file found by
    os.walk() one line ``<digest> <path>`` is written to the log file and
    echoed to standard output.  A file that cannot be read is still listed,
    with a blank placeholder in place of the digest.  The log file itself is
    skipped when it lies inside the hashed tree.

    Exit status: 0 after printing the usage message when arguments are
    missing, 1 when the log file cannot be opened for writing, 2 when the
    path to hash is not a directory.
    """
    try:
        findpath = sys.argv[1]
        logfn = sys.argv[2]
    except IndexError:
        sys.stdout.write( "Usage: %s <path to hash> <log file>\n" %
            os.path.basename(sys.argv[0]) )
        sys.exit(0)

    # Opening with 'w' both discards any previous log and proves that the
    # log is writable before any hashing starts.
    try:
        log = open( logfn, 'w' )
    except (IOError, OSError):
        sys.stdout.write( "Unable to open logfile: %s\n" %
            logfn )
        sys.exit(1)

    # Keep the log open for the whole run; ``with`` closes it on every
    # exit path, including sys.exit() below.
    with log:
        if not os.path.isdir( findpath ):
            sys.stdout.write( "Is not a directory: %s\n" %
                findpath )
            sys.exit(2)

        logpath = os.path.realpath( logfn )
        for root, dirs, files in os.walk( findpath ):
            for name in files:
                thisfn = os.path.join( root, name )
                # The log is still being written, so a digest recorded for
                # it could never match its final content.
                if os.path.realpath( thisfn ) == logpath:
                    continue
                try:
                    digest = getDigestPython( thisfn )
                except (IOError, OSError):
                    # Best effort: list unreadable files without a digest.
                    digest = " "*16
                logmsg = "%s %s" % (digest, thisfn)
                log.write( "%s\n" % logmsg )
                # Flush per record so the log is current if the run is
                # interrupted.
                log.flush()
                sys.stdout.write( "%s\n" % logmsg )
# Run the command-line entry point only when executed as a script, not when
# the module is imported.
if __name__ == '__main__':
    main()
# vim: cino=>4 et nu ts=4 sw=4:
給兩個參數:第一個是要 digest 的目錄,第二個是 digest 結果要存的紀錄檔。這個 script 跑過一遍以後,將來就可以用 md5sum -b -c logfile 來核對檔案的 md5 checksum。
Python 裡的 md5 模組跑得和 md5sum 差不多快。不過一次從檔案讀取的區塊不要太大,4 kB 差不多就是最佳值(optimum)。

