md5sum 好像做不到這件事(遞迴地對整個目錄下的所有檔案計算 checksum),所以寫了一個 Python script:

#!/usr/bin/env python
# $Id: hashdir.py 24 2005-11-08 09:17:44Z yungyuc $

import sys
import os

def getDigestPython( fn ):
    """Return the MD5 digest of the file *fn* as a lowercase hex string.

    The file is opened in binary mode and read in 4 kB chunks, so files of
    any size can be hashed without loading them fully into memory.

    Raises IOError/OSError if the file cannot be opened or read.
    """
    try:
        # hashlib supersedes the md5 module, which was deprecated in
        # Python 2.5 and no longer exists in Python 3.
        from hashlib import md5 as new_md5
    except ImportError:
        # Very old interpreters (< 2.5) only ship the md5 module.
        from md5 import new as new_md5
    chunklen = 1024*4
    m = new_md5()
    f = open( fn, 'rb' )
    try:
        while True:
            data = f.read( chunklen )
            if not data:
                # An empty read is the only reliable end-of-file signal.
                break
            m.update( data )
    finally:
        # Close the file even when read() raises, so the handle never leaks.
        f.close()
    return m.hexdigest()

def getDigestMd5sum( fn ):
    """Return the MD5 digest of the file *fn* by running the external
    ``md5sum -b`` program, as a lowercase hex string.

    The file name is passed to md5sum as a separate argument (no shell is
    involved), so names containing quotes, ``$``, backticks or spaces are
    handled safely.

    Raises OSError if md5sum cannot be started, and IOError if md5sum
    exits with a non-zero status or prints no digest.
    """
    import subprocess
    # "--" stops option parsing so a name starting with "-" is still
    # treated as a file name.
    proc = subprocess.Popen( ["md5sum", "-b", "--", fn],
                             stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE )
    # communicate() drains and closes both pipes and waits for the child.
    out, err = proc.communicate()
    fields = out.split()
    if proc.returncode != 0 or not fields:
        raise IOError( "md5sum failed for %r: %r" % (fn, err.strip()) )
    digest = fields[0]
    if not isinstance( digest, str ):
        # Python 3 pipes yield bytes; the digest itself is plain ASCII.
        digest = digest.decode( "ascii" )
    # GNU md5sum prefixes the line with a backslash when it had to escape
    # characters in the file name; that prefix is not part of the digest.
    return digest.lstrip( "\\" )

def main():
    """Hash every file under a directory and record the digests.

    Command line: ``<path to hash> <log file>``.

    The log file is created (or truncated) first; then the directory tree
    is walked and, for each file, a line ``<digest>  <path>`` is appended
    to the log and echoed to standard output. A file that cannot be read
    gets a blank placeholder in the digest column and the walk continues.

    Exit status: 0 after printing the usage message, 1 if the log file
    cannot be opened, 2 if the first argument is not a directory.
    """
    try:
        findpath = sys.argv[1]
        logfn    = sys.argv[2]
    except IndexError:
        sys.stdout.write( "Usage: %s <path to hash> <log file>\n" % \
                          os.path.basename(sys.argv[0]) )
        sys.exit(0)
    # Create/truncate the log up front so an unwritable path is reported
    # before any hashing work starts.
    try:
        f = open( logfn, 'w' )
        f.close()
    except (IOError, OSError):
        sys.stdout.write( "Unable to open logfile: %s\n" % \
                          logfn )
        sys.exit(1)
    if not os.path.isdir( findpath ):
        sys.stdout.write( "Is not a directory: %s\n" % \
                          findpath )
        sys.exit(2)
    # Open the log once for the whole walk instead of once per file.
    logfile = open( logfn, 'a' )
    try:
        for root, dirs, files in os.walk( findpath ):
            for name in files:
                thisfn = os.path.join( root, name )
                try:
                    digest = getDigestPython( thisfn )
                except (IOError, OSError):
                    # Only I/O failures are tolerated; Ctrl-C and genuine
                    # programming errors must not be swallowed here.
                    digest = " "*16
                logmsg = "%s  %s" % (digest, thisfn)
                logfile.write( "%s\n" % logmsg )
                # Flush per entry so the log stays current if the run is
                # interrupted.
                logfile.flush()
                sys.stdout.write( "%s\n" % logmsg )
    finally:
        logfile.close()

# Run the directory hasher only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()

# vim: cino=>4 et nu ts=4 sw=4:

給兩個參數:第一個是要 digest 的目錄,第二個是 digest 結果要存的紀錄檔。這個 script 跑過一遍以後,將來就可以用 md5sum -b -c logfile 來核對檔案的 md5 checksum。要注意紀錄檔裡的路徑是依照當初給的目錄參數寫下來的,如果給的是相對路徑,核對時必須在同一個工作目錄下執行。

Python 裡的 md5 模組跑得和 md5sum 差不多快。然而一次從檔案讀取的區塊不要太大,4kB 差不多是 optimum。

Posted by yungyuc at 20:33, 2 comments, 0 trackback.
Change to page (10 entries in each page): 1
© hover year to navigate month: powered by django