因為 mldonkey 需要所寫的檔名轉換程式,剛剛又加了些功能。
可以在任兩種編碼間轉換檔名;可以把 %hh 和 _000 這種格式的數字編碼轉換成 UTF-8;可以只轉換編碼,不改變檔名。
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
"""$Id: fnconv.py,v 1.4 2005/06/02 13:56:21 yungyuc Exp $

Convert file names in the file system from one encoding to another.

File names are handled as byte strings throughout, so the script behaves
the same on Python 2 (2.6+) and Python 3.

Written by Yung-Yu Chen <yyc@seety.org>
copyright 2004, All Rights Reserved."""

import codecs
import os
import sys
from glob import glob
from optparse import OptionParser

# Characters accepted as digits of a numeric escape, per supported base.
_DIGITS = {10: "0123456789", 16: "0123456789abcdefABCDEF"}

try:
    _text_chr = unichr  # Python 2: build a unicode character
except NameError:
    _text_chr = chr  # Python 3


def _coerce(text, binary):
    """Return ASCII *text* as bytes if *binary* is true, else as text."""
    if binary and not isinstance(text, bytes):
        return text.encode("ascii")
    if not binary and isinstance(text, bytes):
        return text.decode("ascii")
    return text


def _as_bytes(name):
    """Return *name* as a byte string.

    Byte strings are returned unchanged.  Text is converted with
    ``os.fsencode`` where that function exists (Python 3), otherwise it is
    encoded with the file system encoding.
    """
    if isinstance(name, bytes):
        return name
    fsencode = getattr(os, "fsencode", None)
    if fsencode is not None:
        return fsencode(name)
    return name.encode(sys.getfilesystemencoding() or "utf-8")


def _write(*pieces):
    """Write *pieces* (text or bytes) to standard output and flush it.

    Byte strings are written unmodified; formatting them with ``%s`` on
    Python 3 would print ``b'...'`` instead of the name itself.
    """
    raw = getattr(sys.stdout, "buffer", None)
    for piece in pieces:
        if bytes is str or not isinstance(piece, bytes):
            sys.stdout.write(piece)
        elif raw is not None:
            # Push pending text first so the output order is preserved.
            sys.stdout.flush()
            raw.write(piece)
        else:
            # Replaced stdout without a binary layer: best-effort display.
            sys.stdout.write(piece.decode("utf-8", "replace"))
    sys.stdout.flush()


def conv(str, code_from, code_to):
    """Transcode the byte string *str* from *code_from* to *code_to*.

    Both encodings are Python codec names.  Raises ``LookupError`` for an
    unknown codec and ``UnicodeError`` when *str* cannot be converted.
    """
    decode = codecs.getdecoder(code_from)
    encode = codecs.getencoder(code_to)
    return encode(decode(str)[0])[0]


def ndecode(encoded, leading, base):
    """Decode numeric character escapes found in *encoded*.

    An escape is *leading* followed by exactly two hexadecimal digits
    (``base == 16``) or exactly three decimal digits (``base == 10``).
    Anything that is not a complete, valid escape is copied through
    unchanged, as is an escape whose value does not fit in one byte when
    *encoded* is a byte string.

    *encoded* may be a byte string or text; the result has the same type.
    Raises ``RuntimeError`` for any other *base*.
    """
    # The base determines how many digits follow the marker.
    if base == 10:
        step = 3
    elif base == 16:
        step = 2
    else:
        raise RuntimeError("unsupported base: %r" % (base,))

    binary = isinstance(encoded, bytes)
    marker = _coerce(leading, binary)
    valid = _coerce(_DIGITS[base], binary)
    skip = len(marker)

    pieces = []
    pos = 0
    end = len(encoded)
    while pos < end:
        decoded = None
        if skip and encoded.startswith(marker, pos):
            digits = encoded[pos + skip:pos + skip + step]
            # strip() leaves nothing behind only if every character is a
            # digit; int() alone would also accept signs, blanks and "_".
            if len(digits) == step and not digits.strip(valid):
                value = int(digits, base)
                if not binary:
                    decoded = _text_chr(value)
                elif value < 256:
                    decoded = bytes(bytearray((value,)))
        if decoded is None:
            # Slicing (not indexing) keeps the type for Python 3 bytes.
            pieces.append(encoded[pos:pos + 1])
            pos += 1
        else:
            pieces.append(decoded)
            pos += skip + step
    return encoded[:0].join(pieces)


def getnewname(str, options):
    """Return *str* converted according to *options*, as a byte string.

    With ``options.pdecode`` the ``%hh`` escapes are decoded first; with
    ``options.udecode`` (checked only if ``pdecode`` is false) the ``_ddd``
    escapes are decoded first.  In both cases the decoded bytes are read as
    UTF-8 instead of ``options.code_from``.  The result is encoded in
    ``options.code_to``.  *options* is not modified.
    """
    name = _as_bytes(str)
    code_from = options.code_from
    if options.pdecode:
        # Characters written as "%" plus two hexadecimal digits.
        name = ndecode(name, "%", 16)
        code_from = "UTF-8"
    elif options.udecode:
        # Characters written as "_" plus three decimal digits.
        name = ndecode(name, "_", 10)
        code_from = "UTF-8"
    return conv(name, code_from, options.code_to)


def process_string(options, args):
    """Convert ``options.stringonly`` and print the result on one line."""
    _write(getnewname(options.stringonly, options), "\n")


def process_filename(options, args):
    """Interactively rename the files matched by the glob patterns *args*.

    Every match is shown next to its converted name; the file is renamed
    only when the answer read from standard input starts with ``y`` or
    ``Y``.  Without patterns, ``*`` is used.  A name that cannot be
    converted is reported and skipped.
    """
    # Default to every entry of the current directory.
    patterns = args or ["*"]

    names = []
    for pattern in patterns:
        names.extend(glob(pattern))

    for name in names:
        old = _as_bytes(name)
        # Show the name before conversion.
        _write(old, " -> ")
        try:
            new = getnewname(old, options)
        except UnicodeError as err:
            # One unconvertible name must not abort the whole batch.
            _write("skipped (%s)\n" % err)
            continue
        # Show the converted name and ask for confirmation.
        _write(new, " [y/N]")
        tokens = sys.stdin.readline().split()
        if tokens and tokens[0][0] in ("y", "Y"):
            os.renames(old, new)
            _write(new, " converted.\n")


def main():
    """Parse the command line and run the string or file-name mode."""
    _, options, args = process_argument()
    if options.stringonly != "":
        process_string(options, args)
    else:
        process_filename(options, args)


def process_argument(argv=None):
    """Parse command-line arguments.

    *argv* is the list of arguments to parse; ``None`` (the default) makes
    ``optparse`` use ``sys.argv[1:]``.  Returns the tuple
    ``(parser, options, args)``.
    """
    op = OptionParser(
        usage="usage: %prog [-p] [-u] -f <encoding> -t <encoding> \n"
              " -s <string>",
        version="%prog $Revision: 1.4 $",
    )
    op.add_option("-p", action="store_true", dest="pdecode", default=False,
                  help="percetage encoded")
    op.add_option("-u", action="store_true", dest="udecode", default=False,
                  help="underline encoded")
    op.add_option("-s", action="store", type="string", dest="stringonly",
                  default="", help="decode/encode supplied string only")
    op.add_option("-f", action="store", type="string", dest="code_from",
                  default='big5hkscs',
                  help="python encoding to convert from "
                       "(default 'big5hkscs')")
    op.add_option("-t", action="store", type="string", dest="code_to",
                  default='utf_8',
                  help="python encoding to convert to "
                       "(default 'utf_8')")
    options, args = op.parse_args(argv)
    return op, options, args


if __name__ == '__main__':
    main()
Posted by yungyuc
at 22:05,
0 comment,
0 trackback.
Add a trackback
Please send trackback to: http://blog.seety.org/everydaywork/2005/6/2/343/trackback/.
Add a comment