因為 mldonkey 的需要而寫的檔名轉換程式,剛剛又加了些功能。
可以在任兩種編碼間轉換檔名;可以把 %hh 和 _000 這種格式的數字編碼轉換成 UTF-8;也可以只轉換指定字串的編碼,不改變檔名。
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
"""$Id: fnconv.py,v 1.4 2005/06/02 13:56:21 yungyuc Exp $
把檔案系統內的檔名
從某個 encoding 轉換到另一個 encoding
Written by Yung-Yu Chen <yyc@seety.org>
copyright 2004, All Rights Reserved."""
####################################
#
import sys, os, codecs
#
####################################
def conv( str, code_from, code_to ):
####################################
#
    """Convert a string from one encoding to another.

    Parameters:
      str       -- byte string encoded in ``code_from``.  A text (unicode)
                   string is accepted as well; it is treated as already
                   decoded, so ``code_from`` is not applied to it.
      code_from -- name of the Python codec the input is encoded in.
      code_to   -- name of the Python codec used to encode the result.

    Returns the byte string produced by the ``code_to`` encoder.

    Raises LookupError for an unknown codec name and UnicodeError when the
    data cannot be decoded from ``code_from`` or encoded to ``code_to``.
    """
    # NOTE: the first parameter shadows the builtin ``str``; the name is kept
    # because it is part of the function's signature.
    if isinstance( str, type( u"" ) ):
        # Already text: pushing it through a byte decoder would either fail
        # outright or silently coerce it through ASCII first.
        text = str
    else:
        text = codecs.getdecoder( code_from )( str )[0]
    return codecs.getencoder( code_to )( text )[0]
#
####################################
def ndecode( encoded, leading, base ):
####################################
#
    """Decode numeric character escapes embedded in a string.

    An escape is the single marker character ``leading`` followed by a
    fixed-width number: three decimal digits when ``base`` is 10 (``_065``)
    or two hexadecimal digits when ``base`` is 16 (``%41``).  Each escape is
    replaced by ``chr()`` of its value.  Everything else is copied through
    unchanged, including a marker that is not followed by a complete,
    well-formed number or whose value does not fit in one byte (> 255).

    Parameters:
      encoded -- the string that may contain escapes.
      leading -- the one-character escape marker, e.g. "%" or "_".
      base    -- numeric base of the escapes; only 10 and 16 are supported.

    Returns the decoded string.

    Raises RuntimeError if ``base`` is neither 10 nor 16.
    """
    # The base fixes both the width of an escape and the digits it may use.
    if base == 10:
        step = 3
        digits = "0123456789"
    elif base == 16:
        step = 2
        digits = "0123456789abcdefABCDEF"
    else:
        raise RuntimeError( "unsupported base: %r" % (base,) )
    # Collect the output pieces and join once at the end.
    pieces = []
    i = 0
    while i < len( encoded ):
        if encoded[i] == leading:
            number = encoded[i+1:i+1+step]
            # Check the digits explicitly: int() alone would also accept
            # short slices at the end of the string, signs and whitespace.
            if len( number ) == step and all( c in digits for c in number ):
                value = int( number, base )
                if value < 256:
                    pieces.append( chr( value ) )
                    i += 1 + step
                    continue
        # Ordinary character, or a marker that does not start a valid escape.
        pieces.append( encoded[i] )
        i += 1
    return "".join( pieces )
#
####################################
def getnewname( str, options ):
####################################
#
    """Return the converted form of a file name (or arbitrary string).

    Parameters:
      str     -- the name to convert.
      options -- parsed command-line options; the attributes ``pdecode``,
                 ``udecode``, ``code_from`` and ``code_to`` are read.

    When ``options.pdecode`` is set, "%hh" hexadecimal escapes are decoded
    first; otherwise, when ``options.udecode`` is set, "_ddd" decimal escapes
    are decoded (``pdecode`` wins if both are set).  After either decoding
    step the source encoding is taken to be UTF-8 regardless of
    ``options.code_from``.  The result is then converted to
    ``options.code_to``.

    ``options`` itself is not modified.
    """
    # Effective source encoding; kept local so the caller's options object
    # is left untouched.
    code_from = options.code_from
    if options.pdecode:
        # Characters encoded as "%" + two hexadecimal digits.  The marker is
        # the single character "%": this is a plain string, not a format
        # string, so the percent sign must not be doubled.
        str = ndecode( str, "%", 16 )
        code_from = "UTF-8"
    elif options.udecode:
        # Characters encoded as "_" + three decimal digits.
        str = ndecode( str, "_", 10 )
        code_from = "UTF-8"
    # Perform the encoding conversion and hand back the result.
    return conv( str, code_from, options.code_to )
#
####################################
def process_string( options, args ):
####################################
#
    """Convert the string held in ``options.stringonly`` and print it.

    The converted string is written to standard output followed by a
    newline.  ``args`` is accepted but not used.
    """
    sys.stdout.write( "%s\n" % getnewname( options.stringonly, options ) )
#
####################################
def process_filename( options, args ):
####################################
#
    """Interactively rename the files matched by the given glob patterns.

    Parameters:
      options -- parsed command-line options, passed on to getnewname().
      args    -- list of glob patterns; when empty, "*" is used.  The list
                 itself is not modified.

    For every matching name the old and the converted name are shown and an
    answer is read from standard input; the file is renamed only when the
    answer starts with "y" or "Y".  A name that cannot be converted between
    the chosen encodings is reported and skipped instead of aborting the
    whole run.
    """
    from glob import glob
    # Fall back to "*" when no pattern was given (working on a copy).
    patterns = list( args ) or ["*"]
    # Expand every pattern into the list of names to process.
    fns = []
    for pattern in patterns:
        fns.extend( glob( pattern ) )
    for fn in fns:
        # Show the name before conversion.
        sys.stdout.write( "%s -> " % fn )
        try:
            fn_cved = getnewname( fn, options )
        except UnicodeError as err:
            # Typically a name that is not valid in the source encoding,
            # e.g. one that has been converted already.
            sys.stdout.write( "(skipped: %s)\n" % err )
            continue
        # Show the converted name and ask for confirmation.  The prompt has
        # no trailing newline, so flush it explicitly before blocking on
        # standard input.
        sys.stdout.write( "%s [y/N]" % fn_cved )
        sys.stdout.flush()
        tokens = sys.stdin.readline().split()
        # An empty line (or end of input) counts as "no".
        answer = tokens[0] if tokens else 'n'
        if answer[0] in "yY":
            # os.renames() also creates any missing directories of the new
            # path and prunes directories of the old path left empty.
            os.renames( fn, fn_cved )
            sys.stdout.write( "%s converted.\n" % fn_cved )
#
####################################
def main():
####################################
#
    """Parse the command line and run the requested mode.

    With an empty ``stringonly`` option the file names are processed;
    otherwise only the supplied string is converted.
    """
    # The parser object returned first is not needed here.
    _parser, options, args = process_argument()
    if options.stringonly == "":
        process_filename( options, args )
    else:
        process_string( options, args )
#
####################################
def process_argument():
####################################
#
    """Build the option parser and parse the command line.

    Options:
      -p  decode "%"-prefixed escapes before converting (flag, default off)
      -u  decode "_"-prefixed escapes before converting (flag, default off)
      -s  convert the supplied string only (default "")
      -f  encoding to convert from (default 'big5hkscs')
      -t  encoding to convert to (default 'utf_8')

    Returns the tuple ``(parser, options, args)`` where ``args`` holds the
    remaining positional arguments.
    """
    from optparse import OptionParser
    op = OptionParser(
        usage="usage: %prog [-p] [-u] -f <encoding> -t <encoding> \n"
              "          -s <string>",
        version="%prog $Revision: 1.4 $" )
    op.add_option( "-p", action="store_true",
        dest="pdecode", default=False,
        help="percentage encoded" )
    op.add_option( "-u", action="store_true",
        dest="udecode", default=False,
        help="underline encoded" )
    op.add_option( "-s", action="store", type="string",
        dest="stringonly", default="",
        help="decode/encode supplied string only" )
    op.add_option( "-f", action="store", type="string",
        dest="code_from", default='big5hkscs',
        help="python encoding to convert from "
             "(default 'big5hkscs')" )
    op.add_option( "-t", action="store", type="string",
        dest="code_to", default='utf_8',
        help="python encoding to convert to "
             "(default 'utf_8')" )
    # parse_args() without arguments reads sys.argv[1:].
    options, args = op.parse_args()
    return op, options, args
#
####################################
# Run the tool only when this file is executed as a script.
if __name__ == "__main__":
    main()
Posted by yungyuc
at 22:05,
0 comment,
0 trackback.
Add a trackback
Please send trackback to: http://blog.seety.org/everydaywork/2005/6/2/343/trackback/.
Add a comment

