因為 mldonkey 需要所寫的檔名轉換程式,剛剛又加了些功能。

可以在任兩種編碼間轉換檔名;可以把 %hh(16 進位)或 _000(10 進位)這兩種格式的數字編碼轉換成 UTF-8;可以只轉換編碼,不改變檔名。

#!/usr/bin/env python
# -*- coding: UTF-8 -*-
"""$Id: fnconv.py,v 1.4 2005/06/02 13:56:21 yungyuc Exp $

把檔案系統內的檔名
從某個 encoding 轉換到另一個 encoding

Written by Yung-Yu Chen <yyc@seety.org>
copyright 2004, All Rights Reserved."""

####################################
#
import sys, os, codecs
#
####################################

def conv( str, code_from, code_to ):
####################################
#
  """Re-encode ``str`` from the ``code_from`` codec to the ``code_to`` codec.

  Both codecs are looked up by name first (decoder, then encoder).  The
  input is decoded with the former and the decoded text is encoded with the
  latter.  Only the encoded data is returned; the length that each codec
  function reports alongside its result is discarded.
  """
  to_text = codecs.getdecoder( code_from )
  from_text = codecs.getencoder( code_to )
  text, _consumed = to_text( str )
  converted, _produced = from_text( text )
  return converted
#
####################################

def ndecode( encoded, leading, base ):
####################################
#
  """Decode numeric character escapes embedded in ``encoded``.

  An escape is the ``leading`` character followed by a fixed number of
  digits: two hexadecimal digits when ``base`` is 16, three decimal digits
  when ``base`` is 10.  Each well-formed escape is replaced by ``chr()`` of
  its value; every other character is copied through unchanged.

  A ``leading`` character that is not followed by a complete escape (too few
  characters left, a character that is not a digit of the base, or a value
  above 255) is kept literally and scanning resumes at the next character.

  Raises RuntimeError when ``base`` is neither 10 nor 16.
  """
  # The base fixes both the width of an escape and the digits it may contain.
  if base == 10:
    step = 3
    digits = "0123456789"
  elif base == 16:
    step = 2
    digits = "0123456789abcdefABCDEF"
  else:
    raise RuntimeError( "unsupported base %r (expected 10 or 16)" % (base,) )
  # Collect the output pieces and join once instead of repeated string +=.
  pieces = []; i = 0
  while i < len(encoded):
    if encoded[i] == leading:
      field = encoded[i+1:i+(1+step)]
      # Validate explicitly instead of trusting int(): int() also accepts
      # short strings, surrounding blanks, signs and "_" separators, which
      # would turn ordinary text such as "_ 65" or a trailing "%4" into a
      # control or letter character.
      if len(field) == step and all( ch in digits for ch in field ):
        value = int( field, base )
        # One escape stands for one byte-sized character; larger values
        # (possible only in base 10) are not escapes and stay literal.
        if value <= 0xFF:
          pieces.append( chr( value ) )
          i += (1+step)
          continue
    # Ordinary character, or a marker that does not start a valid escape.
    pieces.append( encoded[i] )
    i += 1
  return "".join( pieces )
#
####################################

def getnewname( str, options ):
####################################
#
  """Return ``str`` with numeric escapes decoded (if requested) and re-encoded.

  ``options`` supplies ``pdecode``, ``udecode``, ``code_from`` and
  ``code_to``.  When ``pdecode`` is set, ``%hh`` (hexadecimal) escapes are
  decoded; otherwise, when ``udecode`` is set, ``_ddd`` (decimal) escapes are
  decoded.  In either case ``options.code_from`` is overwritten with
  "UTF-8", so the decoded data is converted as UTF-8.

  The value returned is conv() applied to the (possibly decoded) string,
  from ``options.code_from`` to ``options.code_to``.
  """
  # Pick the string to process and adjust the source encoding.
  if options.pdecode:
    # Decode characters written as a percent sign plus hexadecimal digits.
    # The marker must be the single character "%": ndecode() compares it
    # with one character of the input at a time, so the two-character
    # string "%%" could never match and nothing would be decoded.
    str = ndecode( str, "%", 16 )
    options.code_from = "UTF-8"
  elif options.udecode:
    # Decode characters written as an underscore plus decimal digits.
    str = ndecode( str, "_", 10 )
    options.code_from = "UTF-8"
  # Convert between the two encodings and hand back the result.
  return conv( str, options.code_from, options.code_to )
#
####################################

def process_string( options, args ):
####################################
#
  """Convert ``options.stringonly`` and print the result on its own line.

  The string is passed through getnewname() and written to standard output
  followed by a newline.  ``args`` is not used.
  """
  converted = getnewname( options.stringonly, options )
  line = "%s\n" % converted
  sys.stdout.write( line )
#
####################################

def process_filename( options, args ):
####################################
#
  """Interactively rename the files matched by the glob patterns in ``args``.

  Every pattern is expanded with glob(); when no pattern is given, "*" is
  used.  For each match the current name and the name produced by
  getnewname() are shown, and one line is read from standard input.  The
  file is renamed with os.renames() only when the first word of the reply
  starts with "y" or "Y"; any other reply, including an empty line or end of
  input, leaves the file untouched.
  """
  from glob import glob
  # Default to "*" when no pattern was given, without modifying the
  # caller's list.
  patterns = args
  if len(patterns) < 1: patterns = ["*"]
  # Collect the file names to process.
  fns = []
  for pattern in patterns: fns.extend( glob( pattern ) )
  # Handle every collected file name.
  for fn in fns:
    # Show the name before conversion.
    sys.stdout.write( "%s -> " % fn )
    # Work out the converted name.
    fn_cved = getnewname( fn, options )
    # Show the converted name and ask whether to accept it.
    sys.stdout.write( "%s [y/N]" % fn_cved )
    # The prompt has no trailing newline, so push it out explicitly; otherwise
    # it may still sit in the output buffer while we wait for the reply.
    sys.stdout.flush()
    # Read the reply; an empty reply (or end of input) counts as "no".
    tokens = sys.stdin.readline().split()
    if len(tokens) > 0: answer = tokens[0]
    else              : answer = 'n'
    # Rename only when the reply starts with y or Y.
    if answer[0] in ('y', 'Y'):
      os.renames( fn, fn_cved )
      sys.stdout.write( "%s converted.\n" % fn_cved )
#
####################################

def main():
####################################
#
  """Parse the command line and run the matching mode.

  When ``options.stringonly`` is not the empty string, the supplied string is
  converted by process_string(); otherwise file names are processed by
  process_filename().
  """
  # The parser object returned first is not needed here.
  _parser, options, args = process_argument()
  # Choose the handler for the requested mode, then run it.
  if options.stringonly == "":
    handler = process_filename
  else:
    handler = process_string
  handler( options, args )
#
####################################

def process_argument():
####################################
#
  """Parse the command line and return ``(op, options, args)``.

  ``op`` is the OptionParser that was used, ``options`` holds the parsed
  option values (``pdecode``, ``udecode``, ``stringonly``, ``code_from``,
  ``code_to``) and ``args`` is the list of remaining positional arguments.
  """
  from optparse import OptionParser
  # -f and -t have defaults and -s is optional; without -s the positional
  # arguments are the file name patterns, so the usage line shows them.
  op = OptionParser(
        usage="usage: %prog [-p | -u] [-f <encoding>] [-t <encoding>]\n"
              "        [-s <string> | <pattern> ...]",
        version="%prog $Revision: 1.4 $" )
  op.add_option( "-p", action="store_true",
                 dest="pdecode", default=False,
                 help="percentage encoded" )
  op.add_option( "-u", action="store_true",
                 dest="udecode", default=False,
                 help="underline encoded" )
  op.add_option( "-s", action="store", type="string",
                 dest="stringonly", default="",
                 help="decode/encode supplied string only" )
  op.add_option( "-f", action="store", type="string",
                 dest="code_from", default='big5hkscs',
                 help="python encoding to convert from "
                      "(default 'big5hkscs')" )
  op.add_option( "-t", action="store", type="string",
                 dest="code_to", default='utf_8',
                 help="python encoding to convert to "
                      "(default 'utf_8')" )
  options, args = op.parse_args()
  return op, options, args
#
####################################

# Run the command-line tool only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
  main()
Posted by yungyuc at 22:05, 0 comment, 0 trackback.
Navigate
Add a trackback
Add a comment

Your name. (required)

Your personal website. (optional)

Your email address. Will not show in page. (suggested, but optional)

Text format is "Plain Text".

Enter "VySpD"
© hover year to navigate month: powered by django