通过 Samba 将文件从 macOS 系统复制到 Windows 共享目录后，我得到了如下文件名：
Сђ•вл
К†в†ЂЃ¶≠л• Ђ®бвл.pdf
П†бѓЃав.doc
正常情况下它们应该是这样的:
Сметы
Каталожные листы.pdf
Паспорт.doc
在某些情况下，名称末尾还会多出一个 U+F028 字符：
Новые
有没有办法在 Windows 机器上自动识别并转换此类文件?
答案1
我最终自己编写了一个脚本……
代码写得比较粗糙，也没有经过太多测试，但在我的场景下可以正常工作。需要注意的是，该脚本只能在 GNU/Linux 上用 Python 3 运行。
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
(Not) Simple MacCyrillic -> IBM866 converting script.
"""
import sys
# subprocess.run(), which this script relies on, only exists since Python 3.5,
# so every older interpreter (including all of Python 2) is rejected up front.
if sys.version_info < (3, 5):
    print("Please run it with Python 3.5 or better.")
    # Non-zero status so a caller can tell that nothing was done.
    sys.exit(1)
import os
# The script shells out to the external `find` and `mv` commands, so it
# refuses to start on anything that is not a POSIX system.
if os.name != 'posix':
    print("This script works only on GNU/Linux.")
    sys.exit(1)
import subprocess
# Marker characters: a path component that contains any of them is treated
# as mangled and is run through replaceTable.
# NOTE(review): '™' and 'І' are keys of replaceTable but are not listed
# here -- confirm whether that omission is intentional.
BadChars = [
    '©', '•', '≠', '£', '¢',
    '†', 'ѓ', 'Ѓ', 'Ђ', '§',
    '¶', 'ђ', '®', '°', 'Ґ',
]
# Maps a character as it appears in a mangled name to the lowercase Cyrillic
# letter it stands for.  Entries follow the keyboard-row order of the
# intended letters.
replaceTable = {
    # й ц у к е н г ш щ з х ъ
    '©': 'й', 'ж': 'ц', 'г': 'у', '™': 'к', '•': 'е', '≠': 'н',
    '£': 'г', 'и': 'ш', 'й': 'щ', 'І': 'з', 'е': 'х', 'к': 'ъ',
    # ф ы в а п р о л д ж э
    'д': 'ф', 'л': 'ы', '¢': 'в', '†': 'а', 'ѓ': 'п', 'а': 'р',
    'Ѓ': 'о', 'Ђ': 'л', '§': 'д', '¶': 'ж', 'н': 'э',
    # я ч с м и т ь б ю
    'п': 'я', 'з': 'ч', 'б': 'с', 'ђ': 'м', '®': 'и', 'в': 'т',
    'м': 'ь', '°': 'б', 'о': 'ю',
    # Second mangled form of 'в' (both '¢' and 'Ґ' map to it).
    'Ґ': 'в',
}
def check_all_path_string(path, BadChars):
    """Return True if *path* contains at least one marker from *BadChars*.

    Args:
        path: the string to inspect (a whole path in this script).
        BadChars: iterable of marker strings to look for.

    Returns:
        True as soon as one marker is found as a substring of *path*,
        False when none is (including when *BadChars* is empty).
    """
    return any(symbol in path for symbol in BadChars)
def check_part_of_name(part_of_name, BadChars):
    """Return True if any character of *part_of_name* is in *BadChars*.

    Args:
        part_of_name: a single path component.
        BadChars: collection of marker characters.

    Returns:
        True if at least one character of the component is a marker,
        False otherwise (including for an empty component).
    """
    return any(letter in BadChars for letter in part_of_name)
def replace_symbols(part_of_name, replaceTable):
    """Translate *part_of_name* character by character through *replaceTable*.

    Args:
        part_of_name: the string to translate.
        replaceTable: dict mapping a single character to its replacement.

    Returns:
        A new string in which every character that is a key of
        *replaceTable* is replaced by the mapped value and every other
        character is kept.  Each character is looked up exactly once, so a
        replacement is never translated again.
    """
    # dict.get does one hash lookup per character, instead of rebuilding a
    # list of all keys for every character of the name.
    return "".join(replaceTable.get(sym, sym) for sym in part_of_name)
def check_part_of_bad_path(bad_file_name_list, BadChars, replaceTable):
    """Rebuild a path from its components, translating the mangled ones.

    Args:
        bad_file_name_list: the path split on "/" (as str.split("/") returns
            it, so an absolute path starts with an empty component).
        BadChars: marker characters that identify a mangled component.
        replaceTable: dict used to translate a mangled component.

    Returns:
        The components joined with "/" again.  A component is translated
        only when it contains a marker character; all others are copied
        unchanged, because replaceTable also has ordinary Cyrillic letters
        as keys and would corrupt a name that is already correct.
    """
    fixed_parts = []
    for part_of_name in bad_file_name_list:
        if check_part_of_name(part_of_name, BadChars):
            part_of_name = replace_symbols(part_of_name, replaceTable)
        fixed_parts.append(part_of_name)
    # Joining exactly what was split keeps the path's shape: an absolute
    # path stays absolute and a relative one is not turned into "/...".
    return "/".join(fixed_parts)
def main_validation(files, BadChars, replaceTable):
    """Return the corrected form of every path in *files*.

    Args:
        files: iterable of path strings using "/" as separator.
        BadChars: marker characters that identify a mangled name.
        replaceTable: dict used to translate mangled components.

    Returns:
        A list with one entry per input path, in the same order: the
        translated path when the input contains a marker character,
        otherwise the input path itself.
    """
    validated_list = []
    for file_name in files:
        # Cheap whole-string test first; only suspicious paths are split
        # and examined component by component.
        if check_all_path_string(file_name, BadChars):
            file_name = check_part_of_bad_path(file_name.split("/"),
                                               BadChars,
                                               replaceTable)
        validated_list.append(file_name)
    return validated_list
def grab_files(folder, find_type):
    """List the entries below *folder* using the external ``find`` command.

    Args:
        folder: directory to start from; it is passed to ``find`` as is.
        find_type: value for ``find -type`` ('d' for directories, 'f' for
            regular files).

    Returns:
        A list of path strings in the order ``find`` printed them.
    """
    # -print0 separates the names with NUL, the one byte that cannot occur
    # in a path, so names containing line breaks are not split apart.
    found = subprocess.run(
        ["find", folder, "-type", find_type, "-print0"],
        stdout=subprocess.PIPE,
    ).stdout
    # surrogateescape keeps a name that is not valid UTF-8 from aborting
    # the whole listing.
    return [raw.decode('utf-8', errors='surrogateescape')
            for raw in found.split(b"\0") if raw]
# Script entry point: list the entries below the current directory, show
# which ones would be renamed, and rename them after confirmation.
if __name__ == "__main__":
    folder = os.getcwd()
    find_type = "d"  # 'd' for directories or 'f' for files
    print("Grab files from", folder)
    files = grab_files(folder, find_type)
    print("Starting validation...")
    validated = main_validation(files, BadChars, replaceTable)
    print("Computing diff... (this can take a long time)")
    # main_validation returns one result per input, in order, so every path
    # is paired with its own corrected form.
    renames = []
    for source, fixed in zip(files, validated):
        # Only the entry's own (last) component is renamed here; mangled
        # parent directories are separate entries with their own rename.
        dest = os.path.join(os.path.dirname(source), os.path.basename(fixed))
        if dest != source:
            renames.append((source, dest))
    # Deepest entries first, so renaming a directory never invalidates the
    # source path of something inside it that is still waiting.
    renames.sort(key=lambda pair: pair[0].count("/"), reverse=True)
    print("Overall source count:", len(files))
    print("Validated diff:", len(renames))
    for source, dest in renames:
        print(source, '->', dest)
    print("\nProceed?")
    choice = input("[Y]es | [N]o > ")
    if choice in ('y', 'Y'):
        for source, dest in renames:
            # Argument list instead of a shell string: quotes, '$' or
            # backticks in a file name are passed to mv literally.
            subprocess.run(["mv", "-i", "--", source, dest])
    elif choice in ('n', 'N'):
        print("Sure, it's okay. Thanks for playing!")
        sys.exit()
    else:
        print("Sorry, I don't understand you.")
        print("Assuming as negative, exiting...")
        sys.exit()