File: PC-Phone USB Sync/normalize-unicode-filenames.py
#!/usr/bin/env python3
r"""
======================================================================================
normalize-unicode-filenames.py - a PC-Phone USB Sync app utility script.
This script has the same terms of use as that of the app.  It's available
online at quixotely.com/PC-Phone USB Sync/normalize-unicode-filenames.py.

Convert all filenames in a folder to NFC or NFD Unicode form as needed.
Run before processing content on platforms that require specific Unicode
code-point forms for filename characters that have multiple variants.

In particular, macOS may not recognize NFD names on exFAT that originated
on Windows.  If these names produce error messages in logfiles on macOS,
run this on Windows to convert NFD filenames to NFC form before propagating
them to macOS.  You can't fix the NFD filenames by running this script on
macOS itself, because such filenames are unusable there: they show up in
os.listdir() results but fail when used in any other way.

Run with a command line like this (use "py -3" for "python3" on Windows):

    python3 normalize-unicode-filenames.py folderroot (NFC|NFD) listonly?

Where folderroot is absolute or relative, and listonly is any value.
Example:

    py -3 normalize-unicode-filenames.py D:\MY-STUFF NFC

This converts all NFD filenames with variant characters in MY-STUFF to NFC
Unicode code-point form.  Pass listonly to print such names without renaming.

This can be run in Terminal on Linux, and Command Prompt, PowerShell, and
WSL on Windows.  You must have a Python 3.x installed where you run this.

Unicode variants, like symlinks, are an interoperability hurdle; they can
be repaired by renaming, much like the app's NAME for nonportable filenames,
but crop up too rarely to warrant in-app support.

See also: the example usage at the end of this file.
======================================================================================
"""

import os
import sys
from unicodedata import normalize


# run settings: assigned from the command line by main(); normone() falls
# back on NORMTYPETO and LISTONLY when its optional arguments are omitted
FOLDERROOT = None    # abs or rel: folder to walk
NORMTYPETO = None    # what form to convert to: 'NFC' or 'NFD'
LISTONLY = False     # any arg3 means listonly


def _parseargs(argv):
    """
    Validate command-line arguments explicitly (not with assert, which is
    stripped by "python -O").  Returns (folderroot, normtypeto, listonly);
    raises ValueError with a readable reason if the arguments are unusable.
    """
    if len(argv) < 3:
        raise ValueError('missing folderroot and/or Unicode form argument')
    folderroot, normtypeto = argv[1], argv[2]
    if not os.path.isdir(folderroot):
        raise ValueError('folderroot is not a folder: %r' % folderroot)
    if normtypeto not in ('NFC', 'NFD'):
        raise ValueError('Unicode form must be NFC or NFD, not %r' % normtypeto)
    return folderroot, normtypeto, len(argv) > 3


def normone(name, dirhere, normtypeto=None, listonly=None):
    """
    Normalize one name (file, symlink, or folder) located in folder dirhere.

    name:       the item's base name, as returned by os.walk()/os.listdir()
    dirhere:    path of the folder that contains name
    normtypeto: 'NFC' or 'NFD'; defaults to the global NORMTYPETO
    listonly:   if true, report but do not rename; defaults to global LISTONLY

    Returns 0 if name is already in the requested form, or if a rename was
    requested but skipped because the normalized name is already taken.
    Returns 1 if the name was reported (listonly) or actually renamed.
    Exceptions raised by os.rename() are propagated to the caller.
    """
    if normtypeto is None:
        normtypeto = NORMTYPETO
    if listonly is None:
        listonly = LISTONLY

    normname = normalize(normtypeto, name)
    if name == normname:
        return 0

    # show names as UTF-8 bytes so the two code-point forms are distinguishable
    print('In', dirhere, '\n\t', name.encode('utf8'), '=>', normname.encode('utf8'))
    if listonly:
        return 1

    namepath = os.path.join(dirhere, name)
    normpath = os.path.join(dirhere, normname)

    # lexists, not exists: a dangling symlink at the target is still a name
    # collision, and os.rename() would silently replace it on POSIX
    if os.path.lexists(normpath):
        print('**Cannot rename: normalzed form exists')
        return 0    # not renamed: don't count it in the "renamed" total

    # ==> Fails on macOS: FileNotFoundError: [Errno 2] No such file or directory
    # ==> Must run on Windows, not macOS
    os.rename(namepath, normpath)
    return 1


def _normtree(folderroot, normtypeto, listonly):
    """
    Apply normone() to every file, symlink, and folder name below folderroot
    (folderroot's own name is not changed).  Returns the sum of the normone()
    results: names found in listonly mode, else names renamed.
    """
    numnorm = 0

    # bottom up to allow folder renames: a folder's content is visited
    # before the folder itself is renamed in its parent
    walker = os.walk(folderroot, topdown=False, followlinks=False)

    # find+mod? file, symlink, folder names
    for (dirhere, subshere, fileshere) in walker:
        for name in fileshere:
            numnorm += normone(name, dirhere, normtypeto, listonly)

        # symlinks in subs of files
        for name in subshere:
            numnorm += normone(name, dirhere, normtypeto, listonly)
    return numnorm


def main(argv=None):
    """
    Script entry point: parse argv (sys.argv if omitted), walk the folder,
    and print the closing summary.  Prints usage and exits with status 1 on
    bad arguments.  Returns the count that is printed in the summary.
    """
    global FOLDERROOT, NORMTYPETO, LISTONLY
    if argv is None:
        argv = sys.argv

    # command-line args
    try:
        FOLDERROOT, NORMTYPETO, LISTONLY = _parseargs(argv)
    except ValueError as why:
        print('Error:', why)
        print('Usage: python3 normalize-unicode-filenames.py folderroot (NFC|NFD) listonly?')
        print('Where folderroot is absolute or relative, listonly is any value.')
        sys.exit(1)

    numnorm = _normtree(FOLDERROOT, NORMTYPETO, LISTONLY)

    closer = 'Done: number filenames with unnormalized variants %s: %s'
    print(closer % ('found' if LISTONLY else 'renamed', numnorm))
    return numnorm


if __name__ == '__main__':
    main()


r"""
======================================================================================
Example Usage: on Windows, fix 3 NFD names that are not recognized by macOS on exFAT

C:\Users\me> py -3 normalize-unicode-filenames.py D:\MY-STUFF\unicode NFC -
In D:\MY-STUFF\unicode\_thumbspage
         b'pymailgui-spA\xcc\x88A\xcc\x88A\xcc\x88m.png' => b'pymailgui-sp\xc3\x84\xc3\x84\xc3\x84m.png'
In D:\MY-STUFF\unicode\_thumbspage
         b'pymailgui-spA\xcc\x88A\xcc\x88A\xcc\x88m.png.html' => b'pymailgui-sp\xc3\x84\xc3\x84\xc3\x84m.png.html'
In D:\MY-STUFF\unicode
         b'pymailgui-spA\xcc\x88A\xcc\x88A\xcc\x88m.png' => b'pymailgui-sp\xc3\x84\xc3\x84\xc3\x84m.png'
Done: number filenames with unnormalized variants found: 3

C:\Users\me> py -3 normalize-unicode-filenames.py D:\MY-STUFF\unicode NFD -
Done: number filenames with unnormalized variants found: 0

C:\Users\me> py -3 normalize-unicode-filenames.py D:\MY-STUFF\unicode NFC
In D:\MY-STUFF\unicode\_thumbspage
         b'pymailgui-spA\xcc\x88A\xcc\x88A\xcc\x88m.png' => b'pymailgui-sp\xc3\x84\xc3\x84\xc3\x84m.png'
In D:\MY-STUFF\unicode\_thumbspage
         b'pymailgui-spA\xcc\x88A\xcc\x88A\xcc\x88m.png.html' => b'pymailgui-sp\xc3\x84\xc3\x84\xc3\x84m.png.html'
In D:\MY-STUFF\unicode
         b'pymailgui-spA\xcc\x88A\xcc\x88A\xcc\x88m.png' => b'pymailgui-sp\xc3\x84\xc3\x84\xc3\x84m.png'
Done: number filenames with unnormalized variants renamed: 3
======================================================================================
"""