nzbToMedia/core/utils/encoding.py
2020-01-05 13:39:23 +13:00

130 lines
4.7 KiB
Python

from __future__ import (
absolute_import,
division,
print_function,
unicode_literals,
)
import os
from six import text_type
from six import PY2
import core
from core import logger
if not PY2:
from builtins import bytes
def char_replace(name_in):
# Special character hex range:
# CP850: 0x80-0xA5 (fortunately not used in ISO-8859-15)
# UTF-8: 1st hex code 0xC2-0xC3 followed by a 2nd hex code 0xA1-0xFF
# ISO-8859-15: 0xA6-0xFF
# The function will detect if Name contains a special character
# If there is special character, detects if it is a UTF-8, CP850 or ISO-8859-15 encoding
encoded = False
encoding = None
if isinstance(name_in, text_type):
return encoded, name_in
if PY2:
name = name_in
for Idx in range(len(name)):
# print('Trying to intuit the encoding')
# /!\ detection is done 2char by 2char for UTF-8 special character
if (len(name) != 1) & (Idx < (len(name) - 1)):
# Detect UTF-8
if ((name[Idx] == '\xC2') | (name[Idx] == '\xC3')) & (
(name[Idx + 1] >= '\xA0') & (name[Idx + 1] <= '\xFF')):
encoding = 'utf-8'
break
# Detect CP850
elif (name[Idx] >= '\x80') & (name[Idx] <= '\xA5'):
encoding = 'cp850'
break
# Detect ISO-8859-15
elif (name[Idx] >= '\xA6') & (name[Idx] <= '\xFF'):
encoding = 'iso-8859-15'
break
else:
# Detect CP850
if (name[Idx] >= '\x80') & (name[Idx] <= '\xA5'):
encoding = 'cp850'
break
# Detect ISO-8859-15
elif (name[Idx] >= '\xA6') & (name[Idx] <= '\xFF'):
encoding = 'iso-8859-15'
break
else:
name = bytes(name_in)
for Idx in range(len(name)):
# print('Trying to intuit the encoding')
# /!\ detection is done 2char by 2char for UTF-8 special character
if (len(name) != 1) & (Idx < (len(name) - 1)):
# Detect UTF-8
if ((name[Idx] == 0xC2) | (name[Idx] == 0xC3)) & (
(name[Idx + 1] >= 0xA0) & (name[Idx + 1] <= 0xFF)):
encoding = 'utf-8'
break
# Detect CP850
elif (name[Idx] >= 0x80) & (name[Idx] <= 0xA5):
encoding = 'cp850'
break
# Detect ISO-8859-15
elif (name[Idx] >= 0xA6) & (name[Idx] <= 0xFF):
encoding = 'iso-8859-15'
break
else:
# Detect CP850
if (name[Idx] >= 0x80) & (name[Idx] <= 0xA5):
encoding = 'cp850'
break
# Detect ISO-8859-15
elif (name[Idx] >= 0xA6) & (name[Idx] <= 0xFF):
encoding = 'iso-8859-15'
break
if encoding:
encoded = True
name = name.decode(encoding)
elif not PY2:
name = name.decode()
return encoded, name
def convert_to_ascii(input_name, dir_name):
ascii_convert = int(core.CFG['ASCII']['convert'])
if ascii_convert == 0 or os.name == 'nt': # just return if we don't want to convert or on windows os and '\' is replaced!.
return input_name, dir_name
encoded, input_name = char_replace(input_name)
directory, base = os.path.split(dir_name)
if not base: # ended with '/'
directory, base = os.path.split(directory)
encoded, base2 = char_replace(base)
if encoded:
dir_name = os.path.join(directory, base2)
logger.info('Renaming directory to: {0}.'.format(base2), 'ENCODER')
os.rename(os.path.join(directory, base), dir_name)
if 'NZBOP_SCRIPTDIR' in os.environ:
print('[NZB] DIRECTORY={0}'.format(dir_name))
for dirname, dirnames, _ in os.walk(dir_name, topdown=False):
for subdirname in dirnames:
encoded, subdirname2 = char_replace(subdirname)
if encoded:
logger.info('Renaming directory to: {0}.'.format(subdirname2), 'ENCODER')
os.rename(os.path.join(dirname, subdirname), os.path.join(dirname, subdirname2))
for dirname, _, filenames in os.walk(dir_name):
for filename in filenames:
encoded, filename2 = char_replace(filename)
if encoded:
logger.info('Renaming file to: {0}.'.format(filename2), 'ENCODER')
os.rename(os.path.join(dirname, filename), os.path.join(dirname, filename2))
return input_name, dir_name