Spaces:
Running
Running
""" Standard "encodings" Package | |
Standard Python encoding modules are stored in this package | |
directory. | |
Codec modules must have names corresponding to normalized encoding | |
names as defined in the normalize_encoding() function below, e.g. | |
'utf-8' must be implemented by the module 'utf_8.py'. | |
Each codec module must export the following interface: | |
* getregentry() -> codecs.CodecInfo object | |
The getregentry() API must return a CodecInfo object with encoder, decoder, | |
incrementalencoder, incrementaldecoder, streamwriter and streamreader | |
attributes which adhere to the Python Codec Interface Standard. | |
In addition, a module may optionally also define the following | |
APIs which are then used by the package's codec search function: | |
* getaliases() -> sequence of encoding name strings to use as aliases | |
Alias names returned by getaliases() must be normalized encoding | |
names as defined by normalize_encoding(). | |
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. | |
"""#" | |
import codecs | |
import sys | |
from . import aliases | |
# Alias table consulted (and extended) by search_function(); this is the
# very same dict object as aliases.aliases, not a copy.
_aliases = aliases.aliases

# Non-empty fromlist handed to __import__() so that the imported
# 'encodings.<name>' submodule itself is returned rather than the package.
_import_tail = ['*']

# Sentinel used as the default for cache lookups, so that a cached None
# (a remembered failed lookup) can be told apart from "never looked up".
_unknown = '--unknown--'

# Results of previous search_function() calls, keyed by the encoding name
# exactly as it was passed in.  A value of None records a failed lookup.
_cache = {}
class CodecRegistryError(LookupError, SystemError):
    """Raised when a codec module returns an unusable registry entry.

    Derives from both LookupError and SystemError, so handlers written
    for either of those base classes will catch it.
    """
def normalize_encoding(encoding):
    """ Normalize an encoding name.

        Normalization works as follows: all non-alphanumeric
        characters except the dot used for Python package names are
        collapsed and replaced with a single underscore, e.g. '  -;#'
        becomes '_'. Leading and trailing underscores are removed.

        Note that encoding names should be ASCII only.  Non-ASCII
        alphanumeric characters are dropped from the result and never
        produce an underscore of their own.

        encoding may be a str or an ASCII-only bytes object; the
        normalized name is always returned as a str.

    """
    if isinstance(encoding, bytes):
        encoding = str(encoding, "ascii")

    chars = []
    punct = False
    for c in encoding:
        if c.isalnum() or c == '.':
            if not c.isascii():
                # Skip the character *before* touching the separator
                # state.  Otherwise a dropped character would still flush
                # a pending underscore, giving results such as 'utf_'
                # (trailing underscore) or 'a__b' (doubled underscore).
                continue
            # Emit at most one underscore for a run of punctuation, and
            # only between two kept characters (never leading).
            if punct and chars:
                chars.append('_')
            chars.append(c)
            punct = False
        else:
            punct = True
    return ''.join(chars)
def search_function(encoding):
    """Codec search function of the "encodings" package.

    The result for *encoding* is first looked up in the module-level
    cache.  On a miss the name is normalized, resolved through the alias
    table, and the matching 'encodings.<name>' submodule is imported and
    asked for its registry entry via getregentry().

    Returns a codecs.CodecInfo object, or None if no usable codec module
    was found.  Both outcomes are cached under the name as passed in.

    Raises CodecRegistryError if getregentry() returns something other
    than a CodecInfo whose length is not between 4 and 7, or whose
    members are not callable where a callable is required.
    """

    # Cache lookup
    entry = _cache.get(encoding, _unknown)
    if entry is not _unknown:
        return entry

    # Import the module:
    #
    # First try to find an alias for the normalized encoding
    # name and lookup the module using the aliased name, then try to
    # lookup the module using the standard import scheme, i.e. first
    # try in the encodings package, then at top-level.
    #
    norm_encoding = normalize_encoding(encoding)
    aliased_encoding = _aliases.get(norm_encoding) or \
                       _aliases.get(norm_encoding.replace('.', '_'))
    if aliased_encoding is not None:
        modnames = [aliased_encoding,
                    norm_encoding]
    else:
        modnames = [norm_encoding]
    for modname in modnames:
        # Empty names and dotted names are never imported.
        if not modname or '.' in modname:
            continue
        try:
            # Import is absolute to prevent the possibly malicious import of a
            # module with side-effects that is not in the 'encodings' package.
            mod = __import__('encodings.' + modname, fromlist=_import_tail,
                             level=0)
        except ImportError:
            # ImportError may occur because 'encodings.(modname)' does not exist,
            # or because it imports a name that does not exist (see mbcs and oem)
            pass
        else:
            break
    else:
        # No candidate could be imported.
        mod = None

    try:
        getregentry = mod.getregentry
    except AttributeError:
        # Not a codec module (this also covers mod being None)
        mod = None

    if mod is None:
        # Cache misses
        _cache[encoding] = None
        return None

    # Now ask the module for the registry entry
    entry = getregentry()
    if not isinstance(entry, codecs.CodecInfo):
        # Legacy interface: a plain sequence of 4 to 7 items that is
        # validated here and then converted into a CodecInfo.
        if not 4 <= len(entry) <= 7:
            raise CodecRegistryError('module "%s" (%s) failed to register'
                                     % (mod.__name__, mod.__file__))
        if not callable(entry[0]) or not callable(entry[1]) or \
           (entry[2] is not None and not callable(entry[2])) or \
           (entry[3] is not None and not callable(entry[3])) or \
           (len(entry) > 4 and entry[4] is not None and not callable(entry[4])) or \
           (len(entry) > 5 and entry[5] is not None and not callable(entry[5])):
            raise CodecRegistryError('incompatible codecs in module "%s" (%s)'
                                     % (mod.__name__, mod.__file__))
        entry = tuple(entry)
        if len(entry) < 7 or entry[6] is None:
            # Pad the optional slots with None and put the module's short
            # name into the seventh (name) slot.  Slicing to the first six
            # items keeps the result at exactly seven items even when the
            # module supplied a seventh item that was None; appending
            # without the slice would produce eight items and make the
            # CodecInfo() call below fail with a TypeError.
            entry = (entry[:6] + (None,) * (6 - len(entry))
                     + (mod.__name__.split(".", 1)[1],))
        entry = codecs.CodecInfo(*entry)

    # Cache the codec registry entry
    _cache[encoding] = entry

    # Register its aliases (without overwriting previously registered
    # aliases)
    try:
        codecaliases = mod.getaliases()
    except AttributeError:
        # getaliases() is optional
        pass
    else:
        for alias in codecaliases:
            if alias not in _aliases:
                # modname is the name under which the import succeeded
                _aliases[alias] = modname

    # Return the registry entry
    return entry
# Hook the package's search function into the Python codec registry.
codecs.register(search_function)

if sys.platform == 'win32':
    def _alias_mbcs(encoding):
        """Return the mbcs codec entry when *encoding* is 'cp<ANSI code page>'.

        Returns None for every other name, and also when one of the
        imports below fails.
        """
        try:
            import _winapi
            if encoding != "cp%s" % _winapi.GetACP():
                return None
            import encodings.mbcs
            return encodings.mbcs.getregentry()
        except ImportError:
            # Imports may fail while we are shutting down
            return None

    codecs.register(_alias_mbcs)