Spaces:
Running
Running
File size: 5,620 Bytes
1d777c4 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 |
""" Standard "encodings" Package
Standard Python encoding modules are stored in this package
directory.
Codec modules must have names corresponding to normalized encoding
names as defined in the normalize_encoding() function below, e.g.
'utf-8' must be implemented by the module 'utf_8.py'.
Each codec module must export the following interface:
* getregentry() -> codecs.CodecInfo object
The getregentry() API must return a CodecInfo object with encoder, decoder,
incrementalencoder, incrementaldecoder, streamwriter and streamreader
attributes which adhere to the Python Codec Interface Standard.
In addition, a module may optionally also define the following
APIs which are then used by the package's codec search function:
* getaliases() -> sequence of encoding name strings to use as aliases
Alias names returned by getaliases() must be normalized encoding
names as defined by normalize_encoding().
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
"""#"
import codecs
import sys
from . import aliases
_cache = {}
_unknown = '--unknown--'
_import_tail = ['*']
_aliases = aliases.aliases
class CodecRegistryError(LookupError, SystemError):
pass
def normalize_encoding(encoding):
""" Normalize an encoding name.
Normalization works as follows: all non-alphanumeric
characters except the dot used for Python package names are
collapsed and replaced with a single underscore, e.g. ' -;#'
becomes '_'. Leading and trailing underscores are removed.
Note that encoding names should be ASCII only.
"""
if isinstance(encoding, bytes):
encoding = str(encoding, "ascii")
chars = []
punct = False
for c in encoding:
if c.isalnum() or c == '.':
if punct and chars:
chars.append('_')
if c.isascii():
chars.append(c)
punct = False
else:
punct = True
return ''.join(chars)
def search_function(encoding):
# Cache lookup
entry = _cache.get(encoding, _unknown)
if entry is not _unknown:
return entry
# Import the module:
#
# First try to find an alias for the normalized encoding
# name and lookup the module using the aliased name, then try to
# lookup the module using the standard import scheme, i.e. first
# try in the encodings package, then at top-level.
#
norm_encoding = normalize_encoding(encoding)
aliased_encoding = _aliases.get(norm_encoding) or \
_aliases.get(norm_encoding.replace('.', '_'))
if aliased_encoding is not None:
modnames = [aliased_encoding,
norm_encoding]
else:
modnames = [norm_encoding]
for modname in modnames:
if not modname or '.' in modname:
continue
try:
# Import is absolute to prevent the possibly malicious import of a
# module with side-effects that is not in the 'encodings' package.
mod = __import__('encodings.' + modname, fromlist=_import_tail,
level=0)
except ImportError:
# ImportError may occur because 'encodings.(modname)' does not exist,
# or because it imports a name that does not exist (see mbcs and oem)
pass
else:
break
else:
mod = None
try:
getregentry = mod.getregentry
except AttributeError:
# Not a codec module
mod = None
if mod is None:
# Cache misses
_cache[encoding] = None
return None
# Now ask the module for the registry entry
entry = getregentry()
if not isinstance(entry, codecs.CodecInfo):
if not 4 <= len(entry) <= 7:
raise CodecRegistryError('module "%s" (%s) failed to register'
% (mod.__name__, mod.__file__))
if not callable(entry[0]) or not callable(entry[1]) or \
(entry[2] is not None and not callable(entry[2])) or \
(entry[3] is not None and not callable(entry[3])) or \
(len(entry) > 4 and entry[4] is not None and not callable(entry[4])) or \
(len(entry) > 5 and entry[5] is not None and not callable(entry[5])):
raise CodecRegistryError('incompatible codecs in module "%s" (%s)'
% (mod.__name__, mod.__file__))
if len(entry)<7 or entry[6] is None:
entry += (None,)*(6-len(entry)) + (mod.__name__.split(".", 1)[1],)
entry = codecs.CodecInfo(*entry)
# Cache the codec registry entry
_cache[encoding] = entry
# Register its aliases (without overwriting previously registered
# aliases)
try:
codecaliases = mod.getaliases()
except AttributeError:
pass
else:
for alias in codecaliases:
if alias not in _aliases:
_aliases[alias] = modname
# Return the registry entry
return entry
# Register the search_function in the Python codec registry
codecs.register(search_function)
if sys.platform == 'win32':
def _alias_mbcs(encoding):
try:
import _winapi
ansi_code_page = "cp%s" % _winapi.GetACP()
if encoding == ansi_code_page:
import encodings.mbcs
return encodings.mbcs.getregentry()
except ImportError:
# Imports may fail while we are shutting down
pass
codecs.register(_alias_mbcs)
|