reverse the lookup order to correct encoding issues
This commit is contained in:
parent
2c8bc1180d
commit
b5bf1125db
|
@ -17,6 +17,7 @@ from __future__ import division
|
||||||
from __future__ import print_function
|
from __future__ import print_function
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from builtins import bytes, chr
|
||||||
from .constants import CODEPAGE_CHANGE
|
from .constants import CODEPAGE_CHANGE
|
||||||
from .exceptions import CharCodeError, Error
|
from .exceptions import CharCodeError, Error
|
||||||
from .capabilities import get_profile
|
from .capabilities import get_profile
|
||||||
|
@ -66,17 +67,22 @@ class Encoder(object):
|
||||||
return encoding
|
return encoding
|
||||||
|
|
||||||
def can_encode(self, encoding, char):
|
def can_encode(self, encoding, char):
|
||||||
try:
|
# Compute the encodable characters in the upper half of this code page
|
||||||
encoded = CodePages.encode(char, encoding)
|
encodable_chars = [u" "] * 128
|
||||||
assert type(encoded) is bytes
|
for i in range(0, 128):
|
||||||
return encoded
|
codepoint = i + 128
|
||||||
except LookupError:
|
try:
|
||||||
# We don't have this encoding
|
encodable_chars[i] = bytes([codepoint]).decode(encoding)
|
||||||
return False
|
except UnicodeDecodeError:
|
||||||
except UnicodeEncodeError:
|
# Non-encodable character
|
||||||
return False
|
pass
|
||||||
|
except LookupError:
|
||||||
|
# We don't have this encoding
|
||||||
|
return False
|
||||||
|
|
||||||
return True
|
# Decide whether this character is encodable in this code page
|
||||||
|
is_ascii = ord(char) < 128
|
||||||
|
return is_ascii or char in encodable_chars
|
||||||
|
|
||||||
def __encoding_sort_func(self, item):
|
def __encoding_sort_func(self, item):
|
||||||
key, index = item
|
key, index = item
|
||||||
|
|
Loading…
Reference in New Issue