reverse the lookup order to correct encoding issues

This commit is contained in:
Michael Billington 2016-09-11 17:17:22 +10:00
parent 2c8bc1180d
commit b5bf1125db
1 changed file with 16 additions and 10 deletions

View File

@@ -17,6 +17,7 @@ from __future__ import division
from __future__ import print_function from __future__ import print_function
from __future__ import unicode_literals from __future__ import unicode_literals
from builtins import bytes, chr
from .constants import CODEPAGE_CHANGE from .constants import CODEPAGE_CHANGE
from .exceptions import CharCodeError, Error from .exceptions import CharCodeError, Error
from .capabilities import get_profile from .capabilities import get_profile
@@ -66,17 +67,22 @@ class Encoder(object):
return encoding return encoding
def can_encode(self, encoding, char): def can_encode(self, encoding, char):
# Compute the encodable characters in the upper half of this code page
encodable_chars = [u" "] * 128
for i in range(0, 128):
codepoint = i + 128
try: try:
encoded = CodePages.encode(char, encoding) encodable_chars[i] = bytes([codepoint]).decode(encoding)
assert type(encoded) is bytes except UnicodeDecodeError:
return encoded # Non-encodable character
pass
except LookupError: except LookupError:
# We don't have this encoding # We don't have this encoding
return False return False
except UnicodeEncodeError:
return False
return True # Decide whether this character is encodeable in this code page
is_ascii = ord(char) < 128
return is_ascii or char in encodable_chars
def __encoding_sort_func(self, item): def __encoding_sort_func(self, item):
key, index = item key, index = item