Improve codepage selection logic.

This commit is contained in:
Michael Elsdörfer 2016-08-30 17:13:05 +02:00
parent 2f89f3fe3a
commit 9aa1335fd2
1 changed files with 14 additions and 11 deletions

View File

@ -55,7 +55,7 @@ class Encoder(object):
canonical encoding name; and also validate that the encoding canonical encoding name; and also validate that the encoding
is supported. is supported.
TOOD: Support encoding aliases. TODO: Support encoding aliases: pc437 instead of cp437.
""" """
encoding = CodePages.get_encoding(encoding) encoding = CodePages.get_encoding(encoding)
if not encoding in self.codepages: if not encoding in self.codepages:
@ -78,6 +78,14 @@ class Encoder(object):
return True return True
def __encoding_sort_func(self, item):
    """Return the sort key for one ``(encoding, index)`` codepage item.

    Intended as the ``key=`` callable when sorting the items of
    ``self.codepages``. Encodings already present in
    ``self.used_encodings`` are ordered first; within each group the
    items are ordered by ascending ``index``.

    ``item`` is a ``(key, index)`` pair, where ``key`` is the encoding
    name and ``index`` is the value stored for it in the mapping.
    """
    key, index = item
    # sorted() is ascending and False < True, so we must test for
    # *absence* from used_encodings to make used encodings come first.
    return (key not in self.used_encodings, index)
def find_suitable_encoding(self, char): def find_suitable_encoding(self, char):
"""The order of our search is a specific one: """The order of our search is a specific one:
@ -93,17 +101,12 @@ class Encoder(object):
is missing or incomplete, we might increase our chance is missing or incomplete, we might increase our chance
that the code page we pick for this character is actually that the code page we pick for this character is actually
supported. supported.
"""
sorted_encodings = sorted(
self.codepages.items(),
key=self.__encoding_sort_func)
# TODO actually do speed up the search for encoding, _ in sorted_encodings:
"""
"""
- remove the ones not supported
- order by used first, then others
- do not use a cache, because encode already is so fast
"""
sorted_encodings = self.codepages.keys()
for encoding in sorted_encodings:
if self.can_encode(encoding, char): if self.can_encode(encoding, char):
# This encoding worked; add it to the set of used ones. # This encoding worked; add it to the set of used ones.
self.used_encodings.add(encoding) self.used_encodings.add(encoding)