efficiency improvements for backwards encoding

2025-12-02 09:43:30 +00:00 · 2016-09-11 20:21:30 +10:00
parent b5bf1125db
commit d9a6960f07
1 changed files with 44 additions and 7 deletions
--- a/src/escpos/magicencode.py
+++ b/src/escpos/magicencode.py
@@ -17,7 +17,7 @@ from __future__ import division
 from __future__ import print_function
 from __future__ import unicode_literals

-from builtins import bytes, chr
+from builtins import bytes
 from .constants import CODEPAGE_CHANGE
 from .exceptions import CharCodeError, Error
 from .capabilities import get_profile
@@ -46,6 +46,7 @@ class Encoder(object):
    def __init__(self, codepage_map):
        self.codepages = codepage_map
        self.available_encodings = set(codepage_map.keys())
+        self.available_characters = {}
        self.used_encodings = set()

    def get_sequence(self, encoding):
@@ -66,8 +67,15 @@ class Encoder(object):
                ).format(encoding, ','.join(self.codepages.keys())))
        return encoding

-    def can_encode(self, encoding, char):
-        # Compute the encodable characters in the upper half of this code page
+    def _get_codepage_char_list(self, encoding):
+        """Get codepage character list
+        
+        Gets characters 128-255 for a given code page, as an array.
+        
+        :param encoding: The name of the encoding. This must be a valid python encoding.
+        """
+        # Compute the encodable characters as an array (this is the format
+        # that for non-standard codings come in)
        encodable_chars = [u" "] * 128
        for i in range(0, 128):
            codepoint = i + 128
@@ -76,13 +84,42 @@ class Encoder(object):
            except UnicodeDecodeError:
                # Non-encodable character
                pass
+        return encodable_chars
+
+    def _get_codepage_char_map(self, encoding):
+        """ Get codepage character map
+        
+        Process an encoding and return a map of UTF-characters to code points
+        in this encoding.
+        
+        This is generated once only, and returned from a cache.
+        
+        :param encoding: The name of the encoding.
+        """
+        # Skip things that were loaded previously
+        if encoding in self.available_characters:
+            return self.available_characters[encoding]
+        codepage_char_list = self._get_codepage_char_list(encoding)
+        codepage_char_map = dict((utf8, i + 128) for (i, utf8) in enumerate(codepage_char_list))
+        self.available_characters[encoding] = codepage_char_map
+        return codepage_char_map
+
+    def can_encode(self, encoding, char):
+        """Determine if a character is encodeable in the given code page.
+        
+        :param encoding: The name of the encoding.
+        :param char: The character to attempt to encode.
+        """
+        available_map = {}
+        try:
+            available_map = self._get_codepage_char_map(encoding)
        except LookupError:
-                # We don't have this encoding
            return False

        # Decide whether this character is encodeable in this code page
        is_ascii = ord(char) < 128
-        return is_ascii or char in encodable_chars
+        is_encodable = char in available_map
+        return is_ascii or is_encodable

    def __encoding_sort_func(self, item):
        key, index = item