From 9a65945fcd0c7a2d7a2a81f2212c0e138623640f Mon Sep 17 00:00:00 2001
From: Michael Billington <michael.billington@gmail.com>
Date: Sun, 11 Sep 2016 21:03:55 +1000
Subject: [PATCH] re-work encoder to consult the capabilities database and use
 custom code pages or python encoder as necessary

---
 src/escpos/capabilities.py |  2 +-
 src/escpos/codepages.py    | 18 +++---------
 src/escpos/magicencode.py  | 60 ++++++++++++++++++++++++++++----------
 test/test_magicencode.py   |  2 +-
 4 files changed, 50 insertions(+), 32 deletions(-)

diff --git a/src/escpos/capabilities.py b/src/escpos/capabilities.py
index e569ccc..4fa9664 100644
--- a/src/escpos/capabilities.py
+++ b/src/escpos/capabilities.py
@@ -58,7 +58,7 @@ class BaseProfile(object):
     def get_code_pages(self):
         """Return the support code pages as a {name: index} dict.
         """
-        return {v.lower(): k for k, v in self.codePages.items()}
+        return {v: k for k, v in self.codePages.items()}
 
 
 
diff --git a/src/escpos/codepages.py b/src/escpos/codepages.py
index 9666fb2..05d5852 100644
--- a/src/escpos/codepages.py
+++ b/src/escpos/codepages.py
@@ -12,21 +12,11 @@ class CodePageManager:
     def get_all(self):
         return self.data.values()
 
-    def encode(self, text, encoding, errors='strict'):
-        """Adds support for Japanese to the builtin str.encode().
-
-        TODO: Add support for custom code page data from
-        escpos-printer-db.
-        """
-        # Python has not have this builtin?
-        if encoding.upper() == 'KATAKANA':
-            return encode_katakana(text)
-
-        return text.encode(encoding, errors=errors)
+    def get_encoding_name(self, encoding):
+        """Return the upper-cased name for encoding. TODO: resolve the encoding alias."""
+        return encoding.upper()
 
     def get_encoding(self, encoding):
-        # resolve the encoding alias
-        return encoding.lower()
-
+        return self.data[encoding]
 
 CodePages = CodePageManager(CAPABILITIES['encodings'])
\ No newline at end of file
diff --git a/src/escpos/magicencode.py b/src/escpos/magicencode.py
index 5ecaf6c..764b05f 100644
--- a/src/escpos/magicencode.py
+++ b/src/escpos/magicencode.py
@@ -52,14 +52,14 @@ class Encoder(object):
     def get_sequence(self, encoding):
         return int(self.codepages[encoding])
 
-    def get_encoding(self, encoding):
+    def get_encoding_name(self, encoding):
         """Given an encoding provided by the user, will return a
         canonical encoding name; and also validate that the encoding
         is supported.
 
         TODO: Support encoding aliases: pc437 instead of cp437.
         """
-        encoding = CodePages.get_encoding(encoding)
+        encoding = CodePages.get_encoding_name(encoding)
         if not encoding in self.codepages:
             raise ValueError((
                     'Encoding "{}" cannot be used for the current profile. '
@@ -72,19 +72,24 @@ class Encoder(object):
         
         Gets characters 128-255 for a given code page, as an array.
         
-        :param encoding: The name of the encoding. This must be a valid python encoding.
+        :param encoding: The name of the encoding. This must appear in the CodePage list
         """
-        # Compute the encodable characters as an array (this is the format
-        # that for non-standard codings come in)
-        encodable_chars = [u" "] * 128
-        for i in range(0, 128):
-            codepoint = i + 128
-            try:
-                encodable_chars[i] = bytes([codepoint]).decode(encoding)
-            except UnicodeDecodeError:
-                # Non-encodable character
-                pass
-        return encodable_chars
+        codepage = CodePages.get_encoding(encoding)
+        if 'data' in codepage:
+            encodable_chars = list("".join(codepage['data']))
+            assert(len(encodable_chars) == 128)
+            return encodable_chars
+        elif 'python_encode' in codepage:
+            encodable_chars = [u" "] * 128
+            for i in range(0, 128):
+                codepoint = i + 128
+                try:
+                    encodable_chars[i] = bytes([codepoint]).decode(codepage['python_encode'])
+                except UnicodeDecodeError:
+                    # Non-encodable character, just skip it
+                    pass
+            return encodable_chars
+        raise LookupError("Can't find a known encoding for {}".format(encoding))
 
     def _get_codepage_char_map(self, encoding):
         """ Get codepage character map
@@ -121,6 +126,29 @@ class Encoder(object):
         is_encodable = char in available_map
         return is_ascii or is_encodable
 
+    def _encode_char(self, char, charmap, defaultchar='?'):
+        """ Encode a single character with the given encoding map
+        
+        :param char: char to encode
+        :param charmap: dictionary for mapping characters in this code page
+        :param defaultchar: single character used when char cannot be encoded
+        :returns: the charmap entry for char, else its ASCII code, else ord(defaultchar)
+        """
+        if char in charmap:
+            return charmap[char]
+        return ord(char) if ord(char) < 128 else ord(defaultchar)
+
+    def encode(self, text, encoding, defaultchar='?'):
+        """ Encode text under the given encoding
+        
+        :param text: Text to encode
+        :param encoding: Encoding name to use (must be defined in capabilities)
+        :param defaultchar: Fallback for non-encodable characters
+        """
+        codepage_char_map = self.available_characters[encoding]
+        # Go through bytearray: on Python 2, bytes([65]) is the string '[65]', not b'A'
+        return bytes(bytearray(self._encode_char(char, codepage_char_map, defaultchar) for char in text))
+
     def __encoding_sort_func(self, item):
         key, index = item
         return (
@@ -194,7 +222,7 @@ class MagicEncode(object):
         self.driver = driver
         self.encoder = encoder or Encoder(driver.profile.get_code_pages())
 
-        self.encoding = self.encoder.get_encoding(encoding) if encoding else None
+        self.encoding = self.encoder.get_encoding_name(encoding) if encoding else None
         self.defaultsymbol = defaultsymbol
         self.disabled = disabled
 
@@ -259,4 +287,4 @@ class MagicEncode(object):
                 six.int2byte(self.encoder.get_sequence(encoding)))
 
         if text:
-            self.driver._raw(CodePages.encode(text, encoding, errors="replace"))
+            self.driver._raw(self.encoder.encode(text, encoding))
diff --git a/test/test_magicencode.py b/test/test_magicencode.py
index d3d3121..665369e 100644
--- a/test/test_magicencode.py
+++ b/test/test_magicencode.py
@@ -36,7 +36,7 @@ class TestEncoder:
 
     @raises(ValueError)
     def test_get_encoding(self):
-        Encoder({}).get_encoding('latin1')
+        Encoder({}).get_encoding_name('latin1')
 
 
 class TestMagicEncode: