"Merge pull request #1 from mike42/miracle2k-text-encoding\n\nText encoding conversion to lookup tables"

2025-12-22 09:53:30 +00:00 · 2016-09-22 22:58:37 +02:00
parent a435b66006 7b68d97f5f
commit a3ca2c2a16
13 changed files with 116 additions and 49 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -5,6 +5,7 @@
 $~
 .idea/
 .directory
+.cache/

 # temporary data
 temp
--- a/.gitmodules
+++ b/.gitmodules
@@ -0,0 +1,3 @@
+[submodule "capabilities-data"]
+	path = capabilities-data
+	url = https://github.com/receipt-print-hq/escpos-printer-db.git
--- a/CONTRIBUTING.rst
+++ b/CONTRIBUTING.rst
@@ -45,8 +45,7 @@ The checks by Landscape and QuantifiedCode that run on every PR will provide you
 GIT
 ^^^
 The master-branch contains code that has been released to PyPi. A release is marked with a tag
-corresponding to the version. Issues are closed when they have been resolved in a released version
-of the package.
+corresponding to the version. Issues are closed when they have been resolved in the development-branch.

 When you have a change to make, begin by creating a new branch from the HEAD of `python-escpos/development`.
 Name your branch to indicate what you are trying to achieve. Good branch names might
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -3,6 +3,7 @@ include *.txt
 include COPYING
 include INSTALL
 include tox.ini
+include capabilities-data/dist/capabilities.json
 recursive-include doc *.bat
 recursive-include doc *.ico
 recursive-include doc *.py
--- a/1
+++ b/1
--- a/doc/user/methods.rst
+++ b/doc/user/methods.rst
@@ -84,8 +84,8 @@ text("text")

 Prints raw text. Raises ``TextError`` exception.

-set("align", "font", "type", width, height, invert, smooth, flip)
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+set("align", "font", "text_type", width, height, invert, smooth, flip)
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

 Set text properties.

@@ -96,7 +96,7 @@ Set text properties.
  * RIGHT > > *Default:* left
   
 * ``font`` type could be ``A`` or ``B``. *Default:* A
-* ``type`` type could be ``B`` (Bold), ``U`` (Underline) or ``normal``. *Default:* normal
+* ``text_type`` type could be ``B`` (Bold), ``U`` (Underline) or ``normal``. *Default:* normal
 * ``width`` is a numeric value, 1 is for regular size, and 2 is twice the standard size. *Default*: 1
 * ``height`` is a numeric value, 1 is for regular size and 2 is twice the standard size. *Default*: 1
 * ``invert`` is a boolean value, True enables white on black printing. *Default*: False
--- a/setup.py
+++ b/setup.py
@@ -72,7 +72,7 @@ setup(
    author='Manuel F Martinez and others',
    author_email='manpaz@bashlinux.com',
    maintainer='Patrick Kanzler',
-    maintainer_email='patrick.kanzler@fablab.fau.de',
+    maintainer_email='dev@pkanzler.de',
    keywords=[
        'ESC/POS',
        'thermoprinter',
@@ -113,6 +113,7 @@ setup(
        'pyyaml',
        'argparse',
        'argcomplete',
+        'future'
    ],
    setup_requires=[
        'setuptools_scm',
--- a/src/escpos/capabilities.json
+++ b/src/escpos/capabilities.json
--- a/src/escpos/capabilities.json
+++ b/src/escpos/capabilities.json
@@ -0,0 +1 @@
+../../capabilities-data/dist/capabilities.json
--- a/src/escpos/capabilities.py
+++ b/src/escpos/capabilities.py
@@ -58,7 +58,7 @@ class BaseProfile(object):
    def get_code_pages(self):
        """Return the support code pages as a {name: index} dict.
        """
-        return {v.lower(): k for k, v in self.codePages.items()}
+        return {v: k for k, v in self.codePages.items()}



--- a/src/escpos/codepages.py
+++ b/src/escpos/codepages.py
@@ -12,21 +12,11 @@ class CodePageManager:
    def get_all(self):
        return self.data.values()

-    def encode(self, text, encoding, errors='strict'):
-        """Adds support for Japanese to the builtin str.encode().
-
-        TODO: Add support for custom code page data from
-        escpos-printer-db.
-        """
-        # Python has not have this builtin?
-        if encoding.upper() == 'KATAKANA':
-            return encode_katakana(text)
-
-        return text.encode(encoding, errors=errors)
+    def get_encoding_name(self, encoding):
+        # TODO resolve the encoding alias
+        return encoding.upper()

    def get_encoding(self, encoding):
-        # resolve the encoding alias
-        return encoding.lower()
-
+        return self.data[encoding]

 CodePages = CodePageManager(CAPABILITIES['encodings'])
--- a/src/escpos/magicencode.py
+++ b/src/escpos/magicencode.py
@@ -17,6 +17,7 @@ from __future__ import division
 from __future__ import print_function
 from __future__ import unicode_literals

+from builtins import bytes
 from .constants import CODEPAGE_CHANGE
 from .exceptions import CharCodeError, Error
 from .capabilities import get_profile
@@ -45,19 +46,20 @@ class Encoder(object):
    def __init__(self, codepage_map):
        self.codepages = codepage_map
        self.available_encodings = set(codepage_map.keys())
+        self.available_characters = {}
        self.used_encodings = set()

    def get_sequence(self, encoding):
        return int(self.codepages[encoding])

-    def get_encoding(self, encoding):
+    def get_encoding_name(self, encoding):
        """Given an encoding provided by the user, will return a
        canonical encoding name; and also validate that the encoding
        is supported.

        TODO: Support encoding aliases: pc437 instead of cp437.
        """
-        encoding = CodePages.get_encoding(encoding)
+        encoding = CodePages.get_encoding_name(encoding)
        if not encoding in self.codepages:
            raise ValueError((
                    'Encoding "{}" cannot be used for the current profile. '
@@ -65,18 +67,87 @@ class Encoder(object):
                ).format(encoding, ','.join(self.codepages.keys())))
        return encoding

-    def can_encode(self, encoding, char):
+    def _get_codepage_char_list(self, encoding):
+        """Get codepage character list
+        
+        Gets the characters for byte values 128-255 of a given code page, as a list.
+        
+        :param encoding: The name of the encoding. This must appear in the CodePage list
+        """
+        codepage = CodePages.get_encoding(encoding)
+        if 'data' in codepage:
+            encodable_chars = list("".join(codepage['data']))
+            assert(len(encodable_chars) == 128)
+            return encodable_chars
+        elif 'python_encode' in codepage:
+            encodable_chars = [u" "] * 128
+            for i in range(0, 128):
+                codepoint = i + 128
                try:
-            encoded = CodePages.encode(char, encoding)
-            assert type(encoded) is bytes
-            return encoded
+                    encodable_chars[i] = bytes([codepoint]).decode(codepage['python_encode'])
+                except UnicodeDecodeError:
+                    # This byte is undefined in the code page; keep the placeholder space
+                    pass
+            return encodable_chars
+        raise LookupError("Can't find a known encoding for {}".format(encoding))
+
+    def _get_codepage_char_map(self, encoding):
+        """ Get codepage character map
+        
+        Process an encoding and return a map of Unicode characters to byte values
+        in this encoding.
+        
+        This is generated once only, and returned from a cache.
+        
+        :param encoding: The name of the encoding.
+        """
+        # Skip things that were loaded previously
+        if encoding in self.available_characters:
+            return self.available_characters[encoding]
+        codepage_char_list = self._get_codepage_char_list(encoding)
+        codepage_char_map = dict((utf8, i + 128) for (i, utf8) in enumerate(codepage_char_list))
+        self.available_characters[encoding] = codepage_char_map
+        return codepage_char_map
+
+    def can_encode(self, encoding, char):
+        """Determine if a character is encodable in the given code page.
+        
+        :param encoding: The name of the encoding.
+        :param char: The character to attempt to encode.
+        """
+        available_map = {}
+        try:
+            available_map = self._get_codepage_char_map(encoding)
        except LookupError:
-            # We don't have this encoding
-            return False
-        except UnicodeEncodeError:
            return False

-        return True
+        # Decide whether this character is encodable in this code page
+        is_ascii = ord(char) < 128
+        is_encodable = char in available_map
+        return is_ascii or is_encodable
+
+    def _encode_char(self, char, charmap, defaultchar):
+        """ Encode a single character with the given encoding map
+        
+        :param char: char to encode
+        :param charmap: dictionary for mapping characters in this code page
+        """
+        if ord(char) < 128:
+            return ord(char)
+        if char in charmap:
+            return charmap[char]
+        return ord(defaultchar)
+
+    def encode(self, text, encoding, defaultchar='?'):
+        """ Encode text under the given encoding
+        
+        :param text: Text to encode
+        :param encoding: Encoding name to use (must be defined in capabilities)
+        :param defaultchar: Fallback for non-encodable characters
+        """
+        codepage_char_map = self._get_codepage_char_map(encoding)
+        output_bytes = bytes([self._encode_char(char, codepage_char_map, defaultchar) for char in text])
+        return output_bytes

    def __encoding_sort_func(self, item):
        key, index = item
@@ -151,7 +222,7 @@ class MagicEncode(object):
        self.driver = driver
        self.encoder = encoder or Encoder(driver.profile.get_code_pages())

-        self.encoding = self.encoder.get_encoding(encoding) if encoding else None
+        self.encoding = self.encoder.get_encoding_name(encoding) if encoding else None
        self.defaultsymbol = defaultsymbol
        self.disabled = disabled

@@ -216,4 +287,4 @@ class MagicEncode(object):
                six.int2byte(self.encoder.get_sequence(encoding)))

        if text:
-            self.driver._raw(CodePages.encode(text, encoding, errors="replace"))
+            self.driver._raw(self.encoder.encode(text, encoding))
--- a/test/test_function_text.py
+++ b/test/test_function_text.py
@@ -20,7 +20,7 @@ from escpos.printer import Dummy


 def get_printer():
-    return Dummy(magic_encode_args={'disabled': True, 'encoding': 'cp437'})
+    return Dummy(magic_encode_args={'disabled': True, 'encoding': 'CP437'})


@given(text=st.text())
--- a/test/test_magicencode.py
+++ b/test/test_magicencode.py
@@ -26,17 +26,17 @@ from escpos.exceptions import CharCodeError, Error
 class TestEncoder:

    def test_can_encode(self):
-        assert not Encoder({'cp437': 1}).can_encode('cp437', u'€')
-        assert Encoder({'cp437': 1}).can_encode('cp437', u'á')
+        assert not Encoder({'CP437': 1}).can_encode('CP437', u'€')
+        assert Encoder({'CP437': 1}).can_encode('CP437', u'á')
        assert not Encoder({'foobar': 1}).can_encode('foobar', 'a')

    def test_find_suitable_encoding(self):
-        assert not Encoder({'cp437': 1}).find_suitable_encoding(u'€')
-        assert Encoder({'cp858': 1}).find_suitable_encoding(u'€') == 'cp858'
+        assert not Encoder({'CP437': 1}).find_suitable_encoding(u'€')
+        assert Encoder({'CP858': 1}).find_suitable_encoding(u'€') == 'CP858'

    @raises(ValueError)
    def test_get_encoding(self):
-        Encoder({}).get_encoding('latin1')
+        Encoder({}).get_encoding_name('latin1')


 class TestMagicEncode:
@@ -51,17 +51,17 @@ class TestMagicEncode:

        def test_init_from_none(self, driver):
            encode = MagicEncode(driver, encoding=None)
-            encode.write_with_encoding('cp858', '€ ist teuro.')
+            encode.write_with_encoding('CP858', '€ ist teuro.')
            assert driver.output == b'\x1bt\x13\xd5 ist teuro.'

        def test_change_from_another(self, driver):
-            encode = MagicEncode(driver, encoding='cp437')
-            encode.write_with_encoding('cp858', '€ ist teuro.')
+            encode = MagicEncode(driver, encoding='CP437')
+            encode.write_with_encoding('CP858', '€ ist teuro.')
            assert driver.output == b'\x1bt\x13\xd5 ist teuro.'

        def test_no_change(self, driver):
-            encode = MagicEncode(driver, encoding='cp858')
-            encode.write_with_encoding('cp858', '€ ist teuro.')
+            encode = MagicEncode(driver, encoding='CP858')
+            encode.write_with_encoding('CP858', '€ ist teuro.')
            assert driver.output == b'\xd5 ist teuro.'

    class TestWrite:
@@ -72,14 +72,14 @@ class TestMagicEncode:
            assert driver.output == b'\x1bt\x0f\xa4 ist teuro.'

        def test_write_disabled(self, driver):
-            encode = MagicEncode(driver, encoding='cp437', disabled=True)
+            encode = MagicEncode(driver, encoding='CP437', disabled=True)
            encode.write('€ ist teuro.')
            assert driver.output == b'? ist teuro.'

        def test_write_no_codepage(self, driver):
            encode = MagicEncode(
-                driver, defaultsymbol="_", encoder=Encoder({'cp437': 1}),
-                encoding='cp437')
+                driver, defaultsymbol="_", encoder=Encoder({'CP437': 1}),
+                encoding='CP437')
            encode.write(u'€ ist teuro.')
            assert driver.output == b'_ ist teuro.'

@@ -87,7 +87,7 @@ class TestMagicEncode:

        def test(self, driver):
            encode = MagicEncode(driver)
-            encode.force_encoding('cp437')
+            encode.force_encoding('CP437')
            assert driver.output == b'\x1bt\x00'

            encode.write('€ ist teuro.')
				`@@ -0,0 +1 @@`
				`../../capabilities-data/dist/capabilities.json`