add automatic codepage-changing

This code is adapted from the works by Frédéric Van der Essen in pyxmlescpos. I had to adapt the code completely in order to make it compatible with modern unicode-handling Further changes: * improve text unittests in CLI and MagicEncode with hypothesis * add feature force_encoding in order to enable old behaviour * disable cli_text_test (for now) * fix charcode(): it does now cooperate with the new structure * remove redundant variable codepage from class Escpos
2025-12-02 09:43:30 +00:00 · 2016-07-23 22:16:11 +02:00
parent b0af9e9652
commit 0cfedb5706
6 changed files with 357 additions and 141 deletions
--- a/src/escpos/escpos.py
+++ b/src/escpos/escpos.py
@@ -20,6 +20,7 @@ import textwrap

 from .constants import *
 from .exceptions import *
+from .magicencode import MagicEncode

 from abc import ABCMeta, abstractmethod  # abstract base class support
 from escpos.image import EscposImage
@@ -33,13 +34,13 @@ class Escpos(object):
    class.
    """
    device = None
-    codepage = None

-    def __init__(self, columns=32):
+    def __init__(self, columns=32, **kwargs):
        """ Initialize ESCPOS Printer

        :param columns: Text columns used by the printer. Defaults to 32."""
        self.columns = columns
+        self.magic = MagicEncode(**kwargs)

    def __del__(self):
        """ call self.close upon deletion """
@@ -203,82 +204,21 @@ class Escpos(object):
            inp_number //= 256
        return outp

-    def charcode(self, code):
+    def charcode(self, code="AUTO"):
        """ Set Character Code Table

-        Sends the control sequence from :py:mod:`escpos.constants` to the printer
-        with :py:meth:`escpos.printer.'implementation'._raw()`.
+        Sets the control sequence from ``CHARCODE`` in :py:mod:`escpos.constants` as active. It will be sent with
+        the next text sequence. If you set the variable code to ``AUTO`` it will try to automatically guess the
+        right codepage. (This is the standard behaviour.)

        :param code: Name of CharCode
        :raises: :py:exc:`~escpos.exceptions.CharCodeError`
        """
-        # TODO improve this (rather unhandy code)
-        # TODO check the codepages
-        if code.upper() == "USA":
-            self._raw(CHARCODE_PC437)
-            self.codepage = 'cp437'
-        elif code.upper() == "JIS":
-            self._raw(CHARCODE_JIS)
-            self.codepage = 'cp932'
-        elif code.upper() == "MULTILINGUAL":
-            self._raw(CHARCODE_PC850)
-            self.codepage = 'cp850'
-        elif code.upper() == "PORTUGUESE":
-            self._raw(CHARCODE_PC860)
-            self.codepage = 'cp860'
-        elif code.upper() == "CA_FRENCH":
-            self._raw(CHARCODE_PC863)
-            self.codepage = 'cp863'
-        elif code.upper() == "NORDIC":
-            self._raw(CHARCODE_PC865)
-            self.codepage = 'cp865'
-        elif code.upper() == "WEST_EUROPE":
-            self._raw(CHARCODE_WEU)
-            self.codepage = 'latin_1'
-        elif code.upper() == "GREEK":
-            self._raw(CHARCODE_GREEK)
-            self.codepage = 'cp737'
-        elif code.upper() == "HEBREW":
-            self._raw(CHARCODE_HEBREW)
-            self.codepage = 'cp862'
-        # elif code.upper() == "LATVIAN":  # this is not listed in the constants
-        #    self._raw(CHARCODE_PC755)
-        #    self.codepage = 'cp'
-        elif code.upper() == "WPC1252":
-            self._raw(CHARCODE_PC1252)
-            self.codepage = 'cp1252'
-        elif code.upper() == "CIRILLIC2":
-            self._raw(CHARCODE_PC866)
-            self.codepage = 'cp866'
-        elif code.upper() == "LATIN2":
-            self._raw(CHARCODE_PC852)
-            self.codepage = 'cp852'
-        elif code.upper() == "EURO":
-            self._raw(CHARCODE_PC858)
-            self.codepage = 'cp858'
-        elif code.upper() == "THAI42":
-            self._raw(CHARCODE_THAI42)
-            self.codepage = 'cp874'
-        elif code.upper() == "THAI11":
-            self._raw(CHARCODE_THAI11)
-            self.codepage = 'cp874'
-        elif code.upper() == "THAI13":
-            self._raw(CHARCODE_THAI13)
-            self.codepage = 'cp874'
-        elif code.upper() == "THAI14":
-            self._raw(CHARCODE_THAI14)
-            self.codepage = 'cp874'
-        elif code.upper() == "THAI16":
-            self._raw(CHARCODE_THAI16)
-            self.codepage = 'cp874'
-        elif code.upper() == "THAI17":
-            self._raw(CHARCODE_THAI17)
-            self.codepage = 'cp874'
-        elif code.upper() == "THAI18":
-            self._raw(CHARCODE_THAI18)
-            self.codepage = 'cp874'
+        if code.upper() == "AUTO":
+            self.magic.force_encoding = False
        else:
-            raise CharCodeError()
+            self.magic.encoding = self.magic.codepage_sequence(code)
+            self.magic.force_encoding = True

    def barcode(self, code, bc, height=64, width=3, pos="BELOW", font="A", align_ct=True, function_type="A"):
        """ Print Barcode
@@ -418,14 +358,8 @@ class Escpos(object):
        :param txt: text to be printed
        :raises: :py:exc:`~escpos.exceptions.TextError`
        """
-        if txt:
-            if self.codepage:
-                self._raw(txt.encode(self.codepage))
-            else:
-                self._raw(txt.encode())
-        else:
-            # TODO: why is it problematic to print an empty string?
-            raise TextError()
+        txt = six.text_type(txt)
+        self._raw(self.magic.encode_text(txt=txt))

    def block_text(self, txt, columns=None):
        """ Text is printed wrapped to specified columns
--- a/src/escpos/magicencode.py
+++ b/src/escpos/magicencode.py
@@ -0,0 +1,252 @@
+#!/usr/bin/python
+#  -*- coding: utf-8 -*-
+""" Magic Encode
+
+This module tries to convert an UTF-8 string to an encoded string for the printer.
+It uses trial and error in order to guess the right codepage.
+The code is based on the encoding-code in py-xml-escpos by @fvdsn.
+
+:author: `Patrick Kanzler <dev@pkanzler.de>`_
+:organization: `python-escpos <https://github.com/python-escpos>`_
+:copyright: Copyright (c) 2016 Patrick Kanzler and Frédéric van der Essen
+:license: GNU GPL v3
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from __future__ import unicode_literals
+
+from .constants import CHARCODE
+from .exceptions import CharCodeError, Error
+import copy
+import six
+
+try:
+    import jcconv
+except ImportError:
+    jcconv = None
+
+class MagicEncode(object):
+    """ Magic Encode Class
+
+    It tries to automatically encode utf-8 input into the right coding. When encoding is impossible a configurable
+    symbol will be inserted.
+    """
+    def __init__(self, startencoding='PC437', force_encoding=False, defaultsymbol=b'', defaultencoding='PC437'):
+        # running these functions makes sure that the encoding is suitable
+        MagicEncode.codepage_name(startencoding)
+        MagicEncode.codepage_name(defaultencoding)
+
+        self.encoding = startencoding
+        self.defaultsymbol = defaultsymbol
+        if type(self.defaultsymbol) is not six.binary_type:
+            raise Error("The supplied symbol {sym} has to be a binary string".format(sym=defaultsymbol))
+        self.defaultencoding = defaultencoding
+        self.force_encoding = force_encoding
+
+    def set_encoding(self, encoding='PC437', force_encoding=False):
+        """sets an encoding (normally not used)
+
+        This function should normally not be used since it manipulates the automagic behaviour. However, if you want to
+        force a certain codepage, then you can use this function.
+
+        :param encoding: must be a valid encoding from CHARCODE
+        :param force_encoding: whether the encoding should not be changed automatically
+        """
+        self.codepage_name(encoding)
+        self.encoding = encoding
+        self.force_encoding = force_encoding
+
+    @staticmethod
+    def codepage_sequence(codepage):
+        """returns the corresponding codepage-sequence"""
+        try:
+            return CHARCODE[codepage][0]
+        except KeyError:
+            raise CharCodeError("The encoding {enc} is unknown.".format(enc=codepage))
+
+    @staticmethod
+    def codepage_name(codepage):
+        """returns the corresponding codepage-name (for python)"""
+        try:
+            name = CHARCODE[codepage][1]
+            if name == '':
+                raise CharCodeError("The codepage {enc} does not have a connected python-codepage".format(enc=codepage))
+            return name
+        except KeyError:
+            raise CharCodeError("The encoding {enc} is unknown.".format(enc=codepage))
+
+    def encode_char(self, char):
+        """
+        Encodes a single unicode character into a sequence of
+        esc-pos code page change instructions and character declarations
+        """
+        if type(char) is not six.text_type:
+            raise Error("The supplied text has to be unicode, but is of type {type}.".format(
+                type=type(char)
+            ))
+        encoded = b''
+        encoding = self.encoding  # we reuse the last encoding to prevent code page switches at every character
+        remaining = copy.copy(CHARCODE)
+
+        while True:  # Trying all encoding until one succeeds
+            try:
+                if encoding == 'KATAKANA':  # Japanese characters
+                    if jcconv:
+                        # try to convert japanese text to half-katakanas
+                        kata = jcconv.kata2half(jcconv.hira2kata(char))
+                        if kata != char:
+                            self.extra_chars += len(kata) - 1
+                            # the conversion may result in multiple characters
+                            return self.encode_str(kata)
+                    else:
+                        kata = char
+
+                    if kata in TXT_ENC_KATAKANA_MAP:
+                        encoded = TXT_ENC_KATAKANA_MAP[kata]
+                        break
+                    else:
+                        raise ValueError()
+                else:
+                    try:
+                        enc_name = MagicEncode.codepage_name(encoding)
+                        encoded = char.encode(enc_name)
+                        assert type(encoded) is bytes
+                    except LookupError:
+                        raise ValueError("The encoding {enc} seems to not exist in Python".format(enc=encoding))
+                    except CharCodeError:
+                        raise ValueError("The encoding {enc} is not fully configured in constants".format(
+                            enc=encoding
+                        ))
+                    break
+
+            except ValueError:  # the encoding failed, select another one and retry
+                if encoding in remaining:
+                    del remaining[encoding]
+                if len(remaining) >= 1:
+                    encoding = list(remaining)[0]
+                else:
+                    encoding = self.defaultencoding
+                    encoded = self.defaultsymbol  # could not encode, output error character
+                    break
+
+        if encoding != self.encoding:
+            # if the encoding changed, remember it and prefix the character with
+            # the esc-pos encoding change sequence
+            self.encoding = encoding
+            encoded = CHARCODE[encoding][0] + encoded
+
+        return encoded
+
+    def encode_str(self, txt):
+        # make sure the right codepage is set in the printer
+        buffer = self.codepage_sequence(self.encoding)
+        if self.force_encoding:
+            buffer += txt.encode(self.codepage)
+        else:
+            for c in txt:
+                buffer += self.encode_char(c)
+        return buffer
+
+    def encode_text(self, txt):
+        """returns a byte-string with encoded text
+
+        :param txt: text that shall be encoded
+        :return: byte-string for the printer
+        """
+        if not txt:
+            return
+
+        self.extra_chars = 0
+
+        txt = self.encode_str(txt)
+
+        # if the utf-8 -> codepage conversion inserted extra characters,
+        # remove double spaces to try to restore the original string length
+        # and prevent printing alignment issues
+        while self.extra_chars > 0:
+            dspace = txt.find('  ')
+            if dspace > 0:
+                txt = txt[:dspace] + txt[dspace+1:]
+                self.extra_chars -= 1
+            else:
+                break
+
+        return txt
+
+
+# todo emoticons mit charmap encoden
+# todo Escpos liste von unterdrückten charcodes mitgeben
+# todo Doku anpassen
+# todo Changelog schreiben
+
+
+TXT_ENC_KATAKANA_MAP = {
+    # Maps UTF-8 Katakana symbols to KATAKANA Page Codes
+
+    # Half-Width Katakanas
+    '｡': b'\xa1',
+    '｢': b'\xa2',
+    '｣': b'\xa3',
+    '､': b'\xa4',
+    '･': b'\xa5',
+    'ｦ': b'\xa6',
+    'ｧ': b'\xa7',
+    'ｨ': b'\xa8',
+    'ｩ': b'\xa9',
+    'ｪ': b'\xaa',
+    'ｫ': b'\xab',
+    'ｬ': b'\xac',
+    'ｭ': b'\xad',
+    'ｮ': b'\xae',
+    'ｯ': b'\xaf',
+    'ｰ': b'\xb0',
+    'ｱ': b'\xb1',
+    'ｲ': b'\xb2',
+    'ｳ': b'\xb3',
+    'ｴ': b'\xb4',
+    'ｵ': b'\xb5',
+    'ｶ': b'\xb6',
+    'ｷ': b'\xb7',
+    'ｸ': b'\xb8',
+    'ｹ': b'\xb9',
+    'ｺ': b'\xba',
+    'ｻ': b'\xbb',
+    'ｼ': b'\xbc',
+    'ｽ': b'\xbd',
+    'ｾ': b'\xbe',
+    'ｿ': b'\xbf',
+    'ﾀ': b'\xc0',
+    'ﾁ': b'\xc1',
+    'ﾂ': b'\xc2',
+    'ﾃ': b'\xc3',
+    'ﾄ': b'\xc4',
+    'ﾅ': b'\xc5',
+    'ﾆ': b'\xc6',
+    'ﾇ': b'\xc7',
+    'ﾈ': b'\xc8',
+    'ﾉ': b'\xc9',
+    'ﾊ': b'\xca',
+    'ﾋ': b'\xcb',
+    'ﾌ': b'\xcc',
+    'ﾍ': b'\xcd',
+    'ﾎ': b'\xce',
+    'ﾏ': b'\xcf',
+    'ﾐ': b'\xd0',
+    'ﾑ': b'\xd1',
+    'ﾒ': b'\xd2',
+    'ﾓ': b'\xd3',
+    'ﾔ': b'\xd4',
+    'ﾕ': b'\xd5',
+    'ﾖ': b'\xd6',
+    'ﾗ': b'\xd7',
+    'ﾘ': b'\xd8',
+    'ﾙ': b'\xd9',
+    'ﾚ': b'\xda',
+    'ﾛ': b'\xdb',
+    'ﾜ': b'\xdc',
+    'ﾝ': b'\xdd',
+    'ﾞ': b'\xde',
+    'ﾟ': b'\xdf',
+}