Largely rewrite the magic text encoding feature.

2025-12-02 09:43:30 +00:00 · 2016-08-27 11:09:08 +02:00
parent f6ce7e45da
commit c7864fd785
6 changed files with 411 additions and 332 deletions
--- a/src/escpos/capabilities.yml
+++ b/src/escpos/capabilities.yml
@@ -80,8 +80,8 @@ default:
        #24: // Thai Character Code 16
        #25: // Thai Character Code 17
        #26: // Thai Character Code 18
-        30: 'TCVN-3-1', # TCVN-3: Vietnamese
-        31: 'TCVN-3-2', # TCVN-3: Vietnamese
+        30: 'TCVN-3-1' # TCVN-3: Vietnamese
+        31: 'TCVN-3-2' # TCVN-3: Vietnamese
        32: "CP720"
        33: "CP775"
        34: "CP855"
@@ -152,13 +152,13 @@ epson:
        a: 42
        b: 56
    codePages:
-        - PC437 # 0
+        - cp437 # 0
        - Katakana # 1
-        - PC850 # 2
-        - PC860 # 3
-        - PC863 # 4
-        - PC865 # 5
-        - PC858 # 19
+        - cp850 # 2
+        - cp860 # 3
+        - cp863 # 4
+        - cp865 # 5
+        - cp858 # 19
        - blank

 # http://support.epostraders.co.uk/support-files/documents/3/l7O-TM-T88II_TechnicalRefGuide.pdf
@@ -168,16 +168,16 @@ epson:
        a: 42
        b: 56
    codePages:
-        - PC437 # 0
+        - CP437 # 0
        - Katakana # 1
-        - PC850 # 2
-        - PC860 # 3
-        - PC863 # 4
-        - PC865 # 5
-        - WPC1252 # 16
-        - PC866 # 17
-        - PC852 # 18
-        - PC858 # 19
+        - CP850 # 2
+        - CP860 # 3
+        - CP863 # 4
+        - CP865 # 5
+        - PC1252 # 16
+        - CP866 # 17
+        - CP852 # 18
+        - CP858 # 19
        - blank


--- a/src/escpos/constants.py
+++ b/src/escpos/constants.py
@@ -101,99 +101,101 @@ TXT_ALIGN_RT   = ESC + b'\x61\x02'  # Right justification
 TXT_INVERT_ON  = GS  + b'\x42\x01'  # Inverse Printing ON
 TXT_INVERT_OFF = GS  + b'\x42\x00'  # Inverse Printing OFF

+
+CODEPAGE_CHANGE = ESC + b'\x74'
 # Char code table
-CHARCODE = {
-    'PC437':
-        [ESC + b'\x74\x00', 'cp437'],  # PC437 USA
-    'KATAKANA':
-        [ESC + b'\x74\x01', ''],  # KATAKANA (JAPAN)
-    'PC850':
-        [ESC + b'\x74\x02', 'cp850'],  # PC850 Multilingual
-    'PC860':
-        [ESC + b'\x74\x03', 'cp860'],  # PC860 Portuguese
-    'PC863':
-        [ESC + b'\x74\x04', 'cp863'],  # PC863 Canadian-French
-    'PC865':
-        [ESC + b'\x74\x05', 'cp865'],  # PC865 Nordic
-    'KANJI6':
-        [ESC + b'\x74\x06', ''],  # One-pass Kanji, Hiragana
-    'KANJI7':
-        [ESC + b'\x74\x07', ''],  # One-pass Kanji
-    'KANJI8':
-        [ESC + b'\x74\x08', ''],  # One-pass Kanji
-    'PC851':
-        [ESC + b'\x74\x0b', 'cp851'],  # PC851 Greek
-    'PC853':
-        [ESC + b'\x74\x0c', 'cp853'],  # PC853 Turkish
-    'PC857':
-        [ESC + b'\x74\x0d', 'cp857'],  # PC857 Turkish
-    'PC737':
-        [ESC + b'\x74\x0e', 'cp737'],  # PC737 Greek
-    '8859_7':
-        [ESC + b'\x74\x0f', 'iso8859_7'],  # ISO8859-7 Greek
-    'WPC1252':
-        [ESC + b'\x74\x10', 'cp1252'],  # WPC1252
-    'PC866':
-        [ESC + b'\x74\x11', 'cp866'],  # PC866 Cyrillic #2
-    'PC852':
-        [ESC + b'\x74\x12', 'cp852'],  # PC852 Latin2
-    'PC858':
-        [ESC + b'\x74\x13', 'cp858'],  # PC858 Euro
-    'KU42':
-        [ESC + b'\x74\x14', ''],  # KU42 Thai
-    'TIS11':
-        [ESC + b'\x74\x15', ''],  # TIS11 Thai
-    'TIS18':
-        [ESC + b'\x74\x1a', ''],  # TIS18 Thai
-    'TCVN3':
-        [ESC + b'\x74\x1e', ''],  # TCVN3 Vietnamese
-    'TCVN3B':
-        [ESC + b'\x74\x1f', ''],  # TCVN3 Vietnamese
-    'PC720':
-        [ESC + b'\x74\x20', 'cp720'],  # PC720 Arabic
-    'WPC775':
-        [ESC + b'\x74\x21', ''],  # WPC775 Baltic Rim
-    'PC855':
-        [ESC + b'\x74\x22', 'cp855'],  # PC855 Cyrillic
-    'PC861':
-        [ESC + b'\x74\x23', 'cp861'],  # PC861 Icelandic
-    'PC862':
-        [ESC + b'\x74\x24', 'cp862'],  # PC862 Hebrew
-    'PC864':
-        [ESC + b'\x74\x25', 'cp864'],  # PC864 Arabic
-    'PC869':
-        [ESC + b'\x74\x26', 'cp869'],  # PC869 Greek
-    '8859_2':
-        [ESC + b'\x74\x27', 'iso8859_2'],  # ISO8859-2 Latin2
-    '8859_9':
-        [ESC + b'\x74\x28', 'iso8859_9'],  # ISO8859-2 Latin9
-    'PC1098':
-        [ESC + b'\x74\x29', 'cp1098'],  # PC1098 Farsi
-    'PC1118':
-        [ESC + b'\x74\x2a', 'cp1118'],  # PC1118 Lithuanian
-    'PC1119':
-        [ESC + b'\x74\x2b', 'cp1119'],  # PC1119 Lithuanian
-    'PC1125':
-        [ESC + b'\x74\x2c', 'cp1125'],  # PC1125 Ukrainian
-    'WPC1250':
-        [ESC + b'\x74\x2d', 'cp1250'],  # WPC1250 Latin2
-    'WPC1251':
-        [ESC + b'\x74\x2e', 'cp1251'],  # WPC1251 Cyrillic
-    'WPC1253':
-        [ESC + b'\x74\x2f', 'cp1253'],  # WPC1253 Greek
-    'WPC1254':
-        [ESC + b'\x74\x30', 'cp1254'],  # WPC1254 Turkish
-    'WPC1255':
-        [ESC + b'\x74\x31', 'cp1255'],  # WPC1255 Hebrew
-    'WPC1256':
-        [ESC + b'\x74\x32', 'cp1256'],  # WPC1256 Arabic
-    'WPC1257':
-        [ESC + b'\x74\x33', 'cp1257'],  # WPC1257 Baltic Rim
-    'WPC1258':
-        [ESC + b'\x74\x34', 'cp1258'],  # WPC1258 Vietnamese
-    'KZ1048':
-        [ESC + b'\x74\x35', 'kz1048'],  # KZ-1048 Kazakhstan
-}
+# CHARCODE = {
+#     'PC437':
+#         [ESC + b'\x74\x00', 'cp437'],  # PC437 USA
+#     'KATAKANA':
+#         [ESC + b'\x74\x01', ''],  # KATAKANA (JAPAN)
+#     'PC850':
+#         [ESC + b'\x74\x02', 'cp850'],  # PC850 Multilingual
+#     'PC860':
+#         [ESC + b'\x74\x03', 'cp860'],  # PC860 Portuguese
+#     'PC863':
+#         [ESC + b'\x74\x04', 'cp863'],  # PC863 Canadian-French
+#     'PC865':
+#         [ESC + b'\x74\x05', 'cp865'],  # PC865 Nordic
+#     'KANJI6':
+#         [ESC + b'\x74\x06', ''],  # One-pass Kanji, Hiragana
+#     'KANJI7':
+#         [ESC + b'\x74\x07', ''],  # One-pass Kanji
+#     'KANJI8':
+#         [ESC + b'\x74\x08', ''],  # One-pass Kanji
+#     'PC851':
+#         [ESC + b'\x74\x0b', 'cp851'],  # PC851 Greek
+#     'PC853':
+#         [ESC + b'\x74\x0c', 'cp853'],  # PC853 Turkish
+#     'PC857':
+#         [ESC + b'\x74\x0d', 'cp857'],  # PC857 Turkish
+#     'PC737':
+#         [ESC + b'\x74\x0e', 'cp737'],  # PC737 Greek
+#     '8859_7':
+#         [ESC + b'\x74\x0f', 'iso8859_7'],  # ISO8859-7 Greek
+#     'WPC1252':
+#         [ESC + b'\x74\x10', 'cp1252'],  # WPC1252
+#     'PC866':
+#         [ESC + b'\x74\x11', 'cp866'],  # PC866 Cyrillic #2
+#     'PC852':
+#         [ESC + b'\x74\x12', 'cp852'],  # PC852 Latin2
+#     'PC858':
+#         [ESC + b'\x74\x13', 'cp858'],  # PC858 Euro
+#     'KU42':
+#         [ESC + b'\x74\x14', ''],  # KU42 Thai
+#     'TIS11':
+#         [ESC + b'\x74\x15', ''],  # TIS11 Thai
+#     'TIS18':
+#         [ESC + b'\x74\x1a', ''],  # TIS18 Thai
+#     'TCVN3':
+#         [ESC + b'\x74\x1e', ''],  # TCVN3 Vietnamese
+#     'TCVN3B':
+#         [ESC + b'\x74\x1f', ''],  # TCVN3 Vietnamese
+#     'PC720':
+#         [ESC + b'\x74\x20', 'cp720'],  # PC720 Arabic
+#     'WPC775':
+#         [ESC + b'\x74\x21', ''],  # WPC775 Baltic Rim
+#     'PC855':
+#         [ESC + b'\x74\x22', 'cp855'],  # PC855 Cyrillic
+#     'PC861':
+#         [ESC + b'\x74\x23', 'cp861'],  # PC861 Icelandic
+#     'PC862':
+#         [ESC + b'\x74\x24', 'cp862'],  # PC862 Hebrew
+#     'PC864':
+#         [ESC + b'\x74\x25', 'cp864'],  # PC864 Arabic
+#     'PC869':
+#         [ESC + b'\x74\x26', 'cp869'],  # PC869 Greek
+#     '8859_2':
+#         [ESC + b'\x74\x27', 'iso8859_2'],  # ISO8859-2 Latin2
+#     '8859_9':
+#         [ESC + b'\x74\x28', 'iso8859_9'],  # ISO8859-2 Latin9
+#     'PC1098':
+#         [ESC + b'\x74\x29', 'cp1098'],  # PC1098 Farsi
+#     'PC1118':
+#         [ESC + b'\x74\x2a', 'cp1118'],  # PC1118 Lithuanian
+#     'PC1119':
+#         [ESC + b'\x74\x2b', 'cp1119'],  # PC1119 Lithuanian
+#     'PC1125':
+#         [ESC + b'\x74\x2c', 'cp1125'],  # PC1125 Ukrainian
+#     'WPC1250':
+#         [ESC + b'\x74\x2d', 'cp1250'],  # WPC1250 Latin2
+#     'WPC1251':
+#         [ESC + b'\x74\x2e', 'cp1251'],  # WPC1251 Cyrillic
+#     'WPC1253':
+#         [ESC + b'\x74\x2f', 'cp1253'],  # WPC1253 Greek
+#     'WPC1254':
+#         [ESC + b'\x74\x30', 'cp1254'],  # WPC1254 Turkish
+#     'WPC1255':
+#         [ESC + b'\x74\x31', 'cp1255'],  # WPC1255 Hebrew
+#     'WPC1256':
+#         [ESC + b'\x74\x32', 'cp1256'],  # WPC1256 Arabic
+#     'WPC1257':
+#         [ESC + b'\x74\x33', 'cp1257'],  # WPC1257 Baltic Rim
+#     'WPC1258':
+#         [ESC + b'\x74\x34', 'cp1258'],  # WPC1258 Vietnamese
+#     'KZ1048':
+#         [ESC + b'\x74\x35', 'kz1048'],  # KZ-1048 Kazakhstan
+# }

 # Barcode format
 _SET_BARCODE_TXT_POS = lambda n: GS + b'H' + n
--- a/src/escpos/escpos.py
+++ b/src/escpos/escpos.py
@@ -36,12 +36,12 @@ class Escpos(object):
    """
    device = None

-    def __init__(self, profile=None, **kwargs):
+    def __init__(self, profile=None, magic_encode_args=None, **kwargs):
        """ Initialize ESCPOS Printer

        :param profile: Printer profile"""
        self.profile = get_profile(profile)
-        self.magic = MagicEncode(**kwargs)
+        self.magic = MagicEncode(self, **(magic_encode_args or {}))

    def __del__(self):
        """ call self.close upon deletion """
@@ -228,11 +228,9 @@ class Escpos(object):
        :raises: :py:exc:`~escpos.exceptions.CharCodeError`
        """
        if code.upper() == "AUTO":
-            self.magic.force_encoding = False
+            self.magic.force_encoding(False)
        else:
-            self.magic.codepage_sequence(code)
-            self.magic.encoding = code
-            self.magic.force_encoding = True
+            self.magic.force_encoding(code)

    def barcode(self, code, bc, height=64, width=3, pos="BELOW", font="A", align_ct=True, function_type="A"):
        """ Print Barcode
@@ -373,7 +371,7 @@ class Escpos(object):
        :raises: :py:exc:`~escpos.exceptions.TextError`
        """
        txt = six.text_type(txt)
-        self._raw(self.magic.encode_text(txt=txt))
+        self.magic.write(txt)

    def block_text(self, txt, font=None, columns=None):
        """ Text is printed wrapped to specified columns
--- a/src/escpos/magicencode.py
+++ b/src/escpos/magicencode.py
@@ -17,8 +17,9 @@ from __future__ import division
 from __future__ import print_function
 from __future__ import unicode_literals

-from .constants import CHARCODE
+from .constants import CODEPAGE_CHANGE
 from .exceptions import CharCodeError, Error
+from .capabilities import get_profile
 import copy
 import six

@@ -27,153 +28,230 @@ try:
 except ImportError:
    jcconv = None

+
+def encode_katakana(text):
+    """I don't think this quite works yet."""
+    encoded = []
+    for char in text:
+        if jcconv:
+            # try to convert japanese text to half-katakanas
+            char = jcconv.kata2half(jcconv.hira2kata(char))
+            # TODO: "the conversion may result in multiple characters"
+            # When? What should we do about it?
+
+        if char in TXT_ENC_KATAKANA_MAP:
+            encoded.append(TXT_ENC_KATAKANA_MAP[char])
+        else:
+            encoded.append(char)
+    print(encoded)
+    return b"".join(encoded)
+
+
+
+# TODO: When the capabilities.yml format is finished, this should be
+# in the profile itself.
+def get_encodings_from_profile(profile):
+    mapping = {k: v.lower() for k, v in profile.codePageMap.items()}
+    if hasattr(profile, 'codePages'):
+        code_pages = [n.lower() for n in profile.codePages]
+        return {k: v for k, v in mapping.items() if v in code_pages}
+    else:
+        return mapping
+
+
+class CodePages:
+    def get_all(self):
+        return get_encodings_from_profile(get_profile()).values()
+
+    def encode(self, text, encoding, errors='strict'):
+        # Python does not have this encoding built in.
+        if encoding.upper() == 'KATAKANA':
+            return encode_katakana(text)
+
+        return text.encode(encoding, errors=errors)
+
+    def get_encoding(self, encoding):
+        # resolve the encoding alias
+        return encoding.lower()
+
+code_pages = CodePages()
+
+
+class Encoder(object):
+    """Takes a list of available code spaces. Picks the right one for a
+    given character.
+
+    Note: To determine the codespace, it needs to do the conversion, and
+    thus already knows what the final byte in the target encoding would
+    be. Nevertheless, the API of this class doesn't return the byte.
+
+    The caller has to do the character conversion itself.
+
+        $ python -m timeit -s "{u'ö':'a'}.get(u'ö')"
+        100000000 loops, best of 3: 0.0133 usec per loop
+
+        $ python -m timeit -s "u'ö'.encode('latin1')"
+        100000000 loops, best of 3: 0.0141 usec per loop
+    """
+
+    def __init__(self, codepages):
+        self.codepages = codepages
+        self.reverse = {v:k for k, v in codepages.items()}
+        self.available_encodings = set(codepages.values())
+        self.used_encodings = set()
+
+    def get_sequence(self, encoding):
+        return self.reverse[encoding]
+
+    def get_encoding(self, encoding):
+        """resolve aliases
+
+        check that the profile allows this encoding
+        """
+        encoding = code_pages.get_encoding(encoding)
+        if not encoding in self.available_encodings:
+            raise ValueError('This encoding cannot be used for the current profile')
+        return encoding
+
+    def get_encodings(self):
+        """
+        - remove the ones not supported
+        - order by used first, then others
+        - do not use a cache, because encode already is so fast
+        """
+        return self.available_encodings
+
+    def can_encode(self, encoding, char):
+        try:
+            encoded = code_pages.encode(char, encoding)
+            assert type(encoded) is bytes
+            return encoded
+        except LookupError:
+            # We don't have this encoding
+            return False
+        except UnicodeEncodeError:
+            return False
+
+        return True
+
+    def find_suitable_codespace(self, char):
+        """The order of our search is a specific one:
+
+        1. code pages that we already tried before; there is a good
+           chance they might work again, reducing the search space,
+           and by re-using already used encodings we might also
+           reduce the number of codepage change instructions we have
+           to send. Still, any performance gains will presumably be
+           fairly minor.
+
+        2. code pages in lower ESCPOS slots first. Presumably, they
+           are more likely to be supported, so if a printer profile
+           is missing or incomplete, we might increase our chance
+           that the code page we pick for this character is actually
+           supported.
+
+        # XXX actually do speed up the search
+        """
+        for encoding in self.get_encodings():
+            if self.can_encode(encoding, char):
+                # This encoding worked; add it to the set of used ones.
+                self.used_encodings.add(encoding)
+                return encoding
+
+
 class MagicEncode(object):
    """ Magic Encode Class

    It tries to automatically encode utf-8 input into the right coding. When encoding is impossible a configurable
    symbol will be inserted.
+
+    encoding: If you know the current encoding of the printer when
+    initializing this class, set it here. If the current encoding is
+    unknown, the first character emitted will be a codepage switch.
    """
-    def __init__(self, startencoding='PC437', force_encoding=False, defaultsymbol=b'', defaultencoding='PC437'):
-        # running these functions makes sure that the encoding is suitable
-        MagicEncode.codepage_name(startencoding)
-        MagicEncode.codepage_name(defaultencoding)
+    def __init__(self, driver, encoding=None, disabled=False,
+                 defaultsymbol='?', encoder=None):
+        if disabled and not encoding:
+            raise Error('If you disable magic encode, you need to define an encoding!')

-        self.encoding = startencoding
+        self.driver = driver
+        self.encoder = encoder or Encoder(get_encodings_from_profile(driver.profile))
+
+        self.encoding = self.encoder.get_encoding(encoding) if encoding else None
        self.defaultsymbol = defaultsymbol
-        if type(self.defaultsymbol) is not six.binary_type:
-            raise Error("The supplied symbol {sym} has to be a binary string".format(sym=defaultsymbol))
-        self.defaultencoding = defaultencoding
-        self.force_encoding = force_encoding
+        self.disabled = disabled

-    def set_encoding(self, encoding='PC437', force_encoding=False):
-        """sets an encoding (normally not used)
+    def force_encoding(self, encoding):
+        """Sets a fixed encoding. The change is emitted right away.

-        This function should normally not be used since it manipulates the automagic behaviour. However, if you want to
-        force a certain codepage, then you can use this function.
-
-        :param encoding: must be a valid encoding from CHARCODE
-        :param force_encoding: whether the encoding should not be changed automatically
+        From now on, this buffer will not switch the code page anymore.
+        However, it will still keep track of the current code page.
        """
-        self.codepage_name(encoding)
-        self.encoding = encoding
-        self.force_encoding = force_encoding
-
-    @staticmethod
-    def codepage_sequence(codepage):
-        """returns the corresponding codepage-sequence"""
-        try:
-            return CHARCODE[codepage][0]
-        except KeyError:
-            raise CharCodeError("The encoding {enc} is unknown.".format(enc=codepage))
-
-    @staticmethod
-    def codepage_name(codepage):
-        """returns the corresponding codepage-name (for python)"""
-        try:
-            name = CHARCODE[codepage][1]
-            if name == '':
-                raise CharCodeError("The codepage {enc} does not have a connected python-codepage".format(enc=codepage))
-            return name
-        except KeyError:
-            raise CharCodeError("The encoding {enc} is unknown.".format(enc=codepage))
-
-    def encode_char(self, char):
-        """
-        Encodes a single unicode character into a sequence of
-        esc-pos code page change instructions and character declarations
-        """
-        if type(char) is not six.text_type:
-            raise Error("The supplied text has to be unicode, but is of type {type}.".format(
-                type=type(char)
-            ))
-        encoded = b''
-        encoding = self.encoding  # we reuse the last encoding to prevent code page switches at every character
-        remaining = copy.copy(CHARCODE)
-
-        while True:  # Trying all encoding until one succeeds
-            try:
-                if encoding == 'KATAKANA':  # Japanese characters
-                    if jcconv:
-                        # try to convert japanese text to half-katakanas
-                        kata = jcconv.kata2half(jcconv.hira2kata(char))
-                        if kata != char:
-                            self.extra_chars += len(kata) - 1
-                            # the conversion may result in multiple characters
-                            return self.encode_str(kata)
-                    else:
-                        kata = char
-
-                    if kata in TXT_ENC_KATAKANA_MAP:
-                        encoded = TXT_ENC_KATAKANA_MAP[kata]
-                        break
-                    else:
-                        raise ValueError()
-                else:
-                    try:
-                        enc_name = MagicEncode.codepage_name(encoding)
-                        encoded = char.encode(enc_name)
-                        assert type(encoded) is bytes
-                    except LookupError:
-                        raise ValueError("The encoding {enc} seems to not exist in Python".format(enc=encoding))
-                    except CharCodeError:
-                        raise ValueError("The encoding {enc} is not fully configured in constants".format(
-                            enc=encoding
-                        ))
-                    break
-
-            except ValueError:  # the encoding failed, select another one and retry
-                if encoding in remaining:
-                    del remaining[encoding]
-                if len(remaining) >= 1:
-                    encoding = list(remaining)[0]
-                else:
-                    encoding = self.defaultencoding
-                    encoded = self.defaultsymbol  # could not encode, output error character
-                    break
-
-        if encoding != self.encoding:
-            # if the encoding changed, remember it and prefix the character with
-            # the esc-pos encoding change sequence
-            self.encoding = encoding
-            encoded = CHARCODE[encoding][0] + encoded
-
-        return encoded
-
-    def encode_str(self, txt):
-        # make sure the right codepage is set in the printer
-        buffer = self.codepage_sequence(self.encoding)
-        if self.force_encoding:
-            buffer += txt.encode(self.codepage_name(self.encoding))
+        if not encoding:
+            self.disabled = False
        else:
-            for c in txt:
-                buffer += self.encode_char(c)
-        return buffer
+            self.write_with_encoding(encoding, None)
+            self.disabled = True

-    def encode_text(self, txt):
-        """returns a byte-string with encoded text
-
-        :param txt: text that shall be encoded
-        :return: byte-string for the printer
+    def write(self, text):
+        """Write the text, automatically switching encodings.
        """
-        if not txt:
+
+        if self.disabled:
+            self.write_with_encoding(self.encoding, text)
            return

-        self.extra_chars = 0
+        # TODO: Currently this very simple loop means we send every
+        # character individually to the printer. We can probably
+        # improve performance by searching the text for the first
+        # character that cannot be rendered using the current code
+        # page, and then sending all of those characters at once.
+        # Or, should a lower-level buffer be responsible for that?

-        txt = self.encode_str(txt)
+        for char in text:
+            # See if the current code page works for this character.
+            # The encoder object will use a cache to be able to answer
+            # this question fairly easily.
+            if self.encoding and self.encoder.can_encode(self.encoding, char):
+                self.write_with_encoding(self.encoding, char)
+                continue

-        # if the utf-8 -> codepage conversion inserted extra characters,
-        # remove double spaces to try to restore the original string length
-        # and prevent printing alignment issues
-        while self.extra_chars > 0:
-            dspace = txt.find('  ')
-            if dspace > 0:
-                txt = txt[:dspace] + txt[dspace+1:]
-                self.extra_chars -= 1
-            else:
-                break
+            # We have to find another way to print this character.
+            # See if any of the code pages that the printer profile supports
+            # can encode this character.
+            codespace = self.encoder.find_suitable_codespace(char)
+            if not codespace:
+                self._handle_character_failed(char)
+                continue

-        return txt
+            self.write_with_encoding(codespace, char)
+
+    def _handle_character_failed(self, char):
+        """Called when no codepage was found to render a character.
+        """
+        # Writing the default symbol via write() allows us to avoid
+        # unnecessary codepage switches.
+        self.write(self.defaultsymbol)
+
+    def write_with_encoding(self, encoding, text):
+        if text is not None and type(text) is not six.text_type:
+            raise Error("The supplied text has to be unicode, but is of type {type}.".format(
+                type=type(text)
+            ))
+
+        encoding = self.encoder.get_encoding(encoding)
+
+        # We always know the current code page; if the new codepage
+        # is different, emit a change command.
+        if encoding != self.encoding:
+            self.encoding = encoding
+            self.driver._raw(b'{}{}'.format(
+                CODEPAGE_CHANGE,
+                six.int2byte(self.encoder.get_sequence(encoding))
+            ))
+
+        if text:
+            self.driver._raw(code_pages.encode(text, encoding, errors="replace"))


 # todo emoticons mit charmap encoden
--- a/test/conftest.py
+++ b/test/conftest.py
@@ -0,0 +1,7 @@
+import pytest
+from escpos.printer import Dummy
+
+
+@pytest.fixture
+def driver():
+    return Dummy()
--- a/test/test_magicencode.py
+++ b/test/test_magicencode.py
@@ -13,103 +13,97 @@ from __future__ import division
 from __future__ import print_function
 from __future__ import unicode_literals

+import pytest
 from nose.tools import raises, assert_raises
 from hypothesis import given, example
 import hypothesis.strategies as st
-from escpos.magicencode import MagicEncode
+from escpos.magicencode import MagicEncode, Encoder, encode_katakana
 from escpos.exceptions import CharCodeError, Error
-from escpos.constants import CHARCODE
-
-@raises(CharCodeError)
-def test_magic_encode_unkown_char_constant_as_startenc():
-    """tests whether MagicEncode raises the proper Exception when an unknown charcode-name is passed as startencoding"""
-    MagicEncode(startencoding="something")
-
-@raises(CharCodeError)
-def test_magic_encode_unkown_char_constant_as_defaultenc():
-    """tests whether MagicEncode raises the proper Exception when an unknown charcode-name is passed as defaultenc."""
-    MagicEncode(defaultencoding="something")
-
-def test_magic_encode_wo_arguments():
-    """tests whether MagicEncode works in the standard configuration"""
-    MagicEncode()
-
-@raises(Error)
-def test_magic_encode_w_non_binary_defaultsymbol():
-    """tests whether MagicEncode catches non-binary defaultsymbols"""
-    MagicEncode(defaultsymbol="non-binary")
-
-@given(symbol=st.binary())
-def test_magic_encode_w_binary_defaultsymbol(symbol):
-    """tests whether MagicEncode works with any binary symbol"""
-    MagicEncode(defaultsymbol=symbol)
-
-@given(st.text())
-@example("カタカナ")
-@example("あいうえお")
-@example("ﾊﾝｶｸｶﾀｶﾅ")
-def test_magic_encode_encode_text_unicode_string(text):
-    """tests whether MagicEncode can accept a unicode string"""
-    me = MagicEncode()
-    me.encode_text(text)
-
-@given(char=st.characters())
-def test_magic_encode_encode_char(char):
-    """tests the encode_char-method of MagicEncode"""
-    me = MagicEncode()
-    me.encode_char(char)
-
-@raises(Error)
-@given(char=st.binary())
-def test_magic_encode_encode_char_binary(char):
-    """tests the encode_char-method of MagicEncode with binary input"""
-    me = MagicEncode()
-    me.encode_char(char)


-def test_magic_encode_string_with_katakana_and_hiragana():
-    """tests the encode_string-method with katakana and hiragana"""
-    me = MagicEncode()
-    me.encode_str("カタカナ")
-    me.encode_str("あいうえお")

-@raises(CharCodeError)
-def test_magic_encode_codepage_sequence_unknown_key():
-    """tests whether MagicEncode.codepage_sequence raises the proper Exception with unknown charcode-names"""
-    MagicEncode.codepage_sequence("something")
+class TestEncoder:

-@raises(CharCodeError)
-def test_magic_encode_codepage_name_unknown_key():
-    """tests whether MagicEncode.codepage_name raises the proper Exception with unknown charcode-names"""
-    MagicEncode.codepage_name("something")
+    def test_can_encode(self):
+        assert not Encoder({1: 'cp437'}).can_encode('cp437', u'€')
+        assert Encoder({1: 'cp437'}).can_encode('cp437', u'á')
+        assert not Encoder({1: 'foobar'}).can_encode('foobar', 'a')

-def test_magic_encode_constants_getter():
-    """tests whether the constants are properly fetched"""
-    for key in CHARCODE:
-        name = CHARCODE[key][1]
-        if name == '':
-            assert_raises(CharCodeError, MagicEncode.codepage_name, key)
-        else:
-            assert name == MagicEncode.codepage_name(key)
-        assert MagicEncode.codepage_sequence(key) == CHARCODE[key][0]
+    def test_find_suitable_encoding(self):
+        assert not Encoder({1: 'cp437'}).find_suitable_codespace(u'€')
+        assert Encoder({1: 'cp858'}).find_suitable_codespace(u'€') == 'cp858'

-@given(st.text())
-def test_magic_encode_force_encoding(text):
-    """test whether force_encoding works as expected"""
-    me = MagicEncode()
-    assert me.force_encoding is False
-    me.set_encoding(encoding='PC850', force_encoding=True)
-    assert me.encoding == 'PC850'
-    assert me.force_encoding is True
-    try:
-        me.encode_text(text)
-    except UnicodeEncodeError:
-        # we discard these errors as they are to be expected
-        # what we want to check here is, whether encoding or codepage will switch through some of the magic code
-        # being called accidentally
-        pass
-    assert me.encoding == 'PC850'
-    assert me.force_encoding is True
+    @raises(ValueError)
+    def test_get_encoding(self):
+        Encoder({}).get_encoding('latin1')
+
+
+class TestMagicEncode:
+
+    class TestInit:
+
+        def test_disabled_requires_encoding(self, driver):
+            with pytest.raises(Error):
+                MagicEncode(driver, disabled=True)
+
+    class TestWriteWithEncoding:
+
+        def test_init_from_none(self, driver):
+            encode = MagicEncode(driver, encoding=None)
+            encode.write_with_encoding('cp858', '€ ist teuro.')
+            assert driver.output == b'\x1bt\xd5 ist teuro.'
+
+        def test_change_from_another(self, driver):
+            encode = MagicEncode(driver, encoding='cp437')
+            encode.write_with_encoding('cp858', '€ ist teuro.')
+            assert driver.output == b'\x1bt\xd5 ist teuro.'
+
+        def test_no_change(self, driver):
+            encode = MagicEncode(driver, encoding='cp858')
+            encode.write_with_encoding('cp858', '€ ist teuro.')
+            assert driver.output == b'\xd5 ist teuro.'
+
+    class TestWrite:
+
+        def test_write(self, driver):
+            encode = MagicEncode(driver)
+            encode.write('€ ist teuro.')
+            assert driver.output == b'\x1bt\xa4 ist teuro.'
+
+        def test_write_disabled(self, driver):
+            encode = MagicEncode(driver, encoding='cp437', disabled=True)
+            encode.write('€ ist teuro.')
+            assert driver.output == b'? ist teuro.'
+
+        def test_write_no_codepage(self, driver):
+            encode = MagicEncode(
+                driver, defaultsymbol="_", encoder=Encoder({1: 'cp437'}),
+                encoding='cp437')
+            encode.write(u'€ ist teuro.')
+            assert driver.output == b'_ ist teuro.'
+
+    class TestForceEncoding:
+
+        def test(self, driver):
+            encode = MagicEncode(driver)
+            encode.force_encoding('cp437')
+            assert driver.output == b'\x1bt'
+
+            encode.write('€ ist teuro.')
+            assert driver.output == b'\x1bt? ist teuro.'
+
+
+class TestKatakana:
+    @given(st.text())
+    @example("カタカナ")
+    @example("あいうえお")
+    @example("ﾊﾝｶｸｶﾀｶﾅ")
+    def test_accept(self, text):
+        encode_katakana(text)
+
+    def test_result(self):
+        assert encode_katakana('カタカナ') == b'\xb6\xc0\xb6\xc5'
+        assert encode_katakana("あいうえお") == b'\xb1\xb2\xb3\xb4\xb5'


 # TODO Idee für unittest: hypothesis-strings erzeugen, in encode_text werfen