Largely rewrite the magic text encoding feature.

parent f6ce7e45da
commit c7864fd785
@@ -80,8 +80,8 @@ default:
 #24: // Thai Character Code 16
 #25: // Thai Character Code 17
 #26: // Thai Character Code 18
-30: 'TCVN-3-1', # TCVN-3: Vietnamese
-31: 'TCVN-3-2', # TCVN-3: Vietnamese
+30: 'TCVN-3-1' # TCVN-3: Vietnamese
+31: 'TCVN-3-2' # TCVN-3: Vietnamese
 32: "CP720"
 33: "CP775"
 34: "CP855"
@@ -152,13 +152,13 @@ epson:
 a: 42
 b: 56
 codePages:
-- PC437 # 0
+- cp437 # 0
 - Katakana # 1
-- PC850 # 2
-- PC860 # 3
-- PC863 # 4
-- PC865 # 5
-- PC858 # 19
+- cp850 # 2
+- cp860 # 3
+- cp863 # 4
+- cp865 # 5
+- cp858 # 19
 - blank

 # http://support.epostraders.co.uk/support-files/documents/3/l7O-TM-T88II_TechnicalRefGuide.pdf
@@ -168,16 +168,16 @@ epson:
 a: 42
 b: 56
 codePages:
-- PC437 # 0
+- CP437 # 0
 - Katakana # 1
-- PC850 # 2
-- PC860 # 3
-- PC863 # 4
-- PC865 # 5
-- WPC1252 # 16
-- PC866 # 17
-- PC852 # 18
-- PC858 # 19
+- CP850 # 2
+- CP860 # 3
+- CP863 # 4
+- CP865 # 5
+- PC1252 # 16
+- CP866 # 17
+- CP852 # 18
+- CP858 # 19
 - blank


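The codePages lists above are positional: an entry's index is the slot number the printer firmware assigns to that code page (the commented numbers), and the names are matched case-insensitively against Python codec names by the encoder introduced later in this commit. A rough sketch of that mapping, not part of the commit itself; the list below is an abbreviated copy of the Epson profile above, and 'Katakana' is a pseudo-codec handled separately by encode_katakana():

    # Sketch only: turn a profile's positional codePages list into a
    # slot -> codec-name mapping, skipping the 'blank' placeholder slots.
    code_pages = ["CP437", "Katakana", "CP850", "CP860", "CP863", "CP865", "blank"]

    slot_to_codec = {
        slot: name.lower()   # lower-cased, as get_encodings_from_profile() does later
        for slot, name in enumerate(code_pages)
        if name != "blank"
    }
    print(slot_to_codec)  # {0: 'cp437', 1: 'katakana', 2: 'cp850', ...}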
@@ -101,99 +101,101 @@ TXT_ALIGN_RT = ESC + b'\x61\x02' # Right justification
 TXT_INVERT_ON = GS + b'\x42\x01' # Inverse Printing ON
 TXT_INVERT_OFF = GS + b'\x42\x00' # Inverse Printing OFF


+
+CODEPAGE_CHANGE = ESC + b'\x74'

 # Char code table
-CHARCODE = {
+# CHARCODE = {
-'PC437':
+# 'PC437':
-[ESC + b'\x74\x00', 'cp437'], # PC437 USA
+# [ESC + b'\x74\x00', 'cp437'], # PC437 USA
-'KATAKANA':
+# 'KATAKANA':
-[ESC + b'\x74\x01', ''], # KATAKANA (JAPAN)
+# [ESC + b'\x74\x01', ''], # KATAKANA (JAPAN)
-'PC850':
+# 'PC850':
-[ESC + b'\x74\x02', 'cp850'], # PC850 Multilingual
+# [ESC + b'\x74\x02', 'cp850'], # PC850 Multilingual
-'PC860':
+# 'PC860':
-[ESC + b'\x74\x03', 'cp860'], # PC860 Portuguese
+# [ESC + b'\x74\x03', 'cp860'], # PC860 Portuguese
-'PC863':
+# 'PC863':
-[ESC + b'\x74\x04', 'cp863'], # PC863 Canadian-French
+# [ESC + b'\x74\x04', 'cp863'], # PC863 Canadian-French
-'PC865':
+# 'PC865':
-[ESC + b'\x74\x05', 'cp865'], # PC865 Nordic
+# [ESC + b'\x74\x05', 'cp865'], # PC865 Nordic
-'KANJI6':
+# 'KANJI6':
-[ESC + b'\x74\x06', ''], # One-pass Kanji, Hiragana
+# [ESC + b'\x74\x06', ''], # One-pass Kanji, Hiragana
-'KANJI7':
+# 'KANJI7':
-[ESC + b'\x74\x07', ''], # One-pass Kanji
+# [ESC + b'\x74\x07', ''], # One-pass Kanji
-'KANJI8':
+# 'KANJI8':
-[ESC + b'\x74\x08', ''], # One-pass Kanji
+# [ESC + b'\x74\x08', ''], # One-pass Kanji
-'PC851':
+# 'PC851':
-[ESC + b'\x74\x0b', 'cp851'], # PC851 Greek
+# [ESC + b'\x74\x0b', 'cp851'], # PC851 Greek
-'PC853':
+# 'PC853':
-[ESC + b'\x74\x0c', 'cp853'], # PC853 Turkish
+# [ESC + b'\x74\x0c', 'cp853'], # PC853 Turkish
-'PC857':
+# 'PC857':
-[ESC + b'\x74\x0d', 'cp857'], # PC857 Turkish
+# [ESC + b'\x74\x0d', 'cp857'], # PC857 Turkish
-'PC737':
+# 'PC737':
-[ESC + b'\x74\x0e', 'cp737'], # PC737 Greek
+# [ESC + b'\x74\x0e', 'cp737'], # PC737 Greek
-'8859_7':
+# '8859_7':
-[ESC + b'\x74\x0f', 'iso8859_7'], # ISO8859-7 Greek
+# [ESC + b'\x74\x0f', 'iso8859_7'], # ISO8859-7 Greek
-'WPC1252':
+# 'WPC1252':
-[ESC + b'\x74\x10', 'cp1252'], # WPC1252
+# [ESC + b'\x74\x10', 'cp1252'], # WPC1252
-'PC866':
+# 'PC866':
-[ESC + b'\x74\x11', 'cp866'], # PC866 Cyrillic #2
+# [ESC + b'\x74\x11', 'cp866'], # PC866 Cyrillic #2
-'PC852':
+# 'PC852':
-[ESC + b'\x74\x12', 'cp852'], # PC852 Latin2
+# [ESC + b'\x74\x12', 'cp852'], # PC852 Latin2
-'PC858':
+# 'PC858':
-[ESC + b'\x74\x13', 'cp858'], # PC858 Euro
+# [ESC + b'\x74\x13', 'cp858'], # PC858 Euro
-'KU42':
+# 'KU42':
-[ESC + b'\x74\x14', ''], # KU42 Thai
+# [ESC + b'\x74\x14', ''], # KU42 Thai
-'TIS11':
+# 'TIS11':
-[ESC + b'\x74\x15', ''], # TIS11 Thai
+# [ESC + b'\x74\x15', ''], # TIS11 Thai
-'TIS18':
+# 'TIS18':
-[ESC + b'\x74\x1a', ''], # TIS18 Thai
+# [ESC + b'\x74\x1a', ''], # TIS18 Thai
-'TCVN3':
+# 'TCVN3':
-[ESC + b'\x74\x1e', ''], # TCVN3 Vietnamese
+# [ESC + b'\x74\x1e', ''], # TCVN3 Vietnamese
-'TCVN3B':
+# 'TCVN3B':
-[ESC + b'\x74\x1f', ''], # TCVN3 Vietnamese
+# [ESC + b'\x74\x1f', ''], # TCVN3 Vietnamese
-'PC720':
+# 'PC720':
-[ESC + b'\x74\x20', 'cp720'], # PC720 Arabic
+# [ESC + b'\x74\x20', 'cp720'], # PC720 Arabic
-'WPC775':
+# 'WPC775':
-[ESC + b'\x74\x21', ''], # WPC775 Baltic Rim
+# [ESC + b'\x74\x21', ''], # WPC775 Baltic Rim
-'PC855':
+# 'PC855':
-[ESC + b'\x74\x22', 'cp855'], # PC855 Cyrillic
+# [ESC + b'\x74\x22', 'cp855'], # PC855 Cyrillic
-'PC861':
+# 'PC861':
-[ESC + b'\x74\x23', 'cp861'], # PC861 Icelandic
+# [ESC + b'\x74\x23', 'cp861'], # PC861 Icelandic
-'PC862':
+# 'PC862':
-[ESC + b'\x74\x24', 'cp862'], # PC862 Hebrew
+# [ESC + b'\x74\x24', 'cp862'], # PC862 Hebrew
-'PC864':
+# 'PC864':
-[ESC + b'\x74\x25', 'cp864'], # PC864 Arabic
+# [ESC + b'\x74\x25', 'cp864'], # PC864 Arabic
-'PC869':
+# 'PC869':
-[ESC + b'\x74\x26', 'cp869'], # PC869 Greek
+# [ESC + b'\x74\x26', 'cp869'], # PC869 Greek
-'8859_2':
+# '8859_2':
-[ESC + b'\x74\x27', 'iso8859_2'], # ISO8859-2 Latin2
+# [ESC + b'\x74\x27', 'iso8859_2'], # ISO8859-2 Latin2
-'8859_9':
+# '8859_9':
-[ESC + b'\x74\x28', 'iso8859_9'], # ISO8859-2 Latin9
+# [ESC + b'\x74\x28', 'iso8859_9'], # ISO8859-2 Latin9
-'PC1098':
+# 'PC1098':
-[ESC + b'\x74\x29', 'cp1098'], # PC1098 Farsi
+# [ESC + b'\x74\x29', 'cp1098'], # PC1098 Farsi
-'PC1118':
+# 'PC1118':
-[ESC + b'\x74\x2a', 'cp1118'], # PC1118 Lithuanian
+# [ESC + b'\x74\x2a', 'cp1118'], # PC1118 Lithuanian
-'PC1119':
+# 'PC1119':
-[ESC + b'\x74\x2b', 'cp1119'], # PC1119 Lithuanian
+# [ESC + b'\x74\x2b', 'cp1119'], # PC1119 Lithuanian
-'PC1125':
+# 'PC1125':
-[ESC + b'\x74\x2c', 'cp1125'], # PC1125 Ukrainian
+# [ESC + b'\x74\x2c', 'cp1125'], # PC1125 Ukrainian
-'WPC1250':
+# 'WPC1250':
-[ESC + b'\x74\x2d', 'cp1250'], # WPC1250 Latin2
+# [ESC + b'\x74\x2d', 'cp1250'], # WPC1250 Latin2
-'WPC1251':
+# 'WPC1251':
-[ESC + b'\x74\x2e', 'cp1251'], # WPC1251 Cyrillic
+# [ESC + b'\x74\x2e', 'cp1251'], # WPC1251 Cyrillic
-'WPC1253':
+# 'WPC1253':
-[ESC + b'\x74\x2f', 'cp1253'], # WPC1253 Greek
+# [ESC + b'\x74\x2f', 'cp1253'], # WPC1253 Greek
-'WPC1254':
+# 'WPC1254':
-[ESC + b'\x74\x30', 'cp1254'], # WPC1254 Turkish
+# [ESC + b'\x74\x30', 'cp1254'], # WPC1254 Turkish
-'WPC1255':
+# 'WPC1255':
-[ESC + b'\x74\x31', 'cp1255'], # WPC1255 Hebrew
+# [ESC + b'\x74\x31', 'cp1255'], # WPC1255 Hebrew
-'WPC1256':
+# 'WPC1256':
-[ESC + b'\x74\x32', 'cp1256'], # WPC1256 Arabic
+# [ESC + b'\x74\x32', 'cp1256'], # WPC1256 Arabic
-'WPC1257':
+# 'WPC1257':
-[ESC + b'\x74\x33', 'cp1257'], # WPC1257 Baltic Rim
+# [ESC + b'\x74\x33', 'cp1257'], # WPC1257 Baltic Rim
-'WPC1258':
+# 'WPC1258':
-[ESC + b'\x74\x34', 'cp1258'], # WPC1258 Vietnamese
+# [ESC + b'\x74\x34', 'cp1258'], # WPC1258 Vietnamese
-'KZ1048':
+# 'KZ1048':
-[ESC + b'\x74\x35', 'kz1048'], # KZ-1048 Kazakhstan
+# [ESC + b'\x74\x35', 'kz1048'], # KZ-1048 Kazakhstan
-}
+# }

 # Barcode format
 _SET_BARCODE_TXT_POS = lambda n: GS + b'H' + n
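The CHARCODE table is retired in favour of a single CODEPAGE_CHANGE prefix: selecting a code page is now ESC t followed by the slot number taken from the printer profile. A minimal sketch of how such a sequence is composed, not taken verbatim from the commit:

    # Minimal sketch: build the ESC/POS "select character code table" sequence.
    ESC = b'\x1b'
    CODEPAGE_CHANGE = ESC + b'\x74'  # ESC t n

    def select_code_page(slot):
        """Bytes that switch the printer to code page slot `slot` (0-255)."""
        return CODEPAGE_CHANGE + bytes(bytearray([slot]))

    # Slot 16 is WPC1252 in the (removed) table above and in the Epson profile.
    assert select_code_page(16) == b'\x1bt\x10'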
@@ -36,12 +36,12 @@ class Escpos(object):
 """
 device = None

-def __init__(self, profile=None, **kwargs):
+def __init__(self, profile=None, magic_encode_args=None, **kwargs):
 """ Initialize ESCPOS Printer

 :param profile: Printer profile"""
 self.profile = get_profile(profile)
-self.magic = MagicEncode(**kwargs)
+self.magic = MagicEncode(self, **(magic_encode_args or {}))

 def __del__(self):
 """ call self.close upon deletion """
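With this constructor change, callers configure the encoder through the new magic_encode_args dict rather than loose **kwargs. A hypothetical example, assuming the Dummy printer (used by the new tests) forwards keyword arguments to Escpos.__init__; the keys mirror MagicEncode.__init__ (encoding, disabled, defaultsymbol, encoder):

    from escpos.printer import Dummy

    # Hypothetical usage; Dummy is assumed to pass magic_encode_args through.
    p = Dummy(magic_encode_args={
        "encoding": "cp858",    # code page the printer is known to start in
        "defaultsymbol": "?",   # printed when no code page can render a character
    })
    p.text(u"€ ist teuro.")     # routed through MagicEncode.write()
    print(p.output)             # raw ESC/POS bytes collected by the Dummy driver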
@@ -228,11 +228,9 @@ class Escpos(object):
 :raises: :py:exc:`~escpos.exceptions.CharCodeError`
 """
 if code.upper() == "AUTO":
-self.magic.force_encoding = False
+self.magic.force_encoding(False)
 else:
-self.magic.codepage_sequence(code)
-self.magic.encoding = code
-self.magic.force_encoding = True
+self.magic.force_encoding(code)

 def barcode(self, code, bc, height=64, width=3, pos="BELOW", font="A", align_ct=True, function_type="A"):
 """ Print Barcode
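charcode() is now a thin wrapper around MagicEncode.force_encoding(): "AUTO" re-enables automatic code page selection, anything else pins one code page. A short sketch of the intended call pattern; the pinned call is left commented because it emits an ESC t sequence immediately, and the codec name is an assumption that must exist in the active profile:

    from escpos.printer import Dummy

    p = Dummy()
    p.charcode("AUTO")     # magic.force_encoding(False): pick code pages automatically
    # p.charcode("cp437")  # would pin one code page via magic.force_encoding("cp437")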
@@ -373,7 +371,7 @@ class Escpos(object):
 :raises: :py:exc:`~escpos.exceptions.TextError`
 """
 txt = six.text_type(txt)
-self._raw(self.magic.encode_text(txt=txt))
+self.magic.write(txt)

 def block_text(self, txt, font=None, columns=None):
 """ Text is printed wrapped to specified columns
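text() no longer collects bytes itself; MagicEncode.write() pushes them straight to the driver, so the only driver surface it needs is _raw() (plus a profile, unless an Encoder instance is passed in explicitly). A sketch of that minimal driver contract, assuming nothing beyond what write_with_encoding() calls:

    # Sketch of the minimal "driver" MagicEncode talks to: anything with _raw().
    class FakeDriver(object):
        def __init__(self):
            self.output = b""

        def _raw(self, data):
            # MagicEncode.write_with_encoding() hands raw ESC/POS bytes to this.
            self.output += data

The test suite uses escpos.printer.Dummy in exactly this role (see the fixture added below).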
@@ -17,8 +17,9 @@ from __future__ import division
 from __future__ import print_function
 from __future__ import unicode_literals

-from .constants import CHARCODE
+from .constants import CODEPAGE_CHANGE
 from .exceptions import CharCodeError, Error
+from .capabilities import get_profile
 import copy
 import six

@@ -27,153 +28,230 @@ try:
 except ImportError:
 jcconv = None


+
+def encode_katakana(text):
+"""I don't think this quite works yet."""
+encoded = []
+for char in text:
+if jcconv:
+# try to convert japanese text to half-katakanas
+char = jcconv.kata2half(jcconv.hira2kata(char))
+# TODO: "the conversion may result in multiple characters"
+# When? What should we do about it?
+
+if char in TXT_ENC_KATAKANA_MAP:
+encoded.append(TXT_ENC_KATAKANA_MAP[char])
+else:
+encoded.append(char)
+print(encoded)
+return b"".join(encoded)
+
+
+
+# TODO: When the capabilities.yml format is finished, this should be
+# in the profile itself.
+def get_encodings_from_profile(profile):
+mapping = {k: v.lower() for k, v in profile.codePageMap.items()}
+if hasattr(profile, 'codePages'):
+code_pages = [n.lower() for n in profile.codePages]
+return {k: v for k, v in mapping.items() if v in code_pages}
+else:
+return mapping
+
+
+class CodePages:
+def get_all(self):
+return get_encodings_from_profile(get_profile()).values()
+
+def encode(self, text, encoding, errors='strict'):
+# Python has not have this builtin?
+if encoding.upper() == 'KATAKANA':
+return encode_katakana(text)
+
+return text.encode(encoding, errors=errors)
+
+def get_encoding(self, encoding):
+# resolve the encoding alias
+return encoding.lower()
+
+code_pages = CodePages()
+
+
+class Encoder(object):
+"""Takes a list of available code spaces. Picks the right one for a
+given character.
+
+Note: To determine the codespace, it needs to do the conversion, and
+thus already knows what the final byte in the target encoding would
+be. Nevertheless, the API of this class doesn't return the byte.
+
+The caller use to do the character conversion itself.
+
+$ python -m timeit -s "{u'ö':'a'}.get(u'ö')"
+100000000 loops, best of 3: 0.0133 usec per loop
+
+$ python -m timeit -s "u'ö'.encode('latin1')"
+100000000 loops, best of 3: 0.0141 usec per loop
+"""
+
+def __init__(self, codepages):
+self.codepages = codepages
+self.reverse = {v:k for k, v in codepages.items()}
+self.available_encodings = set(codepages.values())
+self.used_encodings = set()
+
+def get_sequence(self, encoding):
+return self.reverse[encoding]
+
+def get_encoding(self, encoding):
+"""resolve aliases
+
+check that the profile allows this encoding
+"""
+encoding = code_pages.get_encoding(encoding)
+if not encoding in self.available_encodings:
+raise ValueError('This encoding cannot be used for the current profile')
+return encoding
+
+def get_encodings(self):
+"""
+- remove the ones not supported
+- order by used first, then others
+- do not use a cache, because encode already is so fast
+"""
+return self.available_encodings
+
+def can_encode(self, encoding, char):
+try:
+encoded = code_pages.encode(char, encoding)
+assert type(encoded) is bytes
+return encoded
+except LookupError:
+# We don't have this encoding
+return False
+except UnicodeEncodeError:
+return False
+
+return True
+
+def find_suitable_codespace(self, char):
+"""The order of our search is a specific one:
+
+1. code pages that we already tried before; there is a good
+chance they might work again, reducing the search space,
+and by re-using already used encodings we might also
+reduce the number of codepage change instructiosn we have
+to send. Still, any performance gains will presumably be
+fairly minor.
+
+2. code pages in lower ESCPOS slots first. Presumably, they
+are more likely to be supported, so if a printer profile
+is missing or incomplete, we might increase our change
+that the code page we pick for this character is actually
+supported.
+
+# XXX actually do speed up the search
+"""
+for encoding in self.get_encodings():
+if self.can_encode(encoding, char):
+# This encoding worked; at it to the set of used ones.
+self.used_encodings.add(encoding)
+return encoding


 class MagicEncode(object):
 """ Magic Encode Class

 It tries to automatically encode utf-8 input into the right coding. When encoding is impossible a configurable
 symbol will be inserted.
-"""
-def __init__(self, startencoding='PC437', force_encoding=False, defaultsymbol=b'', defaultencoding='PC437'):
-# running these functions makes sure that the encoding is suitable
-MagicEncode.codepage_name(startencoding)
-MagicEncode.codepage_name(defaultencoding)

-self.encoding = startencoding
+encoding: If you know the current encoding of the printer when
+initializing this class, set it here. If the current encoding is
+unknown, the first character emitted will be a codepage switch.
+"""
+def __init__(self, driver, encoding=None, disabled=False,
+defaultsymbol='?', encoder=None):
+if disabled and not encoding:
+raise Error('If you disable magic encode, you need to define an encoding!')
+
+self.driver = driver
+self.encoder = encoder or Encoder(get_encodings_from_profile(driver.profile))
+
+self.encoding = self.encoder.get_encoding(encoding) if encoding else None
 self.defaultsymbol = defaultsymbol
-if type(self.defaultsymbol) is not six.binary_type:
+self.disabled = disabled
-raise Error("The supplied symbol {sym} has to be a binary string".format(sym=defaultsymbol))
-self.defaultencoding = defaultencoding
-self.force_encoding = force_encoding

-def set_encoding(self, encoding='PC437', force_encoding=False):
+def force_encoding(self, encoding):
-"""sets an encoding (normally not used)
+"""Sets a fixed encoding. The change is emitted right away.

-This function should normally not be used since it manipulates the automagic behaviour. However, if you want to
+From now one, this buffer will switch the code page anymore.
-force a certain codepage, then you can use this function.
+However, it will still keep track of the current code page.

-:param encoding: must be a valid encoding from CHARCODE
-:param force_encoding: whether the encoding should not be changed automatically
 """
-self.codepage_name(encoding)
+if not encoding:
-self.encoding = encoding
+self.disabled = False
-self.force_encoding = force_encoding
+else:
+self.write_with_encoding(encoding, None)
+self.disabled = True
+
-@staticmethod
+def write(self, text):
-def codepage_sequence(codepage):
+"""Write the text, automatically switching encodings.
-"""returns the corresponding codepage-sequence"""
-try:
-return CHARCODE[codepage][0]
-except KeyError:
-raise CharCodeError("The encoding {enc} is unknown.".format(enc=codepage))
-
-@staticmethod
-def codepage_name(codepage):
-"""returns the corresponding codepage-name (for python)"""
-try:
-name = CHARCODE[codepage][1]
-if name == '':
-raise CharCodeError("The codepage {enc} does not have a connected python-codepage".format(enc=codepage))
-return name
-except KeyError:
-raise CharCodeError("The encoding {enc} is unknown.".format(enc=codepage))
-
-def encode_char(self, char):
 """
-Encodes a single unicode character into a sequence of
-esc-pos code page change instructions and character declarations
-"""
-if type(char) is not six.text_type:
-raise Error("The supplied text has to be unicode, but is of type {type}.".format(
-type=type(char)
-))
-encoded = b''
-encoding = self.encoding # we reuse the last encoding to prevent code page switches at every character
-remaining = copy.copy(CHARCODE)
-
-while True: # Trying all encoding until one succeeds
+if self.disabled:
-try:
+self.write_with_encoding(self.encoding, text)
-if encoding == 'KATAKANA': # Japanese characters
-if jcconv:
-# try to convert japanese text to half-katakanas
-kata = jcconv.kata2half(jcconv.hira2kata(char))
-if kata != char:
-self.extra_chars += len(kata) - 1
-# the conversion may result in multiple characters
-return self.encode_str(kata)
-else:
-kata = char
-
-if kata in TXT_ENC_KATAKANA_MAP:
-encoded = TXT_ENC_KATAKANA_MAP[kata]
-break
-else:
-raise ValueError()
-else:
-try:
-enc_name = MagicEncode.codepage_name(encoding)
-encoded = char.encode(enc_name)
-assert type(encoded) is bytes
-except LookupError:
-raise ValueError("The encoding {enc} seems to not exist in Python".format(enc=encoding))
-except CharCodeError:
-raise ValueError("The encoding {enc} is not fully configured in constants".format(
-enc=encoding
-))
-break
-
-except ValueError: # the encoding failed, select another one and retry
-if encoding in remaining:
-del remaining[encoding]
-if len(remaining) >= 1:
-encoding = list(remaining)[0]
-else:
-encoding = self.defaultencoding
-encoded = self.defaultsymbol # could not encode, output error character
-break
-
-if encoding != self.encoding:
-# if the encoding changed, remember it and prefix the character with
-# the esc-pos encoding change sequence
-self.encoding = encoding
-encoded = CHARCODE[encoding][0] + encoded
-
-return encoded
-
-def encode_str(self, txt):
-# make sure the right codepage is set in the printer
-buffer = self.codepage_sequence(self.encoding)
-if self.force_encoding:
-buffer += txt.encode(self.codepage_name(self.encoding))
-else:
-for c in txt:
-buffer += self.encode_char(c)
-return buffer
-
-def encode_text(self, txt):
-"""returns a byte-string with encoded text
-
-:param txt: text that shall be encoded
-:return: byte-string for the printer
-"""
-if not txt:
 return

-self.extra_chars = 0
+# TODO: Currently this very simple loop means we send every
+# character individually to the printer. We can probably
+# improve performace by searching the text for the first
+# character that cannot be rendered using the current code
+# page, and then sending all of those characters at once.
+# Or, should a lower-level buffer be responsible for that?
+
-txt = self.encode_str(txt)
+for char in text:
+# See if the current code page works for this character.
+# The encoder object will use a cache to be able to answer
+# this question fairly easily.
+if self.encoding and self.encoder.can_encode(self.encoding, char):
+self.write_with_encoding(self.encoding, char)
+continue
+
-# if the utf-8 -> codepage conversion inserted extra characters,
+# We have to find another way to print this character.
-# remove double spaces to try to restore the original string length
+# See if any of the code pages that the printer profile supports
-# and prevent printing alignment issues
+# can encode this character.
-while self.extra_chars > 0:
+codespace = self.encoder.find_suitable_codespace(char)
-dspace = txt.find(' ')
+if not codespace:
-if dspace > 0:
+self._handle_character_failed(char)
-txt = txt[:dspace] + txt[dspace+1:]
+continue
-self.extra_chars -= 1
-else:
-break

-return txt
+self.write_with_encoding(codespace, char)
+
+def _handle_character_failed(self, char):
+"""Called when no codepage was found to render a character.
+"""
+# Writing the default symbol via write() allows us to avoid
+# unnecesary codepage switches.
+self.write(self.defaultsymbol)
+
+def write_with_encoding(self, encoding, text):
+if text is not None and type(text) is not six.text_type:
+raise Error("The supplied text has to be unicode, but is of type {type}.".format(
+type=type(text)
+))
+
+encoding = self.encoder.get_encoding(encoding)
+
+# We always know the current code page; if the new codepage
+# is different, emit a change command.
+if encoding != self.encoding:
+self.encoding = encoding
+self.driver._raw(b'{}{}'.format(
+CODEPAGE_CHANGE,
+six.int2byte(self.encoder.get_sequence(encoding))
+))
+
+if text:
+self.driver._raw(code_pages.encode(text, encoding, errors="replace"))


 # todo emoticons mit charmap encoden
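The new Encoder can be exercised on its own with a hand-rolled slot-to-codec mapping, the same shape the tests below use; a small sketch:

    from escpos.magicencode import Encoder

    # Slot numbers map to Python codec names, as in the new tests.
    enc = Encoder({0: 'cp437', 19: 'cp858'})

    print(enc.can_encode('cp437', u'€'))      # False: cp437 has no euro sign
    print(enc.find_suitable_codespace(u'€'))  # 'cp858' (it carries the euro at 0xD5)
    print(enc.get_sequence('cp858'))          # 19, the byte value sent after ESC t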
@@ -0,0 +1,7 @@
+import pytest
+from escpos.printer import Dummy
+
+
+@pytest.fixture
+def driver():
+return Dummy()
@@ -13,103 +13,97 @@ from __future__ import division
 from __future__ import print_function
 from __future__ import unicode_literals

+import pytest
 from nose.tools import raises, assert_raises
 from hypothesis import given, example
 import hypothesis.strategies as st
-from escpos.magicencode import MagicEncode
+from escpos.magicencode import MagicEncode, Encoder, encode_katakana
 from escpos.exceptions import CharCodeError, Error
-from escpos.constants import CHARCODE

-@raises(CharCodeError)
-def test_magic_encode_unkown_char_constant_as_startenc():
-"""tests whether MagicEncode raises the proper Exception when an unknown charcode-name is passed as startencoding"""
-MagicEncode(startencoding="something")
-
-@raises(CharCodeError)
-def test_magic_encode_unkown_char_constant_as_defaultenc():
-"""tests whether MagicEncode raises the proper Exception when an unknown charcode-name is passed as defaultenc."""
-MagicEncode(defaultencoding="something")

-def test_magic_encode_wo_arguments():
+class TestEncoder:
-"""tests whether MagicEncode works in the standard configuration"""
-MagicEncode()

-@raises(Error)
+def test_can_encode(self):
-def test_magic_encode_w_non_binary_defaultsymbol():
+assert not Encoder({1: 'cp437'}).can_encode('cp437', u'€')
-"""tests whether MagicEncode catches non-binary defaultsymbols"""
+assert Encoder({1: 'cp437'}).can_encode('cp437', u'á')
-MagicEncode(defaultsymbol="non-binary")
+assert not Encoder({1: 'foobar'}).can_encode('foobar', 'a')

-@given(symbol=st.binary())
+def test_find_suitable_encoding(self):
-def test_magic_encode_w_binary_defaultsymbol(symbol):
+assert not Encoder({1: 'cp437'}).find_suitable_codespace(u'€')
-"""tests whether MagicEncode works with any binary symbol"""
+assert Encoder({1: 'cp858'}).find_suitable_codespace(u'€') == 'cp858'
-MagicEncode(defaultsymbol=symbol)

+@raises(ValueError)
+def test_get_encoding(self):
+Encoder({}).get_encoding('latin1')
+
+
+class TestMagicEncode:
+
+class TestInit:
+
+def test_disabled_requires_encoding(self, driver):
+with pytest.raises(Error):
+MagicEncode(driver, disabled=True)
+
+class TestWriteWithEncoding:
+
+def test_init_from_none(self, driver):
+encode = MagicEncode(driver, encoding=None)
+encode.write_with_encoding('cp858', '€ ist teuro.')
+assert driver.output == b'\x1bt\xd5 ist teuro.'
+
+def test_change_from_another(self, driver):
+encode = MagicEncode(driver, encoding='cp437')
+encode.write_with_encoding('cp858', '€ ist teuro.')
+assert driver.output == b'\x1bt\xd5 ist teuro.'
+
+def test_no_change(self, driver):
+encode = MagicEncode(driver, encoding='cp858')
+encode.write_with_encoding('cp858', '€ ist teuro.')
+assert driver.output == b'\xd5 ist teuro.'
+
+class TestWrite:
+
+def test_write(self, driver):
+encode = MagicEncode(driver)
+encode.write('€ ist teuro.')
+assert driver.output == b'\x1bt\xa4 ist teuro.'
+
+def test_write_disabled(self, driver):
+encode = MagicEncode(driver, encoding='cp437', disabled=True)
+encode.write('€ ist teuro.')
+assert driver.output == b'? ist teuro.'
+
+def test_write_no_codepage(self, driver):
+encode = MagicEncode(
+driver, defaultsymbol="_", encoder=Encoder({1: 'cp437'}),
+encoding='cp437')
+encode.write(u'€ ist teuro.')
+assert driver.output == b'_ ist teuro.'
+
+class TestForceEncoding:
+
+def test(self, driver):
+encode = MagicEncode(driver)
+encode.force_encoding('cp437')
+assert driver.output == b'\x1bt'
+
+encode.write('€ ist teuro.')
+assert driver.output == b'\x1bt? ist teuro.'
+
+
+class TestKatakana:
 @given(st.text())
 @example("カタカナ")
 @example("あいうえお")
 @example("ハンカクカタカナ")
-def test_magic_encode_encode_text_unicode_string(text):
+def test_accept(self, text):
-"""tests whether MagicEncode can accept a unicode string"""
+encode_katakana(text)
-me = MagicEncode()
-me.encode_text(text)

-@given(char=st.characters())
+def test_result(self):
-def test_magic_encode_encode_char(char):
+assert encode_katakana('カタカナ') == b'\xb6\xc0\xb6\xc5'
-"""tests the encode_char-method of MagicEncode"""
+assert encode_katakana("あいうえお") == b'\xb1\xb2\xb3\xb4\xb5'
-me = MagicEncode()
-me.encode_char(char)
-
-@raises(Error)
-@given(char=st.binary())
-def test_magic_encode_encode_char_binary(char):
-"""tests the encode_char-method of MagicEncode with binary input"""
-me = MagicEncode()
-me.encode_char(char)
-
-
-def test_magic_encode_string_with_katakana_and_hiragana():
-"""tests the encode_string-method with katakana and hiragana"""
-me = MagicEncode()
-me.encode_str("カタカナ")
-me.encode_str("あいうえお")
-
-@raises(CharCodeError)
-def test_magic_encode_codepage_sequence_unknown_key():
-"""tests whether MagicEncode.codepage_sequence raises the proper Exception with unknown charcode-names"""
-MagicEncode.codepage_sequence("something")
-
-@raises(CharCodeError)
-def test_magic_encode_codepage_name_unknown_key():
-"""tests whether MagicEncode.codepage_name raises the proper Exception with unknown charcode-names"""
-MagicEncode.codepage_name("something")
-
-def test_magic_encode_constants_getter():
-"""tests whether the constants are properly fetched"""
-for key in CHARCODE:
-name = CHARCODE[key][1]
-if name == '':
-assert_raises(CharCodeError, MagicEncode.codepage_name, key)
-else:
-assert name == MagicEncode.codepage_name(key)
-assert MagicEncode.codepage_sequence(key) == CHARCODE[key][0]
-
-@given(st.text())
-def test_magic_encode_force_encoding(text):
-"""test whether force_encoding works as expected"""
-me = MagicEncode()
-assert me.force_encoding is False
-me.set_encoding(encoding='PC850', force_encoding=True)
-assert me.encoding == 'PC850'
-assert me.force_encoding is True
-try:
-me.encode_text(text)
-except UnicodeEncodeError:
-# we discard these errors as they are to be expected
-# what we want to check here is, whether encoding or codepage will switch through some of the magic code
-# being called accidentally
-pass
-assert me.encoding == 'PC850'
-assert me.force_encoding is True


 # TODO Idee für unittest: hypothesis-strings erzeugen, in encode_text werfen