diff --git a/src/escpos/capabilities.yml b/src/escpos/capabilities.yml index e105687..5849218 100644 --- a/src/escpos/capabilities.yml +++ b/src/escpos/capabilities.yml @@ -80,8 +80,8 @@ default: #24: // Thai Character Code 16 #25: // Thai Character Code 17 #26: // Thai Character Code 18 - 30: 'TCVN-3-1', # TCVN-3: Vietnamese - 31: 'TCVN-3-2', # TCVN-3: Vietnamese + 30: 'TCVN-3-1' # TCVN-3: Vietnamese + 31: 'TCVN-3-2' # TCVN-3: Vietnamese 32: "CP720" 33: "CP775" 34: "CP855" @@ -152,13 +152,13 @@ epson: a: 42 b: 56 codePages: - - PC437 # 0 + - cp437 # 0 - Katakana # 1 - - PC850 # 2 - - PC860 # 3 - - PC863 # 4 - - PC865 # 5 - - PC858 # 19 + - cp850 # 2 + - cp860 # 3 + - cp863 # 4 + - cp865 # 5 + - cp858 # 19 - blank # http://support.epostraders.co.uk/support-files/documents/3/l7O-TM-T88II_TechnicalRefGuide.pdf @@ -168,16 +168,16 @@ epson: a: 42 b: 56 codePages: - - PC437 # 0 + - CP437 # 0 - Katakana # 1 - - PC850 # 2 - - PC860 # 3 - - PC863 # 4 - - PC865 # 5 - - WPC1252 # 16 - - PC866 # 17 - - PC852 # 18 - - PC858 # 19 + - CP850 # 2 + - CP860 # 3 + - CP863 # 4 + - CP865 # 5 + - PC1252 # 16 + - CP866 # 17 + - CP852 # 18 + - CP858 # 19 - blank diff --git a/src/escpos/constants.py b/src/escpos/constants.py index b9625c1..aa7857b 100644 --- a/src/escpos/constants.py +++ b/src/escpos/constants.py @@ -101,99 +101,101 @@ TXT_ALIGN_RT = ESC + b'\x61\x02' # Right justification TXT_INVERT_ON = GS + b'\x42\x01' # Inverse Printing ON TXT_INVERT_OFF = GS + b'\x42\x00' # Inverse Printing OFF + +CODEPAGE_CHANGE = ESC + b'\x74' # Char code table -CHARCODE = { - 'PC437': - [ESC + b'\x74\x00', 'cp437'], # PC437 USA - 'KATAKANA': - [ESC + b'\x74\x01', ''], # KATAKANA (JAPAN) - 'PC850': - [ESC + b'\x74\x02', 'cp850'], # PC850 Multilingual - 'PC860': - [ESC + b'\x74\x03', 'cp860'], # PC860 Portuguese - 'PC863': - [ESC + b'\x74\x04', 'cp863'], # PC863 Canadian-French - 'PC865': - [ESC + b'\x74\x05', 'cp865'], # PC865 Nordic - 'KANJI6': - [ESC + b'\x74\x06', ''], # One-pass Kanji, Hiragana - 'KANJI7': - [ESC + b'\x74\x07', ''], # One-pass Kanji - 'KANJI8': - [ESC + b'\x74\x08', ''], # One-pass Kanji - 'PC851': - [ESC + b'\x74\x0b', 'cp851'], # PC851 Greek - 'PC853': - [ESC + b'\x74\x0c', 'cp853'], # PC853 Turkish - 'PC857': - [ESC + b'\x74\x0d', 'cp857'], # PC857 Turkish - 'PC737': - [ESC + b'\x74\x0e', 'cp737'], # PC737 Greek - '8859_7': - [ESC + b'\x74\x0f', 'iso8859_7'], # ISO8859-7 Greek - 'WPC1252': - [ESC + b'\x74\x10', 'cp1252'], # WPC1252 - 'PC866': - [ESC + b'\x74\x11', 'cp866'], # PC866 Cyrillic #2 - 'PC852': - [ESC + b'\x74\x12', 'cp852'], # PC852 Latin2 - 'PC858': - [ESC + b'\x74\x13', 'cp858'], # PC858 Euro - 'KU42': - [ESC + b'\x74\x14', ''], # KU42 Thai - 'TIS11': - [ESC + b'\x74\x15', ''], # TIS11 Thai - 'TIS18': - [ESC + b'\x74\x1a', ''], # TIS18 Thai - 'TCVN3': - [ESC + b'\x74\x1e', ''], # TCVN3 Vietnamese - 'TCVN3B': - [ESC + b'\x74\x1f', ''], # TCVN3 Vietnamese - 'PC720': - [ESC + b'\x74\x20', 'cp720'], # PC720 Arabic - 'WPC775': - [ESC + b'\x74\x21', ''], # WPC775 Baltic Rim - 'PC855': - [ESC + b'\x74\x22', 'cp855'], # PC855 Cyrillic - 'PC861': - [ESC + b'\x74\x23', 'cp861'], # PC861 Icelandic - 'PC862': - [ESC + b'\x74\x24', 'cp862'], # PC862 Hebrew - 'PC864': - [ESC + b'\x74\x25', 'cp864'], # PC864 Arabic - 'PC869': - [ESC + b'\x74\x26', 'cp869'], # PC869 Greek - '8859_2': - [ESC + b'\x74\x27', 'iso8859_2'], # ISO8859-2 Latin2 - '8859_9': - [ESC + b'\x74\x28', 'iso8859_9'], # ISO8859-2 Latin9 - 'PC1098': - [ESC + b'\x74\x29', 'cp1098'], # PC1098 Farsi - 'PC1118': - [ESC + b'\x74\x2a', 'cp1118'], # PC1118 Lithuanian - 'PC1119': - [ESC + b'\x74\x2b', 'cp1119'], # PC1119 Lithuanian - 'PC1125': - [ESC + b'\x74\x2c', 'cp1125'], # PC1125 Ukrainian - 'WPC1250': - [ESC + b'\x74\x2d', 'cp1250'], # WPC1250 Latin2 - 'WPC1251': - [ESC + b'\x74\x2e', 'cp1251'], # WPC1251 Cyrillic - 'WPC1253': - [ESC + b'\x74\x2f', 'cp1253'], # WPC1253 Greek - 'WPC1254': - [ESC + b'\x74\x30', 'cp1254'], # WPC1254 Turkish - 'WPC1255': - [ESC + b'\x74\x31', 'cp1255'], # WPC1255 Hebrew - 'WPC1256': - [ESC + b'\x74\x32', 'cp1256'], # WPC1256 Arabic - 'WPC1257': - [ESC + b'\x74\x33', 'cp1257'], # WPC1257 Baltic Rim - 'WPC1258': - [ESC + b'\x74\x34', 'cp1258'], # WPC1258 Vietnamese - 'KZ1048': - [ESC + b'\x74\x35', 'kz1048'], # KZ-1048 Kazakhstan -} +# CHARCODE = { +# 'PC437': +# [ESC + b'\x74\x00', 'cp437'], # PC437 USA +# 'KATAKANA': +# [ESC + b'\x74\x01', ''], # KATAKANA (JAPAN) +# 'PC850': +# [ESC + b'\x74\x02', 'cp850'], # PC850 Multilingual +# 'PC860': +# [ESC + b'\x74\x03', 'cp860'], # PC860 Portuguese +# 'PC863': +# [ESC + b'\x74\x04', 'cp863'], # PC863 Canadian-French +# 'PC865': +# [ESC + b'\x74\x05', 'cp865'], # PC865 Nordic +# 'KANJI6': +# [ESC + b'\x74\x06', ''], # One-pass Kanji, Hiragana +# 'KANJI7': +# [ESC + b'\x74\x07', ''], # One-pass Kanji +# 'KANJI8': +# [ESC + b'\x74\x08', ''], # One-pass Kanji +# 'PC851': +# [ESC + b'\x74\x0b', 'cp851'], # PC851 Greek +# 'PC853': +# [ESC + b'\x74\x0c', 'cp853'], # PC853 Turkish +# 'PC857': +# [ESC + b'\x74\x0d', 'cp857'], # PC857 Turkish +# 'PC737': +# [ESC + b'\x74\x0e', 'cp737'], # PC737 Greek +# '8859_7': +# [ESC + b'\x74\x0f', 'iso8859_7'], # ISO8859-7 Greek +# 'WPC1252': +# [ESC + b'\x74\x10', 'cp1252'], # WPC1252 +# 'PC866': +# [ESC + b'\x74\x11', 'cp866'], # PC866 Cyrillic #2 +# 'PC852': +# [ESC + b'\x74\x12', 'cp852'], # PC852 Latin2 +# 'PC858': +# [ESC + b'\x74\x13', 'cp858'], # PC858 Euro +# 'KU42': +# [ESC + b'\x74\x14', ''], # KU42 Thai +# 'TIS11': +# [ESC + b'\x74\x15', ''], # TIS11 Thai +# 'TIS18': +# [ESC + b'\x74\x1a', ''], # TIS18 Thai +# 'TCVN3': +# [ESC + b'\x74\x1e', ''], # TCVN3 Vietnamese +# 'TCVN3B': +# [ESC + b'\x74\x1f', ''], # TCVN3 Vietnamese +# 'PC720': +# [ESC + b'\x74\x20', 'cp720'], # PC720 Arabic +# 'WPC775': +# [ESC + b'\x74\x21', ''], # WPC775 Baltic Rim +# 'PC855': +# [ESC + b'\x74\x22', 'cp855'], # PC855 Cyrillic +# 'PC861': +# [ESC + b'\x74\x23', 'cp861'], # PC861 Icelandic +# 'PC862': +# [ESC + b'\x74\x24', 'cp862'], # PC862 Hebrew +# 'PC864': +# [ESC + b'\x74\x25', 'cp864'], # PC864 Arabic +# 'PC869': +# [ESC + b'\x74\x26', 'cp869'], # PC869 Greek +# '8859_2': +# [ESC + b'\x74\x27', 'iso8859_2'], # ISO8859-2 Latin2 +# '8859_9': +# [ESC + b'\x74\x28', 'iso8859_9'], # ISO8859-2 Latin9 +# 'PC1098': +# [ESC + b'\x74\x29', 'cp1098'], # PC1098 Farsi +# 'PC1118': +# [ESC + b'\x74\x2a', 'cp1118'], # PC1118 Lithuanian +# 'PC1119': +# [ESC + b'\x74\x2b', 'cp1119'], # PC1119 Lithuanian +# 'PC1125': +# [ESC + b'\x74\x2c', 'cp1125'], # PC1125 Ukrainian +# 'WPC1250': +# [ESC + b'\x74\x2d', 'cp1250'], # WPC1250 Latin2 +# 'WPC1251': +# [ESC + b'\x74\x2e', 'cp1251'], # WPC1251 Cyrillic +# 'WPC1253': +# [ESC + b'\x74\x2f', 'cp1253'], # WPC1253 Greek +# 'WPC1254': +# [ESC + b'\x74\x30', 'cp1254'], # WPC1254 Turkish +# 'WPC1255': +# [ESC + b'\x74\x31', 'cp1255'], # WPC1255 Hebrew +# 'WPC1256': +# [ESC + b'\x74\x32', 'cp1256'], # WPC1256 Arabic +# 'WPC1257': +# [ESC + b'\x74\x33', 'cp1257'], # WPC1257 Baltic Rim +# 'WPC1258': +# [ESC + b'\x74\x34', 'cp1258'], # WPC1258 Vietnamese +# 'KZ1048': +# [ESC + b'\x74\x35', 'kz1048'], # KZ-1048 Kazakhstan +# } # Barcode format _SET_BARCODE_TXT_POS = lambda n: GS + b'H' + n diff --git a/src/escpos/escpos.py b/src/escpos/escpos.py index a217db7..2dc17e0 100644 --- a/src/escpos/escpos.py +++ b/src/escpos/escpos.py @@ -36,12 +36,12 @@ class Escpos(object): """ device = None - def __init__(self, profile=None, **kwargs): + def __init__(self, profile=None, magic_encode_args=None, **kwargs): """ Initialize ESCPOS Printer :param profile: Printer profile""" self.profile = get_profile(profile) - self.magic = MagicEncode(**kwargs) + self.magic = MagicEncode(self, **(magic_encode_args or {})) def __del__(self): """ call self.close upon deletion """ @@ -228,11 +228,9 @@ class Escpos(object): :raises: :py:exc:`~escpos.exceptions.CharCodeError` """ if code.upper() == "AUTO": - self.magic.force_encoding = False + self.magic.force_encoding(False) else: - self.magic.codepage_sequence(code) - self.magic.encoding = code - self.magic.force_encoding = True + self.magic.force_encoding(code) def barcode(self, code, bc, height=64, width=3, pos="BELOW", font="A", align_ct=True, function_type="A"): """ Print Barcode @@ -373,7 +371,7 @@ class Escpos(object): :raises: :py:exc:`~escpos.exceptions.TextError` """ txt = six.text_type(txt) - self._raw(self.magic.encode_text(txt=txt)) + self.magic.write(txt) def block_text(self, txt, font=None, columns=None): """ Text is printed wrapped to specified columns diff --git a/src/escpos/magicencode.py b/src/escpos/magicencode.py index 1091b31..ed8a4bc 100644 --- a/src/escpos/magicencode.py +++ b/src/escpos/magicencode.py @@ -17,8 +17,9 @@ from __future__ import division from __future__ import print_function from __future__ import unicode_literals -from .constants import CHARCODE +from .constants import CODEPAGE_CHANGE from .exceptions import CharCodeError, Error +from .capabilities import get_profile import copy import six @@ -27,153 +28,230 @@ try: except ImportError: jcconv = None + +def encode_katakana(text): + """I don't think this quite works yet.""" + encoded = [] + for char in text: + if jcconv: + # try to convert japanese text to half-katakanas + char = jcconv.kata2half(jcconv.hira2kata(char)) + # TODO: "the conversion may result in multiple characters" + # When? What should we do about it? + + if char in TXT_ENC_KATAKANA_MAP: + encoded.append(TXT_ENC_KATAKANA_MAP[char]) + else: + encoded.append(char) + print(encoded) + return b"".join(encoded) + + + +# TODO: When the capabilities.yml format is finished, this should be +# in the profile itself. +def get_encodings_from_profile(profile): + mapping = {k: v.lower() for k, v in profile.codePageMap.items()} + if hasattr(profile, 'codePages'): + code_pages = [n.lower() for n in profile.codePages] + return {k: v for k, v in mapping.items() if v in code_pages} + else: + return mapping + + +class CodePages: + def get_all(self): + return get_encodings_from_profile(get_profile()).values() + + def encode(self, text, encoding, errors='strict'): + # Python has not have this builtin? + if encoding.upper() == 'KATAKANA': + return encode_katakana(text) + + return text.encode(encoding, errors=errors) + + def get_encoding(self, encoding): + # resolve the encoding alias + return encoding.lower() + +code_pages = CodePages() + + +class Encoder(object): + """Takes a list of available code spaces. Picks the right one for a + given character. + + Note: To determine the codespace, it needs to do the conversion, and + thus already knows what the final byte in the target encoding would + be. Nevertheless, the API of this class doesn't return the byte. + + The caller use to do the character conversion itself. + + $ python -m timeit -s "{u'ö':'a'}.get(u'ö')" + 100000000 loops, best of 3: 0.0133 usec per loop + + $ python -m timeit -s "u'ö'.encode('latin1')" + 100000000 loops, best of 3: 0.0141 usec per loop + """ + + def __init__(self, codepages): + self.codepages = codepages + self.reverse = {v:k for k, v in codepages.items()} + self.available_encodings = set(codepages.values()) + self.used_encodings = set() + + def get_sequence(self, encoding): + return self.reverse[encoding] + + def get_encoding(self, encoding): + """resolve aliases + + check that the profile allows this encoding + """ + encoding = code_pages.get_encoding(encoding) + if not encoding in self.available_encodings: + raise ValueError('This encoding cannot be used for the current profile') + return encoding + + def get_encodings(self): + """ + - remove the ones not supported + - order by used first, then others + - do not use a cache, because encode already is so fast + """ + return self.available_encodings + + def can_encode(self, encoding, char): + try: + encoded = code_pages.encode(char, encoding) + assert type(encoded) is bytes + return encoded + except LookupError: + # We don't have this encoding + return False + except UnicodeEncodeError: + return False + + return True + + def find_suitable_codespace(self, char): + """The order of our search is a specific one: + + 1. code pages that we already tried before; there is a good + chance they might work again, reducing the search space, + and by re-using already used encodings we might also + reduce the number of codepage change instructiosn we have + to send. Still, any performance gains will presumably be + fairly minor. + + 2. code pages in lower ESCPOS slots first. Presumably, they + are more likely to be supported, so if a printer profile + is missing or incomplete, we might increase our change + that the code page we pick for this character is actually + supported. + + # XXX actually do speed up the search + """ + for encoding in self.get_encodings(): + if self.can_encode(encoding, char): + # This encoding worked; at it to the set of used ones. + self.used_encodings.add(encoding) + return encoding + + class MagicEncode(object): """ Magic Encode Class It tries to automatically encode utf-8 input into the right coding. When encoding is impossible a configurable symbol will be inserted. + + encoding: If you know the current encoding of the printer when + initializing this class, set it here. If the current encoding is + unknown, the first character emitted will be a codepage switch. """ - def __init__(self, startencoding='PC437', force_encoding=False, defaultsymbol=b'', defaultencoding='PC437'): - # running these functions makes sure that the encoding is suitable - MagicEncode.codepage_name(startencoding) - MagicEncode.codepage_name(defaultencoding) + def __init__(self, driver, encoding=None, disabled=False, + defaultsymbol='?', encoder=None): + if disabled and not encoding: + raise Error('If you disable magic encode, you need to define an encoding!') - self.encoding = startencoding + self.driver = driver + self.encoder = encoder or Encoder(get_encodings_from_profile(driver.profile)) + + self.encoding = self.encoder.get_encoding(encoding) if encoding else None self.defaultsymbol = defaultsymbol - if type(self.defaultsymbol) is not six.binary_type: - raise Error("The supplied symbol {sym} has to be a binary string".format(sym=defaultsymbol)) - self.defaultencoding = defaultencoding - self.force_encoding = force_encoding + self.disabled = disabled - def set_encoding(self, encoding='PC437', force_encoding=False): - """sets an encoding (normally not used) + def force_encoding(self, encoding): + """Sets a fixed encoding. The change is emitted right away. - This function should normally not be used since it manipulates the automagic behaviour. However, if you want to - force a certain codepage, then you can use this function. - - :param encoding: must be a valid encoding from CHARCODE - :param force_encoding: whether the encoding should not be changed automatically + From now one, this buffer will switch the code page anymore. + However, it will still keep track of the current code page. """ - self.codepage_name(encoding) - self.encoding = encoding - self.force_encoding = force_encoding - - @staticmethod - def codepage_sequence(codepage): - """returns the corresponding codepage-sequence""" - try: - return CHARCODE[codepage][0] - except KeyError: - raise CharCodeError("The encoding {enc} is unknown.".format(enc=codepage)) - - @staticmethod - def codepage_name(codepage): - """returns the corresponding codepage-name (for python)""" - try: - name = CHARCODE[codepage][1] - if name == '': - raise CharCodeError("The codepage {enc} does not have a connected python-codepage".format(enc=codepage)) - return name - except KeyError: - raise CharCodeError("The encoding {enc} is unknown.".format(enc=codepage)) - - def encode_char(self, char): - """ - Encodes a single unicode character into a sequence of - esc-pos code page change instructions and character declarations - """ - if type(char) is not six.text_type: - raise Error("The supplied text has to be unicode, but is of type {type}.".format( - type=type(char) - )) - encoded = b'' - encoding = self.encoding # we reuse the last encoding to prevent code page switches at every character - remaining = copy.copy(CHARCODE) - - while True: # Trying all encoding until one succeeds - try: - if encoding == 'KATAKANA': # Japanese characters - if jcconv: - # try to convert japanese text to half-katakanas - kata = jcconv.kata2half(jcconv.hira2kata(char)) - if kata != char: - self.extra_chars += len(kata) - 1 - # the conversion may result in multiple characters - return self.encode_str(kata) - else: - kata = char - - if kata in TXT_ENC_KATAKANA_MAP: - encoded = TXT_ENC_KATAKANA_MAP[kata] - break - else: - raise ValueError() - else: - try: - enc_name = MagicEncode.codepage_name(encoding) - encoded = char.encode(enc_name) - assert type(encoded) is bytes - except LookupError: - raise ValueError("The encoding {enc} seems to not exist in Python".format(enc=encoding)) - except CharCodeError: - raise ValueError("The encoding {enc} is not fully configured in constants".format( - enc=encoding - )) - break - - except ValueError: # the encoding failed, select another one and retry - if encoding in remaining: - del remaining[encoding] - if len(remaining) >= 1: - encoding = list(remaining)[0] - else: - encoding = self.defaultencoding - encoded = self.defaultsymbol # could not encode, output error character - break - - if encoding != self.encoding: - # if the encoding changed, remember it and prefix the character with - # the esc-pos encoding change sequence - self.encoding = encoding - encoded = CHARCODE[encoding][0] + encoded - - return encoded - - def encode_str(self, txt): - # make sure the right codepage is set in the printer - buffer = self.codepage_sequence(self.encoding) - if self.force_encoding: - buffer += txt.encode(self.codepage_name(self.encoding)) + if not encoding: + self.disabled = False else: - for c in txt: - buffer += self.encode_char(c) - return buffer + self.write_with_encoding(encoding, None) + self.disabled = True - def encode_text(self, txt): - """returns a byte-string with encoded text - - :param txt: text that shall be encoded - :return: byte-string for the printer + def write(self, text): + """Write the text, automatically switching encodings. """ - if not txt: + + if self.disabled: + self.write_with_encoding(self.encoding, text) return - self.extra_chars = 0 + # TODO: Currently this very simple loop means we send every + # character individually to the printer. We can probably + # improve performace by searching the text for the first + # character that cannot be rendered using the current code + # page, and then sending all of those characters at once. + # Or, should a lower-level buffer be responsible for that? - txt = self.encode_str(txt) + for char in text: + # See if the current code page works for this character. + # The encoder object will use a cache to be able to answer + # this question fairly easily. + if self.encoding and self.encoder.can_encode(self.encoding, char): + self.write_with_encoding(self.encoding, char) + continue - # if the utf-8 -> codepage conversion inserted extra characters, - # remove double spaces to try to restore the original string length - # and prevent printing alignment issues - while self.extra_chars > 0: - dspace = txt.find(' ') - if dspace > 0: - txt = txt[:dspace] + txt[dspace+1:] - self.extra_chars -= 1 - else: - break + # We have to find another way to print this character. + # See if any of the code pages that the printer profile supports + # can encode this character. + codespace = self.encoder.find_suitable_codespace(char) + if not codespace: + self._handle_character_failed(char) + continue - return txt + self.write_with_encoding(codespace, char) + + def _handle_character_failed(self, char): + """Called when no codepage was found to render a character. + """ + # Writing the default symbol via write() allows us to avoid + # unnecesary codepage switches. + self.write(self.defaultsymbol) + + def write_with_encoding(self, encoding, text): + if text is not None and type(text) is not six.text_type: + raise Error("The supplied text has to be unicode, but is of type {type}.".format( + type=type(text) + )) + + encoding = self.encoder.get_encoding(encoding) + + # We always know the current code page; if the new codepage + # is different, emit a change command. + if encoding != self.encoding: + self.encoding = encoding + self.driver._raw(b'{}{}'.format( + CODEPAGE_CHANGE, + six.int2byte(self.encoder.get_sequence(encoding)) + )) + + if text: + self.driver._raw(code_pages.encode(text, encoding, errors="replace")) # todo emoticons mit charmap encoden diff --git a/test/conftest.py b/test/conftest.py new file mode 100644 index 0000000..2dad088 --- /dev/null +++ b/test/conftest.py @@ -0,0 +1,7 @@ +import pytest +from escpos.printer import Dummy + + +@pytest.fixture +def driver(): + return Dummy() diff --git a/test/test_magicencode.py b/test/test_magicencode.py index eb0f07b..24fb0db 100644 --- a/test/test_magicencode.py +++ b/test/test_magicencode.py @@ -13,103 +13,97 @@ from __future__ import division from __future__ import print_function from __future__ import unicode_literals +import pytest from nose.tools import raises, assert_raises from hypothesis import given, example import hypothesis.strategies as st -from escpos.magicencode import MagicEncode +from escpos.magicencode import MagicEncode, Encoder, encode_katakana from escpos.exceptions import CharCodeError, Error -from escpos.constants import CHARCODE - -@raises(CharCodeError) -def test_magic_encode_unkown_char_constant_as_startenc(): - """tests whether MagicEncode raises the proper Exception when an unknown charcode-name is passed as startencoding""" - MagicEncode(startencoding="something") - -@raises(CharCodeError) -def test_magic_encode_unkown_char_constant_as_defaultenc(): - """tests whether MagicEncode raises the proper Exception when an unknown charcode-name is passed as defaultenc.""" - MagicEncode(defaultencoding="something") - -def test_magic_encode_wo_arguments(): - """tests whether MagicEncode works in the standard configuration""" - MagicEncode() - -@raises(Error) -def test_magic_encode_w_non_binary_defaultsymbol(): - """tests whether MagicEncode catches non-binary defaultsymbols""" - MagicEncode(defaultsymbol="non-binary") - -@given(symbol=st.binary()) -def test_magic_encode_w_binary_defaultsymbol(symbol): - """tests whether MagicEncode works with any binary symbol""" - MagicEncode(defaultsymbol=symbol) - -@given(st.text()) -@example("カタカナ") -@example("あいうえお") -@example("ハンカクカタカナ") -def test_magic_encode_encode_text_unicode_string(text): - """tests whether MagicEncode can accept a unicode string""" - me = MagicEncode() - me.encode_text(text) - -@given(char=st.characters()) -def test_magic_encode_encode_char(char): - """tests the encode_char-method of MagicEncode""" - me = MagicEncode() - me.encode_char(char) - -@raises(Error) -@given(char=st.binary()) -def test_magic_encode_encode_char_binary(char): - """tests the encode_char-method of MagicEncode with binary input""" - me = MagicEncode() - me.encode_char(char) -def test_magic_encode_string_with_katakana_and_hiragana(): - """tests the encode_string-method with katakana and hiragana""" - me = MagicEncode() - me.encode_str("カタカナ") - me.encode_str("あいうえお") -@raises(CharCodeError) -def test_magic_encode_codepage_sequence_unknown_key(): - """tests whether MagicEncode.codepage_sequence raises the proper Exception with unknown charcode-names""" - MagicEncode.codepage_sequence("something") +class TestEncoder: -@raises(CharCodeError) -def test_magic_encode_codepage_name_unknown_key(): - """tests whether MagicEncode.codepage_name raises the proper Exception with unknown charcode-names""" - MagicEncode.codepage_name("something") + def test_can_encode(self): + assert not Encoder({1: 'cp437'}).can_encode('cp437', u'€') + assert Encoder({1: 'cp437'}).can_encode('cp437', u'á') + assert not Encoder({1: 'foobar'}).can_encode('foobar', 'a') -def test_magic_encode_constants_getter(): - """tests whether the constants are properly fetched""" - for key in CHARCODE: - name = CHARCODE[key][1] - if name == '': - assert_raises(CharCodeError, MagicEncode.codepage_name, key) - else: - assert name == MagicEncode.codepage_name(key) - assert MagicEncode.codepage_sequence(key) == CHARCODE[key][0] + def test_find_suitable_encoding(self): + assert not Encoder({1: 'cp437'}).find_suitable_codespace(u'€') + assert Encoder({1: 'cp858'}).find_suitable_codespace(u'€') == 'cp858' -@given(st.text()) -def test_magic_encode_force_encoding(text): - """test whether force_encoding works as expected""" - me = MagicEncode() - assert me.force_encoding is False - me.set_encoding(encoding='PC850', force_encoding=True) - assert me.encoding == 'PC850' - assert me.force_encoding is True - try: - me.encode_text(text) - except UnicodeEncodeError: - # we discard these errors as they are to be expected - # what we want to check here is, whether encoding or codepage will switch through some of the magic code - # being called accidentally - pass - assert me.encoding == 'PC850' - assert me.force_encoding is True + @raises(ValueError) + def test_get_encoding(self): + Encoder({}).get_encoding('latin1') + + +class TestMagicEncode: + + class TestInit: + + def test_disabled_requires_encoding(self, driver): + with pytest.raises(Error): + MagicEncode(driver, disabled=True) + + class TestWriteWithEncoding: + + def test_init_from_none(self, driver): + encode = MagicEncode(driver, encoding=None) + encode.write_with_encoding('cp858', '€ ist teuro.') + assert driver.output == b'\x1bt\xd5 ist teuro.' + + def test_change_from_another(self, driver): + encode = MagicEncode(driver, encoding='cp437') + encode.write_with_encoding('cp858', '€ ist teuro.') + assert driver.output == b'\x1bt\xd5 ist teuro.' + + def test_no_change(self, driver): + encode = MagicEncode(driver, encoding='cp858') + encode.write_with_encoding('cp858', '€ ist teuro.') + assert driver.output == b'\xd5 ist teuro.' + + class TestWrite: + + def test_write(self, driver): + encode = MagicEncode(driver) + encode.write('€ ist teuro.') + assert driver.output == b'\x1bt\xa4 ist teuro.' + + def test_write_disabled(self, driver): + encode = MagicEncode(driver, encoding='cp437', disabled=True) + encode.write('€ ist teuro.') + assert driver.output == b'? ist teuro.' + + def test_write_no_codepage(self, driver): + encode = MagicEncode( + driver, defaultsymbol="_", encoder=Encoder({1: 'cp437'}), + encoding='cp437') + encode.write(u'€ ist teuro.') + assert driver.output == b'_ ist teuro.' + + class TestForceEncoding: + + def test(self, driver): + encode = MagicEncode(driver) + encode.force_encoding('cp437') + assert driver.output == b'\x1bt' + + encode.write('€ ist teuro.') + assert driver.output == b'\x1bt? ist teuro.' + + +class TestKatakana: + @given(st.text()) + @example("カタカナ") + @example("あいうえお") + @example("ハンカクカタカナ") + def test_accept(self, text): + encode_katakana(text) + + def test_result(self): + assert encode_katakana('カタカナ') == b'\xb6\xc0\xb6\xc5' + assert encode_katakana("あいうえお") == b'\xb1\xb2\xb3\xb4\xb5' # TODO Idee für unittest: hypothesis-strings erzeugen, in encode_text werfen