Largely rewrite the magic text encoding feature.

Michael Elsdörfer 2016-08-27 11:09:08 +02:00
parent f6ce7e45da
commit c7864fd785
6 changed files with 411 additions and 332 deletions

View File

@@ -80,8 +80,8 @@ default:
#24: // Thai Character Code 16
#25: // Thai Character Code 17
#26: // Thai Character Code 18
30: 'TCVN-3-1', # TCVN-3: Vietnamese
31: 'TCVN-3-2', # TCVN-3: Vietnamese
30: 'TCVN-3-1' # TCVN-3: Vietnamese
31: 'TCVN-3-2' # TCVN-3: Vietnamese
32: "CP720"
33: "CP775"
34: "CP855"
@@ -152,13 +152,13 @@ epson:
a: 42
b: 56
codePages:
- PC437 # 0
- cp437 # 0
- Katakana # 1
- PC850 # 2
- PC860 # 3
- PC863 # 4
- PC865 # 5
- PC858 # 19
- cp850 # 2
- cp860 # 3
- cp863 # 4
- cp865 # 5
- cp858 # 19
- blank
# http://support.epostraders.co.uk/support-files/documents/3/l7O-TM-T88II_TechnicalRefGuide.pdf
@@ -168,16 +168,16 @@ epson:
a: 42
b: 56
codePages:
- PC437 # 0
- CP437 # 0
- Katakana # 1
- PC850 # 2
- PC860 # 3
- PC863 # 4
- PC865 # 5
- WPC1252 # 16
- PC866 # 17
- PC852 # 18
- PC858 # 19
- CP850 # 2
- CP860 # 3
- CP863 # 4
- CP865 # 5
- PC1252 # 16
- CP866 # 17
- CP852 # 18
- CP858 # 19
- blank
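The numeric table in the default section above maps ESC/POS code-page slot numbers to encoding names; each printer model then lists only the names its firmware supports in codePages. Roughly, the two are combined like this (a small sketch with made-up values; the real lookup is the get_encodings_from_profile() helper further down in this commit):

    # Illustrative data only -- slots and names vary per profile.
    code_page_map = {0: 'CP437', 2: 'CP850', 16: 'CP1252', 19: 'CP858'}   # default: slot -> name
    supported = {'cp437', 'cp850', 'cp858'}                               # one model's codePages

    # Keep only the slots whose encoding this model can actually use.
    usable = {slot: name.lower() for slot, name in code_page_map.items()
              if name.lower() in supported}
    assert usable == {0: 'cp437', 2: 'cp850', 19: 'cp858'}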

View File

@@ -101,99 +101,101 @@ TXT_ALIGN_RT = ESC + b'\x61\x02' # Right justification
TXT_INVERT_ON = GS + b'\x42\x01' # Inverse Printing ON
TXT_INVERT_OFF = GS + b'\x42\x00' # Inverse Printing OFF
CODEPAGE_CHANGE = ESC + b'\x74'
# Char code table
CHARCODE = {
'PC437':
[ESC + b'\x74\x00', 'cp437'], # PC437 USA
'KATAKANA':
[ESC + b'\x74\x01', ''], # KATAKANA (JAPAN)
'PC850':
[ESC + b'\x74\x02', 'cp850'], # PC850 Multilingual
'PC860':
[ESC + b'\x74\x03', 'cp860'], # PC860 Portuguese
'PC863':
[ESC + b'\x74\x04', 'cp863'], # PC863 Canadian-French
'PC865':
[ESC + b'\x74\x05', 'cp865'], # PC865 Nordic
'KANJI6':
[ESC + b'\x74\x06', ''], # One-pass Kanji, Hiragana
'KANJI7':
[ESC + b'\x74\x07', ''], # One-pass Kanji
'KANJI8':
[ESC + b'\x74\x08', ''], # One-pass Kanji
'PC851':
[ESC + b'\x74\x0b', 'cp851'], # PC851 Greek
'PC853':
[ESC + b'\x74\x0c', 'cp853'], # PC853 Turkish
'PC857':
[ESC + b'\x74\x0d', 'cp857'], # PC857 Turkish
'PC737':
[ESC + b'\x74\x0e', 'cp737'], # PC737 Greek
'8859_7':
[ESC + b'\x74\x0f', 'iso8859_7'], # ISO8859-7 Greek
'WPC1252':
[ESC + b'\x74\x10', 'cp1252'], # WPC1252
'PC866':
[ESC + b'\x74\x11', 'cp866'], # PC866 Cyrillic #2
'PC852':
[ESC + b'\x74\x12', 'cp852'], # PC852 Latin2
'PC858':
[ESC + b'\x74\x13', 'cp858'], # PC858 Euro
'KU42':
[ESC + b'\x74\x14', ''], # KU42 Thai
'TIS11':
[ESC + b'\x74\x15', ''], # TIS11 Thai
'TIS18':
[ESC + b'\x74\x1a', ''], # TIS18 Thai
'TCVN3':
[ESC + b'\x74\x1e', ''], # TCVN3 Vietnamese
'TCVN3B':
[ESC + b'\x74\x1f', ''], # TCVN3 Vietnamese
'PC720':
[ESC + b'\x74\x20', 'cp720'], # PC720 Arabic
'WPC775':
[ESC + b'\x74\x21', ''], # WPC775 Baltic Rim
'PC855':
[ESC + b'\x74\x22', 'cp855'], # PC855 Cyrillic
'PC861':
[ESC + b'\x74\x23', 'cp861'], # PC861 Icelandic
'PC862':
[ESC + b'\x74\x24', 'cp862'], # PC862 Hebrew
'PC864':
[ESC + b'\x74\x25', 'cp864'], # PC864 Arabic
'PC869':
[ESC + b'\x74\x26', 'cp869'], # PC869 Greek
'8859_2':
[ESC + b'\x74\x27', 'iso8859_2'], # ISO8859-2 Latin2
'8859_9':
[ESC + b'\x74\x28', 'iso8859_9'], # ISO8859-2 Latin9
'PC1098':
[ESC + b'\x74\x29', 'cp1098'], # PC1098 Farsi
'PC1118':
[ESC + b'\x74\x2a', 'cp1118'], # PC1118 Lithuanian
'PC1119':
[ESC + b'\x74\x2b', 'cp1119'], # PC1119 Lithuanian
'PC1125':
[ESC + b'\x74\x2c', 'cp1125'], # PC1125 Ukrainian
'WPC1250':
[ESC + b'\x74\x2d', 'cp1250'], # WPC1250 Latin2
'WPC1251':
[ESC + b'\x74\x2e', 'cp1251'], # WPC1251 Cyrillic
'WPC1253':
[ESC + b'\x74\x2f', 'cp1253'], # WPC1253 Greek
'WPC1254':
[ESC + b'\x74\x30', 'cp1254'], # WPC1254 Turkish
'WPC1255':
[ESC + b'\x74\x31', 'cp1255'], # WPC1255 Hebrew
'WPC1256':
[ESC + b'\x74\x32', 'cp1256'], # WPC1256 Arabic
'WPC1257':
[ESC + b'\x74\x33', 'cp1257'], # WPC1257 Baltic Rim
'WPC1258':
[ESC + b'\x74\x34', 'cp1258'], # WPC1258 Vietnamese
'KZ1048':
[ESC + b'\x74\x35', 'kz1048'], # KZ-1048 Kazakhstan
}
# CHARCODE = {
# 'PC437':
# [ESC + b'\x74\x00', 'cp437'], # PC437 USA
# 'KATAKANA':
# [ESC + b'\x74\x01', ''], # KATAKANA (JAPAN)
# 'PC850':
# [ESC + b'\x74\x02', 'cp850'], # PC850 Multilingual
# 'PC860':
# [ESC + b'\x74\x03', 'cp860'], # PC860 Portuguese
# 'PC863':
# [ESC + b'\x74\x04', 'cp863'], # PC863 Canadian-French
# 'PC865':
# [ESC + b'\x74\x05', 'cp865'], # PC865 Nordic
# 'KANJI6':
# [ESC + b'\x74\x06', ''], # One-pass Kanji, Hiragana
# 'KANJI7':
# [ESC + b'\x74\x07', ''], # One-pass Kanji
# 'KANJI8':
# [ESC + b'\x74\x08', ''], # One-pass Kanji
# 'PC851':
# [ESC + b'\x74\x0b', 'cp851'], # PC851 Greek
# 'PC853':
# [ESC + b'\x74\x0c', 'cp853'], # PC853 Turkish
# 'PC857':
# [ESC + b'\x74\x0d', 'cp857'], # PC857 Turkish
# 'PC737':
# [ESC + b'\x74\x0e', 'cp737'], # PC737 Greek
# '8859_7':
# [ESC + b'\x74\x0f', 'iso8859_7'], # ISO8859-7 Greek
# 'WPC1252':
# [ESC + b'\x74\x10', 'cp1252'], # WPC1252
# 'PC866':
# [ESC + b'\x74\x11', 'cp866'], # PC866 Cyrillic #2
# 'PC852':
# [ESC + b'\x74\x12', 'cp852'], # PC852 Latin2
# 'PC858':
# [ESC + b'\x74\x13', 'cp858'], # PC858 Euro
# 'KU42':
# [ESC + b'\x74\x14', ''], # KU42 Thai
# 'TIS11':
# [ESC + b'\x74\x15', ''], # TIS11 Thai
# 'TIS18':
# [ESC + b'\x74\x1a', ''], # TIS18 Thai
# 'TCVN3':
# [ESC + b'\x74\x1e', ''], # TCVN3 Vietnamese
# 'TCVN3B':
# [ESC + b'\x74\x1f', ''], # TCVN3 Vietnamese
# 'PC720':
# [ESC + b'\x74\x20', 'cp720'], # PC720 Arabic
# 'WPC775':
# [ESC + b'\x74\x21', ''], # WPC775 Baltic Rim
# 'PC855':
# [ESC + b'\x74\x22', 'cp855'], # PC855 Cyrillic
# 'PC861':
# [ESC + b'\x74\x23', 'cp861'], # PC861 Icelandic
# 'PC862':
# [ESC + b'\x74\x24', 'cp862'], # PC862 Hebrew
# 'PC864':
# [ESC + b'\x74\x25', 'cp864'], # PC864 Arabic
# 'PC869':
# [ESC + b'\x74\x26', 'cp869'], # PC869 Greek
# '8859_2':
# [ESC + b'\x74\x27', 'iso8859_2'], # ISO8859-2 Latin2
# '8859_9':
# [ESC + b'\x74\x28', 'iso8859_9'], # ISO8859-2 Latin9
# 'PC1098':
# [ESC + b'\x74\x29', 'cp1098'], # PC1098 Farsi
# 'PC1118':
# [ESC + b'\x74\x2a', 'cp1118'], # PC1118 Lithuanian
# 'PC1119':
# [ESC + b'\x74\x2b', 'cp1119'], # PC1119 Lithuanian
# 'PC1125':
# [ESC + b'\x74\x2c', 'cp1125'], # PC1125 Ukrainian
# 'WPC1250':
# [ESC + b'\x74\x2d', 'cp1250'], # WPC1250 Latin2
# 'WPC1251':
# [ESC + b'\x74\x2e', 'cp1251'], # WPC1251 Cyrillic
# 'WPC1253':
# [ESC + b'\x74\x2f', 'cp1253'], # WPC1253 Greek
# 'WPC1254':
# [ESC + b'\x74\x30', 'cp1254'], # WPC1254 Turkish
# 'WPC1255':
# [ESC + b'\x74\x31', 'cp1255'], # WPC1255 Hebrew
# 'WPC1256':
# [ESC + b'\x74\x32', 'cp1256'], # WPC1256 Arabic
# 'WPC1257':
# [ESC + b'\x74\x33', 'cp1257'], # WPC1257 Baltic Rim
# 'WPC1258':
# [ESC + b'\x74\x34', 'cp1258'], # WPC1258 Vietnamese
# 'KZ1048':
# [ESC + b'\x74\x35', 'kz1048'], # KZ-1048 Kazakhstan
# }
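With the CHARCODE table commented out above, the only constant the encoder still needs from this module is CODEPAGE_CHANGE (ESC t); the slot byte that follows it now comes from the printer profile rather than from a hard-coded table. A minimal sketch of how such a sequence is assembled (the slot number 16 is just an example value):

    import six

    ESC = b'\x1b'
    CODEPAGE_CHANGE = ESC + b'\x74'   # ESC t, as defined above

    def codepage_sequence(slot):
        """Return the raw bytes that switch the printer to code-page slot <slot>."""
        return CODEPAGE_CHANGE + six.int2byte(slot)

    assert codepage_sequence(16) == b'\x1bt\x10'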
# Barcode format
_SET_BARCODE_TXT_POS = lambda n: GS + b'H' + n

View File

@@ -36,12 +36,12 @@ class Escpos(object):
"""
device = None
def __init__(self, profile=None, **kwargs):
def __init__(self, profile=None, magic_encode_args=None, **kwargs):
""" Initialize ESCPOS Printer
:param profile: Printer profile"""
self.profile = get_profile(profile)
self.magic = MagicEncode(**kwargs)
self.magic = MagicEncode(self, **(magic_encode_args or {}))
def __del__(self):
""" call self.close upon deletion """
@@ -228,11 +228,9 @@ class Escpos(object):
:raises: :py:exc:`~escpos.exceptions.CharCodeError`
"""
if code.upper() == "AUTO":
self.magic.force_encoding = False
self.magic.force_encoding(False)
else:
self.magic.codepage_sequence(code)
self.magic.encoding = code
self.magic.force_encoding = True
self.magic.force_encoding(code)
def barcode(self, code, bc, height=64, width=3, pos="BELOW", font="A", align_ct=True, function_type="A"):
""" Print Barcode
@@ -373,7 +371,7 @@ class Escpos(object):
:raises: :py:exc:`~escpos.exceptions.TextError`
"""
txt = six.text_type(txt)
self._raw(self.magic.encode_text(txt=txt))
self.magic.write(txt)
def block_text(self, txt, font=None, columns=None):
""" Text is printed wrapped to specified columns

View File

@@ -17,8 +17,9 @@ from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
from .constants import CHARCODE
from .constants import CODEPAGE_CHANGE
from .exceptions import CharCodeError, Error
from .capabilities import get_profile
import copy
import six
@@ -27,153 +28,230 @@ try:
except ImportError:
jcconv = None
def encode_katakana(text):
"""I don't think this quite works yet."""
encoded = []
for char in text:
if jcconv:
# try to convert japanese text to half-katakanas
char = jcconv.kata2half(jcconv.hira2kata(char))
# TODO: "the conversion may result in multiple characters"
# When? What should we do about it?
if char in TXT_ENC_KATAKANA_MAP:
encoded.append(TXT_ENC_KATAKANA_MAP[char])
else:
encoded.append(char)
print(encoded)
return b"".join(encoded)
# TODO: When the capabilities.yml format is finished, this should be
# in the profile itself.
def get_encodings_from_profile(profile):
mapping = {k: v.lower() for k, v in profile.codePageMap.items()}
if hasattr(profile, 'codePages'):
code_pages = [n.lower() for n in profile.codePages]
return {k: v for k, v in mapping.items() if v in code_pages}
else:
return mapping
class CodePages:
def get_all(self):
return get_encodings_from_profile(get_profile()).values()
def encode(self, text, encoding, errors='strict'):
# Doesn't Python have this builtin?
if encoding.upper() == 'KATAKANA':
return encode_katakana(text)
return text.encode(encoding, errors=errors)
def get_encoding(self, encoding):
# resolve the encoding alias
return encoding.lower()
code_pages = CodePages()
class Encoder(object):
"""Takes a list of available code spaces. Picks the right one for a
given character.
Note: To determine the codespace, it needs to do the conversion, and
thus already knows what the final byte in the target encoding would
be. Nevertheless, the API of this class doesn't return the byte.
The caller is expected to do the character conversion itself.
$ python -m timeit -s "{u'ö':'a'}.get(u'ö')"
100000000 loops, best of 3: 0.0133 usec per loop
$ python -m timeit -s "u'ö'.encode('latin1')"
100000000 loops, best of 3: 0.0141 usec per loop
"""
def __init__(self, codepages):
self.codepages = codepages
self.reverse = {v:k for k, v in codepages.items()}
self.available_encodings = set(codepages.values())
self.used_encodings = set()
def get_sequence(self, encoding):
return self.reverse[encoding]
def get_encoding(self, encoding):
"""resolve aliases
check that the profile allows this encoding
"""
encoding = code_pages.get_encoding(encoding)
if encoding not in self.available_encodings:
raise ValueError('This encoding cannot be used for the current profile')
return encoding
def get_encodings(self):
"""
- remove the ones not supported
- order by used first, then others
- do not use a cache, because encode already is so fast
"""
return self.available_encodings
def can_encode(self, encoding, char):
try:
encoded = code_pages.encode(char, encoding)
assert type(encoded) is bytes
return encoded
except LookupError:
# We don't have this encoding
return False
except UnicodeEncodeError:
return False
return True
def find_suitable_codespace(self, char):
"""The order of our search is a specific one:
1. code pages that we already tried before; there is a good
chance they might work again, reducing the search space,
and by re-using already used encodings we might also
reduce the number of codepage change instructions we have
to send. Still, any performance gains will presumably be
fairly minor.
2. code pages in lower ESCPOS slots first. Presumably, they
are more likely to be supported, so if a printer profile
is missing or incomplete, we might increase the chance
that the code page we pick for this character is actually
supported.
# XXX actually do speed up the search
"""
for encoding in self.get_encodings():
if self.can_encode(encoding, char):
# This encoding worked; add it to the set of used ones.
self.used_encodings.add(encoding)
return encoding
class MagicEncode(object):
""" Magic Encode Class
It tries to automatically encode utf-8 input into the right encoding. When encoding is impossible, a configurable
symbol will be inserted.
"""
def __init__(self, startencoding='PC437', force_encoding=False, defaultsymbol=b'', defaultencoding='PC437'):
# running these functions makes sure that the encoding is suitable
MagicEncode.codepage_name(startencoding)
MagicEncode.codepage_name(defaultencoding)
self.encoding = startencoding
encoding: If you know the current encoding of the printer when
initializing this class, set it here. If the current encoding is
unknown, the first character emitted will be a codepage switch.
"""
def __init__(self, driver, encoding=None, disabled=False,
defaultsymbol='?', encoder=None):
if disabled and not encoding:
raise Error('If you disable magic encode, you need to define an encoding!')
self.driver = driver
self.encoder = encoder or Encoder(get_encodings_from_profile(driver.profile))
self.encoding = self.encoder.get_encoding(encoding) if encoding else None
self.defaultsymbol = defaultsymbol
if type(self.defaultsymbol) is not six.binary_type:
raise Error("The supplied symbol {sym} has to be a binary string".format(sym=defaultsymbol))
self.defaultencoding = defaultencoding
self.force_encoding = force_encoding
self.disabled = disabled
def set_encoding(self, encoding='PC437', force_encoding=False):
"""sets an encoding (normally not used)
def force_encoding(self, encoding):
"""Sets a fixed encoding. The change is emitted right away.
This function should normally not be used since it manipulates the automagic behaviour. However, if you want to
force a certain codepage, then you can use this function.
:param encoding: must be a valid encoding from CHARCODE
:param force_encoding: whether the encoding should not be changed automatically
From now on, this buffer will not switch the code page anymore.
However, it will still keep track of the current code page.
"""
self.codepage_name(encoding)
self.encoding = encoding
self.force_encoding = force_encoding
if not encoding:
self.disabled = False
else:
self.write_with_encoding(encoding, None)
self.disabled = True
@staticmethod
def codepage_sequence(codepage):
"""returns the corresponding codepage-sequence"""
try:
return CHARCODE[codepage][0]
except KeyError:
raise CharCodeError("The encoding {enc} is unknown.".format(enc=codepage))
@staticmethod
def codepage_name(codepage):
"""returns the corresponding codepage-name (for python)"""
try:
name = CHARCODE[codepage][1]
if name == '':
raise CharCodeError("The codepage {enc} does not have a connected python-codepage".format(enc=codepage))
return name
except KeyError:
raise CharCodeError("The encoding {enc} is unknown.".format(enc=codepage))
def encode_char(self, char):
def write(self, text):
"""Write the text, automatically switching encodings.
"""
Encodes a single unicode character into a sequence of
esc-pos code page change instructions and character declarations
"""
if type(char) is not six.text_type:
raise Error("The supplied text has to be unicode, but is of type {type}.".format(
type=type(char)
))
encoded = b''
encoding = self.encoding # we reuse the last encoding to prevent code page switches at every character
remaining = copy.copy(CHARCODE)
while True: # Trying all encoding until one succeeds
try:
if encoding == 'KATAKANA': # Japanese characters
if jcconv:
# try to convert japanese text to half-katakanas
kata = jcconv.kata2half(jcconv.hira2kata(char))
if kata != char:
self.extra_chars += len(kata) - 1
# the conversion may result in multiple characters
return self.encode_str(kata)
else:
kata = char
if kata in TXT_ENC_KATAKANA_MAP:
encoded = TXT_ENC_KATAKANA_MAP[kata]
break
else:
raise ValueError()
else:
try:
enc_name = MagicEncode.codepage_name(encoding)
encoded = char.encode(enc_name)
assert type(encoded) is bytes
except LookupError:
raise ValueError("The encoding {enc} seems to not exist in Python".format(enc=encoding))
except CharCodeError:
raise ValueError("The encoding {enc} is not fully configured in constants".format(
enc=encoding
))
break
except ValueError: # the encoding failed, select another one and retry
if encoding in remaining:
del remaining[encoding]
if len(remaining) >= 1:
encoding = list(remaining)[0]
else:
encoding = self.defaultencoding
encoded = self.defaultsymbol # could not encode, output error character
break
if encoding != self.encoding:
# if the encoding changed, remember it and prefix the character with
# the esc-pos encoding change sequence
self.encoding = encoding
encoded = CHARCODE[encoding][0] + encoded
return encoded
def encode_str(self, txt):
# make sure the right codepage is set in the printer
buffer = self.codepage_sequence(self.encoding)
if self.force_encoding:
buffer += txt.encode(self.codepage_name(self.encoding))
else:
for c in txt:
buffer += self.encode_char(c)
return buffer
def encode_text(self, txt):
"""returns a byte-string with encoded text
:param txt: text that shall be encoded
:return: byte-string for the printer
"""
if not txt:
if self.disabled:
self.write_with_encoding(self.encoding, text)
return
self.extra_chars = 0
# TODO: Currently this very simple loop means we send every
# character individually to the printer. We can probably
# improve performance by searching the text for the first
# character that cannot be rendered using the current code
# page, and then sending all of those characters at once.
# Or, should a lower-level buffer be responsible for that?
txt = self.encode_str(txt)
for char in text:
# See if the current code page works for this character.
# The encoder object will use a cache to be able to answer
# this question fairly easily.
if self.encoding and self.encoder.can_encode(self.encoding, char):
self.write_with_encoding(self.encoding, char)
continue
# if the utf-8 -> codepage conversion inserted extra characters,
# remove double spaces to try to restore the original string length
# and prevent printing alignment issues
while self.extra_chars > 0:
dspace = txt.find(' ')
if dspace > 0:
txt = txt[:dspace] + txt[dspace+1:]
self.extra_chars -= 1
else:
break
# We have to find another way to print this character.
# See if any of the code pages that the printer profile supports
# can encode this character.
codespace = self.encoder.find_suitable_codespace(char)
if not codespace:
self._handle_character_failed(char)
continue
return txt
self.write_with_encoding(codespace, char)
def _handle_character_failed(self, char):
"""Called when no codepage was found to render a character.
"""
# Writing the default symbol via write() allows us to avoid
# unnecessary codepage switches.
self.write(self.defaultsymbol)
def write_with_encoding(self, encoding, text):
if text is not None and type(text) is not six.text_type:
raise Error("The supplied text has to be unicode, but is of type {type}.".format(
type=type(text)
))
encoding = self.encoder.get_encoding(encoding)
# We always know the current code page; if the new codepage
# is different, emit a change command.
if encoding != self.encoding:
self.encoding = encoding
self.driver._raw(
CODEPAGE_CHANGE +
six.int2byte(self.encoder.get_sequence(encoding))
)
if text:
self.driver._raw(code_pages.encode(text, encoding, errors="replace"))
# TODO: encode emoticons with a charmap
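Pulling the pieces together, this is roughly how the rewritten class behaves when driven directly; it mirrors the tests below, but the exact slot bytes in the output depend on the printer profile, so none are asserted here:

    from escpos.printer import Dummy
    from escpos.magicencode import MagicEncode

    driver = Dummy()
    encode = MagicEncode(driver)     # no start encoding: the first character triggers an ESC t switch
    encode.write('€ ist teuro.')     # a euro-capable code page is picked automatically
    print(driver.output)             # code-page change sequence followed by the encoded text

    encode.force_encoding('cp437')   # pin the page; write() stops switching from now on
    encode.write('€ ist teuro.')     # '€' has no cp437 byte, so a '?' is printed instead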

test/conftest.py (new file, 7 lines)
View File

@@ -0,0 +1,7 @@
import pytest
from escpos.printer import Dummy
@pytest.fixture
def driver():
return Dummy()
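The fixture gives every test a fresh Dummy printer: nothing is sent to real hardware, the raw bytes are only buffered so the assertions can look at them afterwards (a tiny sketch, assuming Dummy collects everything passed to _raw() in its output attribute, as the assertions below suggest):

    def test_example(driver):
        driver._raw(b'\x1bt\x10')                  # whatever the code under test emits ...
        assert driver.output.startswith(b'\x1bt')  # ... can be inspected here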

View File

@@ -13,103 +13,97 @@ from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
import pytest
from nose.tools import raises, assert_raises
from hypothesis import given, example
import hypothesis.strategies as st
from escpos.magicencode import MagicEncode
from escpos.magicencode import MagicEncode, Encoder, encode_katakana
from escpos.exceptions import CharCodeError, Error
from escpos.constants import CHARCODE
@raises(CharCodeError)
def test_magic_encode_unkown_char_constant_as_startenc():
"""tests whether MagicEncode raises the proper Exception when an unknown charcode-name is passed as startencoding"""
MagicEncode(startencoding="something")
@raises(CharCodeError)
def test_magic_encode_unkown_char_constant_as_defaultenc():
"""tests whether MagicEncode raises the proper Exception when an unknown charcode-name is passed as defaultenc."""
MagicEncode(defaultencoding="something")
def test_magic_encode_wo_arguments():
"""tests whether MagicEncode works in the standard configuration"""
MagicEncode()
@raises(Error)
def test_magic_encode_w_non_binary_defaultsymbol():
"""tests whether MagicEncode catches non-binary defaultsymbols"""
MagicEncode(defaultsymbol="non-binary")
@given(symbol=st.binary())
def test_magic_encode_w_binary_defaultsymbol(symbol):
"""tests whether MagicEncode works with any binary symbol"""
MagicEncode(defaultsymbol=symbol)
@given(st.text())
@example("カタカナ")
@example("あいうえお")
@example("ハンカクカタカナ")
def test_magic_encode_encode_text_unicode_string(text):
"""tests whether MagicEncode can accept a unicode string"""
me = MagicEncode()
me.encode_text(text)
@given(char=st.characters())
def test_magic_encode_encode_char(char):
"""tests the encode_char-method of MagicEncode"""
me = MagicEncode()
me.encode_char(char)
@raises(Error)
@given(char=st.binary())
def test_magic_encode_encode_char_binary(char):
"""tests the encode_char-method of MagicEncode with binary input"""
me = MagicEncode()
me.encode_char(char)
def test_magic_encode_string_with_katakana_and_hiragana():
"""tests the encode_string-method with katakana and hiragana"""
me = MagicEncode()
me.encode_str("カタカナ")
me.encode_str("あいうえお")
@raises(CharCodeError)
def test_magic_encode_codepage_sequence_unknown_key():
"""tests whether MagicEncode.codepage_sequence raises the proper Exception with unknown charcode-names"""
MagicEncode.codepage_sequence("something")
class TestEncoder:
@raises(CharCodeError)
def test_magic_encode_codepage_name_unknown_key():
"""tests whether MagicEncode.codepage_name raises the proper Exception with unknown charcode-names"""
MagicEncode.codepage_name("something")
def test_can_encode(self):
assert not Encoder({1: 'cp437'}).can_encode('cp437', u'€')
assert Encoder({1: 'cp437'}).can_encode('cp437', u'á')
assert not Encoder({1: 'foobar'}).can_encode('foobar', 'a')
def test_magic_encode_constants_getter():
"""tests whether the constants are properly fetched"""
for key in CHARCODE:
name = CHARCODE[key][1]
if name == '':
assert_raises(CharCodeError, MagicEncode.codepage_name, key)
else:
assert name == MagicEncode.codepage_name(key)
assert MagicEncode.codepage_sequence(key) == CHARCODE[key][0]
def test_find_suitable_encoding(self):
assert not Encoder({1: 'cp437'}).find_suitable_codespace(u'€')
assert Encoder({1: 'cp858'}).find_suitable_codespace(u'€') == 'cp858'
@given(st.text())
def test_magic_encode_force_encoding(text):
"""test whether force_encoding works as expected"""
me = MagicEncode()
assert me.force_encoding is False
me.set_encoding(encoding='PC850', force_encoding=True)
assert me.encoding == 'PC850'
assert me.force_encoding is True
try:
me.encode_text(text)
except UnicodeEncodeError:
# we discard these errors as they are to be expected
# what we want to check here is, whether encoding or codepage will switch through some of the magic code
# being called accidentally
pass
assert me.encoding == 'PC850'
assert me.force_encoding is True
@raises(ValueError)
def test_get_encoding(self):
Encoder({}).get_encoding('latin1')
class TestMagicEncode:
class TestInit:
def test_disabled_requires_encoding(self, driver):
with pytest.raises(Error):
MagicEncode(driver, disabled=True)
class TestWriteWithEncoding:
def test_init_from_none(self, driver):
encode = MagicEncode(driver, encoding=None)
encode.write_with_encoding('cp858', '€ ist teuro.')
assert driver.output == b'\x1bt\xd5 ist teuro.'
def test_change_from_another(self, driver):
encode = MagicEncode(driver, encoding='cp437')
encode.write_with_encoding('cp858', '€ ist teuro.')
assert driver.output == b'\x1bt\xd5 ist teuro.'
def test_no_change(self, driver):
encode = MagicEncode(driver, encoding='cp858')
encode.write_with_encoding('cp858', '€ ist teuro.')
assert driver.output == b'\xd5 ist teuro.'
class TestWrite:
def test_write(self, driver):
encode = MagicEncode(driver)
encode.write('€ ist teuro.')
assert driver.output == b'\x1bt\xa4 ist teuro.'
def test_write_disabled(self, driver):
encode = MagicEncode(driver, encoding='cp437', disabled=True)
encode.write('€ ist teuro.')
assert driver.output == b'? ist teuro.'
def test_write_no_codepage(self, driver):
encode = MagicEncode(
driver, defaultsymbol="_", encoder=Encoder({1: 'cp437'}),
encoding='cp437')
encode.write(u'€ ist teuro.')
assert driver.output == b'_ ist teuro.'
class TestForceEncoding:
def test(self, driver):
encode = MagicEncode(driver)
encode.force_encoding('cp437')
assert driver.output == b'\x1bt'
encode.write('€ ist teuro.')
assert driver.output == b'\x1bt? ist teuro.'
class TestKatakana:
@given(st.text())
@example("カタカナ")
@example("あいうえお")
@example("ハンカクカタカナ")
def test_accept(self, text):
encode_katakana(text)
def test_result(self):
assert encode_katakana('カタカナ') == b'\xb6\xc0\xb6\xc5'
assert encode_katakana("あいうえお") == b'\xb1\xb2\xb3\xb4\xb5'
# TODO idea for a unittest: generate hypothesis strings and feed them into encode_text
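encode_text() no longer exists after this rewrite, so the TODO above would have to target the new API instead. One possible shape, run against the Encoder rather than the full MagicEncode pipeline (the property and the test name are invented for illustration):

    from hypothesis import given
    import hypothesis.strategies as st
    from escpos.magicencode import Encoder

    @given(st.characters())
    def test_can_encode_never_raises(char):
        # For a profile that only knows cp437, can_encode() either returns
        # the encoded bytes or False; it should not raise for plain characters.
        result = Encoder({1: 'cp437'}).can_encode('cp437', char)
        assert result is False or isinstance(result, bytes)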