Largely rewrite the magic text encoding feature.
This commit is contained in:
parent
f6ce7e45da
commit
c7864fd785
|
@ -80,8 +80,8 @@ default:
|
|||
#24: // Thai Character Code 16
|
||||
#25: // Thai Character Code 17
|
||||
#26: // Thai Character Code 18
|
||||
30: 'TCVN-3-1', # TCVN-3: Vietnamese
|
||||
31: 'TCVN-3-2', # TCVN-3: Vietnamese
|
||||
30: 'TCVN-3-1' # TCVN-3: Vietnamese
|
||||
31: 'TCVN-3-2' # TCVN-3: Vietnamese
|
||||
32: "CP720"
|
||||
33: "CP775"
|
||||
34: "CP855"
|
||||
|
@ -152,13 +152,13 @@ epson:
|
|||
a: 42
|
||||
b: 56
|
||||
codePages:
|
||||
- PC437 # 0
|
||||
- cp437 # 0
|
||||
- Katakana # 1
|
||||
- PC850 # 2
|
||||
- PC860 # 3
|
||||
- PC863 # 4
|
||||
- PC865 # 5
|
||||
- PC858 # 19
|
||||
- cp850 # 2
|
||||
- cp860 # 3
|
||||
- cp863 # 4
|
||||
- cp865 # 5
|
||||
- cp858 # 19
|
||||
- blank
|
||||
|
||||
# http://support.epostraders.co.uk/support-files/documents/3/l7O-TM-T88II_TechnicalRefGuide.pdf
|
||||
|
@ -168,16 +168,16 @@ epson:
|
|||
a: 42
|
||||
b: 56
|
||||
codePages:
|
||||
- PC437 # 0
|
||||
- CP437 # 0
|
||||
- Katakana # 1
|
||||
- PC850 # 2
|
||||
- PC860 # 3
|
||||
- PC863 # 4
|
||||
- PC865 # 5
|
||||
- WPC1252 # 16
|
||||
- PC866 # 17
|
||||
- PC852 # 18
|
||||
- PC858 # 19
|
||||
- CP850 # 2
|
||||
- CP860 # 3
|
||||
- CP863 # 4
|
||||
- CP865 # 5
|
||||
- PC1252 # 16
|
||||
- CP866 # 17
|
||||
- CP852 # 18
|
||||
- CP858 # 19
|
||||
- blank
|
||||
|
||||
|
||||
|
|
|
@ -101,99 +101,101 @@ TXT_ALIGN_RT = ESC + b'\x61\x02' # Right justification
|
|||
TXT_INVERT_ON = GS + b'\x42\x01' # Inverse Printing ON
|
||||
TXT_INVERT_OFF = GS + b'\x42\x00' # Inverse Printing OFF
|
||||
|
||||
|
||||
CODEPAGE_CHANGE = ESC + b'\x74'
|
||||
# Char code table
|
||||
CHARCODE = {
|
||||
'PC437':
|
||||
[ESC + b'\x74\x00', 'cp437'], # PC437 USA
|
||||
'KATAKANA':
|
||||
[ESC + b'\x74\x01', ''], # KATAKANA (JAPAN)
|
||||
'PC850':
|
||||
[ESC + b'\x74\x02', 'cp850'], # PC850 Multilingual
|
||||
'PC860':
|
||||
[ESC + b'\x74\x03', 'cp860'], # PC860 Portuguese
|
||||
'PC863':
|
||||
[ESC + b'\x74\x04', 'cp863'], # PC863 Canadian-French
|
||||
'PC865':
|
||||
[ESC + b'\x74\x05', 'cp865'], # PC865 Nordic
|
||||
'KANJI6':
|
||||
[ESC + b'\x74\x06', ''], # One-pass Kanji, Hiragana
|
||||
'KANJI7':
|
||||
[ESC + b'\x74\x07', ''], # One-pass Kanji
|
||||
'KANJI8':
|
||||
[ESC + b'\x74\x08', ''], # One-pass Kanji
|
||||
'PC851':
|
||||
[ESC + b'\x74\x0b', 'cp851'], # PC851 Greek
|
||||
'PC853':
|
||||
[ESC + b'\x74\x0c', 'cp853'], # PC853 Turkish
|
||||
'PC857':
|
||||
[ESC + b'\x74\x0d', 'cp857'], # PC857 Turkish
|
||||
'PC737':
|
||||
[ESC + b'\x74\x0e', 'cp737'], # PC737 Greek
|
||||
'8859_7':
|
||||
[ESC + b'\x74\x0f', 'iso8859_7'], # ISO8859-7 Greek
|
||||
'WPC1252':
|
||||
[ESC + b'\x74\x10', 'cp1252'], # WPC1252
|
||||
'PC866':
|
||||
[ESC + b'\x74\x11', 'cp866'], # PC866 Cyrillic #2
|
||||
'PC852':
|
||||
[ESC + b'\x74\x12', 'cp852'], # PC852 Latin2
|
||||
'PC858':
|
||||
[ESC + b'\x74\x13', 'cp858'], # PC858 Euro
|
||||
'KU42':
|
||||
[ESC + b'\x74\x14', ''], # KU42 Thai
|
||||
'TIS11':
|
||||
[ESC + b'\x74\x15', ''], # TIS11 Thai
|
||||
'TIS18':
|
||||
[ESC + b'\x74\x1a', ''], # TIS18 Thai
|
||||
'TCVN3':
|
||||
[ESC + b'\x74\x1e', ''], # TCVN3 Vietnamese
|
||||
'TCVN3B':
|
||||
[ESC + b'\x74\x1f', ''], # TCVN3 Vietnamese
|
||||
'PC720':
|
||||
[ESC + b'\x74\x20', 'cp720'], # PC720 Arabic
|
||||
'WPC775':
|
||||
[ESC + b'\x74\x21', ''], # WPC775 Baltic Rim
|
||||
'PC855':
|
||||
[ESC + b'\x74\x22', 'cp855'], # PC855 Cyrillic
|
||||
'PC861':
|
||||
[ESC + b'\x74\x23', 'cp861'], # PC861 Icelandic
|
||||
'PC862':
|
||||
[ESC + b'\x74\x24', 'cp862'], # PC862 Hebrew
|
||||
'PC864':
|
||||
[ESC + b'\x74\x25', 'cp864'], # PC864 Arabic
|
||||
'PC869':
|
||||
[ESC + b'\x74\x26', 'cp869'], # PC869 Greek
|
||||
'8859_2':
|
||||
[ESC + b'\x74\x27', 'iso8859_2'], # ISO8859-2 Latin2
|
||||
'8859_9':
|
||||
[ESC + b'\x74\x28', 'iso8859_9'], # ISO8859-2 Latin9
|
||||
'PC1098':
|
||||
[ESC + b'\x74\x29', 'cp1098'], # PC1098 Farsi
|
||||
'PC1118':
|
||||
[ESC + b'\x74\x2a', 'cp1118'], # PC1118 Lithuanian
|
||||
'PC1119':
|
||||
[ESC + b'\x74\x2b', 'cp1119'], # PC1119 Lithuanian
|
||||
'PC1125':
|
||||
[ESC + b'\x74\x2c', 'cp1125'], # PC1125 Ukrainian
|
||||
'WPC1250':
|
||||
[ESC + b'\x74\x2d', 'cp1250'], # WPC1250 Latin2
|
||||
'WPC1251':
|
||||
[ESC + b'\x74\x2e', 'cp1251'], # WPC1251 Cyrillic
|
||||
'WPC1253':
|
||||
[ESC + b'\x74\x2f', 'cp1253'], # WPC1253 Greek
|
||||
'WPC1254':
|
||||
[ESC + b'\x74\x30', 'cp1254'], # WPC1254 Turkish
|
||||
'WPC1255':
|
||||
[ESC + b'\x74\x31', 'cp1255'], # WPC1255 Hebrew
|
||||
'WPC1256':
|
||||
[ESC + b'\x74\x32', 'cp1256'], # WPC1256 Arabic
|
||||
'WPC1257':
|
||||
[ESC + b'\x74\x33', 'cp1257'], # WPC1257 Baltic Rim
|
||||
'WPC1258':
|
||||
[ESC + b'\x74\x34', 'cp1258'], # WPC1258 Vietnamese
|
||||
'KZ1048':
|
||||
[ESC + b'\x74\x35', 'kz1048'], # KZ-1048 Kazakhstan
|
||||
}
|
||||
# CHARCODE = {
|
||||
# 'PC437':
|
||||
# [ESC + b'\x74\x00', 'cp437'], # PC437 USA
|
||||
# 'KATAKANA':
|
||||
# [ESC + b'\x74\x01', ''], # KATAKANA (JAPAN)
|
||||
# 'PC850':
|
||||
# [ESC + b'\x74\x02', 'cp850'], # PC850 Multilingual
|
||||
# 'PC860':
|
||||
# [ESC + b'\x74\x03', 'cp860'], # PC860 Portuguese
|
||||
# 'PC863':
|
||||
# [ESC + b'\x74\x04', 'cp863'], # PC863 Canadian-French
|
||||
# 'PC865':
|
||||
# [ESC + b'\x74\x05', 'cp865'], # PC865 Nordic
|
||||
# 'KANJI6':
|
||||
# [ESC + b'\x74\x06', ''], # One-pass Kanji, Hiragana
|
||||
# 'KANJI7':
|
||||
# [ESC + b'\x74\x07', ''], # One-pass Kanji
|
||||
# 'KANJI8':
|
||||
# [ESC + b'\x74\x08', ''], # One-pass Kanji
|
||||
# 'PC851':
|
||||
# [ESC + b'\x74\x0b', 'cp851'], # PC851 Greek
|
||||
# 'PC853':
|
||||
# [ESC + b'\x74\x0c', 'cp853'], # PC853 Turkish
|
||||
# 'PC857':
|
||||
# [ESC + b'\x74\x0d', 'cp857'], # PC857 Turkish
|
||||
# 'PC737':
|
||||
# [ESC + b'\x74\x0e', 'cp737'], # PC737 Greek
|
||||
# '8859_7':
|
||||
# [ESC + b'\x74\x0f', 'iso8859_7'], # ISO8859-7 Greek
|
||||
# 'WPC1252':
|
||||
# [ESC + b'\x74\x10', 'cp1252'], # WPC1252
|
||||
# 'PC866':
|
||||
# [ESC + b'\x74\x11', 'cp866'], # PC866 Cyrillic #2
|
||||
# 'PC852':
|
||||
# [ESC + b'\x74\x12', 'cp852'], # PC852 Latin2
|
||||
# 'PC858':
|
||||
# [ESC + b'\x74\x13', 'cp858'], # PC858 Euro
|
||||
# 'KU42':
|
||||
# [ESC + b'\x74\x14', ''], # KU42 Thai
|
||||
# 'TIS11':
|
||||
# [ESC + b'\x74\x15', ''], # TIS11 Thai
|
||||
# 'TIS18':
|
||||
# [ESC + b'\x74\x1a', ''], # TIS18 Thai
|
||||
# 'TCVN3':
|
||||
# [ESC + b'\x74\x1e', ''], # TCVN3 Vietnamese
|
||||
# 'TCVN3B':
|
||||
# [ESC + b'\x74\x1f', ''], # TCVN3 Vietnamese
|
||||
# 'PC720':
|
||||
# [ESC + b'\x74\x20', 'cp720'], # PC720 Arabic
|
||||
# 'WPC775':
|
||||
# [ESC + b'\x74\x21', ''], # WPC775 Baltic Rim
|
||||
# 'PC855':
|
||||
# [ESC + b'\x74\x22', 'cp855'], # PC855 Cyrillic
|
||||
# 'PC861':
|
||||
# [ESC + b'\x74\x23', 'cp861'], # PC861 Icelandic
|
||||
# 'PC862':
|
||||
# [ESC + b'\x74\x24', 'cp862'], # PC862 Hebrew
|
||||
# 'PC864':
|
||||
# [ESC + b'\x74\x25', 'cp864'], # PC864 Arabic
|
||||
# 'PC869':
|
||||
# [ESC + b'\x74\x26', 'cp869'], # PC869 Greek
|
||||
# '8859_2':
|
||||
# [ESC + b'\x74\x27', 'iso8859_2'], # ISO8859-2 Latin2
|
||||
# '8859_9':
|
||||
# [ESC + b'\x74\x28', 'iso8859_9'], # ISO8859-2 Latin9
|
||||
# 'PC1098':
|
||||
# [ESC + b'\x74\x29', 'cp1098'], # PC1098 Farsi
|
||||
# 'PC1118':
|
||||
# [ESC + b'\x74\x2a', 'cp1118'], # PC1118 Lithuanian
|
||||
# 'PC1119':
|
||||
# [ESC + b'\x74\x2b', 'cp1119'], # PC1119 Lithuanian
|
||||
# 'PC1125':
|
||||
# [ESC + b'\x74\x2c', 'cp1125'], # PC1125 Ukrainian
|
||||
# 'WPC1250':
|
||||
# [ESC + b'\x74\x2d', 'cp1250'], # WPC1250 Latin2
|
||||
# 'WPC1251':
|
||||
# [ESC + b'\x74\x2e', 'cp1251'], # WPC1251 Cyrillic
|
||||
# 'WPC1253':
|
||||
# [ESC + b'\x74\x2f', 'cp1253'], # WPC1253 Greek
|
||||
# 'WPC1254':
|
||||
# [ESC + b'\x74\x30', 'cp1254'], # WPC1254 Turkish
|
||||
# 'WPC1255':
|
||||
# [ESC + b'\x74\x31', 'cp1255'], # WPC1255 Hebrew
|
||||
# 'WPC1256':
|
||||
# [ESC + b'\x74\x32', 'cp1256'], # WPC1256 Arabic
|
||||
# 'WPC1257':
|
||||
# [ESC + b'\x74\x33', 'cp1257'], # WPC1257 Baltic Rim
|
||||
# 'WPC1258':
|
||||
# [ESC + b'\x74\x34', 'cp1258'], # WPC1258 Vietnamese
|
||||
# 'KZ1048':
|
||||
# [ESC + b'\x74\x35', 'kz1048'], # KZ-1048 Kazakhstan
|
||||
# }
|
||||
|
||||
# Barcode format
|
||||
_SET_BARCODE_TXT_POS = lambda n: GS + b'H' + n
|
||||
|
|
|
@ -36,12 +36,12 @@ class Escpos(object):
|
|||
"""
|
||||
device = None
|
||||
|
||||
def __init__(self, profile=None, **kwargs):
|
||||
def __init__(self, profile=None, magic_encode_args=None, **kwargs):
|
||||
""" Initialize ESCPOS Printer
|
||||
|
||||
:param profile: Printer profile"""
|
||||
self.profile = get_profile(profile)
|
||||
self.magic = MagicEncode(**kwargs)
|
||||
self.magic = MagicEncode(self, **(magic_encode_args or {}))
|
||||
|
||||
def __del__(self):
|
||||
""" call self.close upon deletion """
|
||||
|
@ -228,11 +228,9 @@ class Escpos(object):
|
|||
:raises: :py:exc:`~escpos.exceptions.CharCodeError`
|
||||
"""
|
||||
if code.upper() == "AUTO":
|
||||
self.magic.force_encoding = False
|
||||
self.magic.force_encoding(False)
|
||||
else:
|
||||
self.magic.codepage_sequence(code)
|
||||
self.magic.encoding = code
|
||||
self.magic.force_encoding = True
|
||||
self.magic.force_encoding(code)
|
||||
|
||||
def barcode(self, code, bc, height=64, width=3, pos="BELOW", font="A", align_ct=True, function_type="A"):
|
||||
""" Print Barcode
|
||||
|
@ -373,7 +371,7 @@ class Escpos(object):
|
|||
:raises: :py:exc:`~escpos.exceptions.TextError`
|
||||
"""
|
||||
txt = six.text_type(txt)
|
||||
self._raw(self.magic.encode_text(txt=txt))
|
||||
self.magic.write(txt)
|
||||
|
||||
def block_text(self, txt, font=None, columns=None):
|
||||
""" Text is printed wrapped to specified columns
|
||||
|
|
|
@ -17,8 +17,9 @@ from __future__ import division
|
|||
from __future__ import print_function
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from .constants import CHARCODE
|
||||
from .constants import CODEPAGE_CHANGE
|
||||
from .exceptions import CharCodeError, Error
|
||||
from .capabilities import get_profile
|
||||
import copy
|
||||
import six
|
||||
|
||||
|
@ -27,153 +28,230 @@ try:
|
|||
except ImportError:
|
||||
jcconv = None
|
||||
|
||||
|
||||
def encode_katakana(text):
    """Encode unicode *text* into bytes for the printer's Katakana code page.

    Experimental: the original author noted that this does not quite work
    yet.

    When the optional ``jcconv`` module was importable, every character is
    first converted to half-width katakana. Each resulting character is then
    translated through ``TXT_ENC_KATAKANA_MAP``; characters that are not in
    the map are encoded as plain ASCII.

    :param text: unicode text to encode
    :return: the encoded byte string
    :raises UnicodeEncodeError: if a character is neither in the katakana
        map nor plain ASCII
    """
    encoded = []
    for char in text:
        if jcconv:
            # Try to convert Japanese text to half-width katakana.
            char = jcconv.kata2half(jcconv.hira2kata(char))

        # The conversion may result in multiple characters, so translate
        # the converted text one character at a time.
        for piece in char:
            if piece in TXT_ENC_KATAKANA_MAP:
                encoded.append(TXT_ENC_KATAKANA_MAP[piece])
            else:
                # Only bytes may be joined below, so the text character has
                # to be encoded here.
                # NOTE(review): assumes the lower half of the printer's
                # Katakana page is ASCII-compatible -- confirm.
                encoded.append(piece.encode('ascii'))
    return b"".join(encoded)
|
||||
|
||||
|
||||
|
||||
# TODO: When the capabilities.yml format is finished, this should be
|
||||
# in the profile itself.
|
||||
def get_encodings_from_profile(profile):
    """Return the code pages usable with *profile*.

    The result maps every key of ``profile.codePageMap`` to its lower-cased
    encoding name. If the profile has a ``codePages`` attribute, only the
    entries whose encoding name is listed there (compared case-insensitively)
    are kept; otherwise the whole map is returned.

    :param profile: object with a ``codePageMap`` mapping and, optionally,
        a ``codePages`` iterable of encoding names
    :return: dict of code page key -> lower-cased encoding name
    """
    mapping = {k: v.lower() for k, v in profile.codePageMap.items()}
    if not hasattr(profile, 'codePages'):
        return mapping

    # A set makes the membership test below constant-time.
    supported = {name.lower() for name in profile.codePages}
    return {k: v for k, v in mapping.items() if v in supported}
|
||||
|
||||
|
||||
class CodePages(object):
    """Helper for working with code page names and encoding text.

    Declared as a new-style class so it behaves the same on Python 2 and
    Python 3, like the other classes in this module.
    """

    def get_all(self):
        """Return the encoding names of the default printer profile."""
        return get_encodings_from_profile(get_profile()).values()

    def encode(self, text, encoding, errors='strict'):
        """Encode *text* with *encoding* and return the bytes.

        :param text: unicode text to encode
        :param encoding: encoding name; ``katakana`` (any case) is handled
            by :func:`encode_katakana`, everything else by Python's codecs
        :param errors: codec error handler; not applied to katakana
        :raises LookupError: if Python does not know *encoding*
        :raises UnicodeEncodeError: if *text* cannot be encoded and
            *errors* is ``'strict'``
        """
        # Python has no built-in codec for this code page.
        if encoding.upper() == 'KATAKANA':
            return encode_katakana(text)

        return text.encode(encoding, errors=errors)

    def get_encoding(self, encoding):
        """Return the normalised (lower-cased) form of *encoding*."""
        # resolve the encoding alias
        return encoding.lower()


# Shared module-level instance.
code_pages = CodePages()
|
||||
|
||||
|
||||
class Encoder(object):
    """Takes a list of available code spaces. Picks the right one for a
    given character.

    Note: To determine the codespace, it needs to do the conversion, and
    thus already knows what the final byte in the target encoding would
    be. Nevertheless, the API of this class doesn't return the byte.

    The caller has to do the character conversion itself.

        $ python -m timeit -s "{u'ö':'a'}.get(u'ö')"
        100000000 loops, best of 3: 0.0133 usec per loop

        $ python -m timeit -s "u'ö'.encode('latin1')"
        100000000 loops, best of 3: 0.0141 usec per loop
    """

    def __init__(self, codepages):
        """
        :param codepages: dict mapping a code page slot to the name of the
            encoding stored in that slot
        """
        self.codepages = codepages
        # encoding name -> slot, for looking up the slot of an encoding
        self.reverse = {v: k for k, v in codepages.items()}
        self.available_encodings = set(codepages.values())
        # encodings that have already been picked for some character
        self.used_encodings = set()

    def get_sequence(self, encoding):
        """Return the code page slot that holds *encoding*.

        :raises KeyError: if *encoding* is not one of the known code pages
        """
        return self.reverse[encoding]

    def get_encoding(self, encoding):
        """Resolve aliases and check that the profile allows this encoding.

        :return: the normalised encoding name
        :raises ValueError: if the encoding is not available
        """
        encoding = code_pages.get_encoding(encoding)
        if encoding not in self.available_encodings:
            raise ValueError('This encoding cannot be used for the current profile')
        return encoding

    def get_encodings(self):
        """Return the set of available encoding names.

        No cache is used, because encoding a character is already fast.
        """
        return self.available_encodings

    def can_encode(self, encoding, char):
        """Return True if *char* can be encoded with *encoding*.

        An encoding unknown to Python and a character that the encoding
        cannot represent both yield False.
        """
        try:
            code_pages.encode(char, encoding)
        except LookupError:
            # We don't have this encoding
            return False
        except UnicodeEncodeError:
            return False

        return True

    def _search_order(self):
        """Return the available encodings in the order they should be tried:
        already used ones first, each group ordered by code page slot."""
        # NOTE(review): assumes the slot keys are mutually comparable
        # (e.g. all integers) -- confirm against the profile data.
        by_slot = sorted(self.get_encodings(), key=self.get_sequence)
        used = [name for name in by_slot if name in self.used_encodings]
        unused = [name for name in by_slot if name not in self.used_encodings]
        return used + unused

    def find_suitable_codespace(self, char):
        """The order of our search is a specific one:

        1. code pages that we already tried before; there is a good
           chance they might work again, reducing the search space,
           and by re-using already used encodings we might also
           reduce the number of codepage change instructions we have
           to send. Still, any performance gains will presumably be
           fairly minor.

        2. code pages in lower ESCPOS slots first. Presumably, they
           are more likely to be supported, so if a printer profile
           is missing or incomplete, we might increase our chance
           that the code page we pick for this character is actually
           supported.

        :return: the name of the first encoding that can encode *char*,
            or None if no available encoding can
        """
        for encoding in self._search_order():
            if self.can_encode(encoding, char):
                # This encoding worked; add it to the set of used ones.
                self.used_encodings.add(encoding)
                return encoding
        return None
|
||||
|
||||
|
||||
class MagicEncode(object):
|
||||
""" Magic Encode Class
|
||||
|
||||
It tries to automatically encode utf-8 input into the right coding. When encoding is impossible a configurable
|
||||
symbol will be inserted.
|
||||
"""
|
||||
def __init__(self, startencoding='PC437', force_encoding=False, defaultsymbol=b'', defaultencoding='PC437'):
|
||||
# running these functions makes sure that the encoding is suitable
|
||||
MagicEncode.codepage_name(startencoding)
|
||||
MagicEncode.codepage_name(defaultencoding)
|
||||
|
||||
self.encoding = startencoding
|
||||
encoding: If you know the current encoding of the printer when
|
||||
initializing this class, set it here. If the current encoding is
|
||||
unknown, the first character emitted will be a codepage switch.
|
||||
"""
|
||||
def __init__(self, driver, encoding=None, disabled=False,
|
||||
defaultsymbol='?', encoder=None):
|
||||
if disabled and not encoding:
|
||||
raise Error('If you disable magic encode, you need to define an encoding!')
|
||||
|
||||
self.driver = driver
|
||||
self.encoder = encoder or Encoder(get_encodings_from_profile(driver.profile))
|
||||
|
||||
self.encoding = self.encoder.get_encoding(encoding) if encoding else None
|
||||
self.defaultsymbol = defaultsymbol
|
||||
if type(self.defaultsymbol) is not six.binary_type:
|
||||
raise Error("The supplied symbol {sym} has to be a binary string".format(sym=defaultsymbol))
|
||||
self.defaultencoding = defaultencoding
|
||||
self.force_encoding = force_encoding
|
||||
self.disabled = disabled
|
||||
|
||||
def set_encoding(self, encoding='PC437', force_encoding=False):
|
||||
"""sets an encoding (normally not used)
|
||||
def force_encoding(self, encoding):
|
||||
"""Sets a fixed encoding. The change is emitted right away.
|
||||
|
||||
This function should normally not be used since it manipulates the automagic behaviour. However, if you want to
|
||||
force a certain codepage, then you can use this function.
|
||||
|
||||
:param encoding: must be a valid encoding from CHARCODE
|
||||
:param force_encoding: whether the encoding should not be changed automatically
|
||||
From now on, this buffer will not switch the code page anymore.
|
||||
However, it will still keep track of the current code page.
|
||||
"""
|
||||
self.codepage_name(encoding)
|
||||
self.encoding = encoding
|
||||
self.force_encoding = force_encoding
|
||||
if not encoding:
|
||||
self.disabled = False
|
||||
else:
|
||||
self.write_with_encoding(encoding, None)
|
||||
self.disabled = True
|
||||
|
||||
@staticmethod
|
||||
def codepage_sequence(codepage):
|
||||
"""returns the corresponding codepage-sequence"""
|
||||
try:
|
||||
return CHARCODE[codepage][0]
|
||||
except KeyError:
|
||||
raise CharCodeError("The encoding {enc} is unknown.".format(enc=codepage))
|
||||
|
||||
@staticmethod
|
||||
def codepage_name(codepage):
|
||||
"""returns the corresponding codepage-name (for python)"""
|
||||
try:
|
||||
name = CHARCODE[codepage][1]
|
||||
if name == '':
|
||||
raise CharCodeError("The codepage {enc} does not have a connected python-codepage".format(enc=codepage))
|
||||
return name
|
||||
except KeyError:
|
||||
raise CharCodeError("The encoding {enc} is unknown.".format(enc=codepage))
|
||||
|
||||
def encode_char(self, char):
|
||||
def write(self, text):
|
||||
"""Write the text, automatically switching encodings.
|
||||
"""
|
||||
Encodes a single unicode character into a sequence of
|
||||
esc-pos code page change instructions and character declarations
|
||||
"""
|
||||
if type(char) is not six.text_type:
|
||||
raise Error("The supplied text has to be unicode, but is of type {type}.".format(
|
||||
type=type(char)
|
||||
))
|
||||
encoded = b''
|
||||
encoding = self.encoding # we reuse the last encoding to prevent code page switches at every character
|
||||
remaining = copy.copy(CHARCODE)
|
||||
|
||||
while True: # Trying all encoding until one succeeds
|
||||
try:
|
||||
if encoding == 'KATAKANA': # Japanese characters
|
||||
if jcconv:
|
||||
# try to convert japanese text to half-katakanas
|
||||
kata = jcconv.kata2half(jcconv.hira2kata(char))
|
||||
if kata != char:
|
||||
self.extra_chars += len(kata) - 1
|
||||
# the conversion may result in multiple characters
|
||||
return self.encode_str(kata)
|
||||
else:
|
||||
kata = char
|
||||
|
||||
if kata in TXT_ENC_KATAKANA_MAP:
|
||||
encoded = TXT_ENC_KATAKANA_MAP[kata]
|
||||
break
|
||||
else:
|
||||
raise ValueError()
|
||||
else:
|
||||
try:
|
||||
enc_name = MagicEncode.codepage_name(encoding)
|
||||
encoded = char.encode(enc_name)
|
||||
assert type(encoded) is bytes
|
||||
except LookupError:
|
||||
raise ValueError("The encoding {enc} seems to not exist in Python".format(enc=encoding))
|
||||
except CharCodeError:
|
||||
raise ValueError("The encoding {enc} is not fully configured in constants".format(
|
||||
enc=encoding
|
||||
))
|
||||
break
|
||||
|
||||
except ValueError: # the encoding failed, select another one and retry
|
||||
if encoding in remaining:
|
||||
del remaining[encoding]
|
||||
if len(remaining) >= 1:
|
||||
encoding = list(remaining)[0]
|
||||
else:
|
||||
encoding = self.defaultencoding
|
||||
encoded = self.defaultsymbol # could not encode, output error character
|
||||
break
|
||||
|
||||
if encoding != self.encoding:
|
||||
# if the encoding changed, remember it and prefix the character with
|
||||
# the esc-pos encoding change sequence
|
||||
self.encoding = encoding
|
||||
encoded = CHARCODE[encoding][0] + encoded
|
||||
|
||||
return encoded
|
||||
|
||||
def encode_str(self, txt):
|
||||
# make sure the right codepage is set in the printer
|
||||
buffer = self.codepage_sequence(self.encoding)
|
||||
if self.force_encoding:
|
||||
buffer += txt.encode(self.codepage_name(self.encoding))
|
||||
else:
|
||||
for c in txt:
|
||||
buffer += self.encode_char(c)
|
||||
return buffer
|
||||
|
||||
def encode_text(self, txt):
|
||||
"""returns a byte-string with encoded text
|
||||
|
||||
:param txt: text that shall be encoded
|
||||
:return: byte-string for the printer
|
||||
"""
|
||||
if not txt:
|
||||
if self.disabled:
|
||||
self.write_with_encoding(self.encoding, text)
|
||||
return
|
||||
|
||||
self.extra_chars = 0
|
||||
# TODO: Currently this very simple loop means we send every
|
||||
# character individually to the printer. We can probably
|
||||
# improve performance by searching the text for the first
|
||||
# character that cannot be rendered using the current code
|
||||
# page, and then sending all of those characters at once.
|
||||
# Or, should a lower-level buffer be responsible for that?
|
||||
|
||||
txt = self.encode_str(txt)
|
||||
for char in text:
|
||||
# See if the current code page works for this character.
|
||||
# The encoder object will use a cache to be able to answer
|
||||
# this question fairly easily.
|
||||
if self.encoding and self.encoder.can_encode(self.encoding, char):
|
||||
self.write_with_encoding(self.encoding, char)
|
||||
continue
|
||||
|
||||
# if the utf-8 -> codepage conversion inserted extra characters,
|
||||
# remove double spaces to try to restore the original string length
|
||||
# and prevent printing alignment issues
|
||||
while self.extra_chars > 0:
|
||||
dspace = txt.find(' ')
|
||||
if dspace > 0:
|
||||
txt = txt[:dspace] + txt[dspace+1:]
|
||||
self.extra_chars -= 1
|
||||
else:
|
||||
break
|
||||
# We have to find another way to print this character.
|
||||
# See if any of the code pages that the printer profile supports
|
||||
# can encode this character.
|
||||
codespace = self.encoder.find_suitable_codespace(char)
|
||||
if not codespace:
|
||||
self._handle_character_failed(char)
|
||||
continue
|
||||
|
||||
return txt
|
||||
self.write_with_encoding(codespace, char)
|
||||
|
||||
def _handle_character_failed(self, char):
|
||||
"""Called when no codepage was found to render a character.
|
||||
"""
|
||||
# Writing the default symbol via write() allows us to avoid
|
||||
# unnecesary codepage switches.
|
||||
self.write(self.defaultsymbol)
|
||||
|
||||
def write_with_encoding(self, encoding, text):
    """Send *text* to the driver encoded with *encoding*.

    If *encoding* differs from the code page currently tracked in
    ``self.encoding``, a code page change command is sent to the driver
    first and ``self.encoding`` is updated.

    :param encoding: name of the encoding to use; it is validated through
        ``self.encoder.get_encoding``
    :param text: unicode text to send, or None (or an empty string) to
        only switch the code page
    :raises Error: if *text* is given but is not a unicode string
    """
    if text is not None and not isinstance(text, six.text_type):
        raise Error("The supplied text has to be unicode, but is of type {type}.".format(
            type=type(text)
        ))

    encoding = self.encoder.get_encoding(encoding)

    # We always know the current code page; if the new codepage
    # is different, emit a change command.
    if encoding != self.encoding:
        self.encoding = encoding
        # Plain concatenation: bytes objects have no .format() on Python 3.
        self.driver._raw(
            CODEPAGE_CHANGE +
            six.int2byte(self.encoder.get_sequence(encoding))
        )

    if text:
        # Characters the code page cannot represent are replaced by the
        # codec's replacement character rather than raising.
        self.driver._raw(code_pages.encode(text, encoding, errors="replace"))
|
||||
|
||||
|
||||
# TODO: encode emoticons using a charmap
|
||||
|
|
|
@ -0,0 +1,7 @@
|
|||
import pytest
|
||||
from escpos.printer import Dummy
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def driver():
|
||||
return Dummy()
|
|
@ -13,103 +13,97 @@ from __future__ import division
|
|||
from __future__ import print_function
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import pytest
|
||||
from nose.tools import raises, assert_raises
|
||||
from hypothesis import given, example
|
||||
import hypothesis.strategies as st
|
||||
from escpos.magicencode import MagicEncode
|
||||
from escpos.magicencode import MagicEncode, Encoder, encode_katakana
|
||||
from escpos.exceptions import CharCodeError, Error
|
||||
from escpos.constants import CHARCODE
|
||||
|
||||
@raises(CharCodeError)
|
||||
def test_magic_encode_unkown_char_constant_as_startenc():
|
||||
"""tests whether MagicEncode raises the proper Exception when an unknown charcode-name is passed as startencoding"""
|
||||
MagicEncode(startencoding="something")
|
||||
|
||||
@raises(CharCodeError)
|
||||
def test_magic_encode_unkown_char_constant_as_defaultenc():
|
||||
"""tests whether MagicEncode raises the proper Exception when an unknown charcode-name is passed as defaultenc."""
|
||||
MagicEncode(defaultencoding="something")
|
||||
|
||||
def test_magic_encode_wo_arguments():
|
||||
"""tests whether MagicEncode works in the standard configuration"""
|
||||
MagicEncode()
|
||||
|
||||
@raises(Error)
|
||||
def test_magic_encode_w_non_binary_defaultsymbol():
|
||||
"""tests whether MagicEncode catches non-binary defaultsymbols"""
|
||||
MagicEncode(defaultsymbol="non-binary")
|
||||
|
||||
@given(symbol=st.binary())
|
||||
def test_magic_encode_w_binary_defaultsymbol(symbol):
|
||||
"""tests whether MagicEncode works with any binary symbol"""
|
||||
MagicEncode(defaultsymbol=symbol)
|
||||
|
||||
@given(st.text())
|
||||
@example("カタカナ")
|
||||
@example("あいうえお")
|
||||
@example("ハンカクカタカナ")
|
||||
def test_magic_encode_encode_text_unicode_string(text):
|
||||
"""tests whether MagicEncode can accept a unicode string"""
|
||||
me = MagicEncode()
|
||||
me.encode_text(text)
|
||||
|
||||
@given(char=st.characters())
|
||||
def test_magic_encode_encode_char(char):
|
||||
"""tests the encode_char-method of MagicEncode"""
|
||||
me = MagicEncode()
|
||||
me.encode_char(char)
|
||||
|
||||
@raises(Error)
|
||||
@given(char=st.binary())
|
||||
def test_magic_encode_encode_char_binary(char):
|
||||
"""tests the encode_char-method of MagicEncode with binary input"""
|
||||
me = MagicEncode()
|
||||
me.encode_char(char)
|
||||
|
||||
|
||||
def test_magic_encode_string_with_katakana_and_hiragana():
|
||||
"""tests the encode_string-method with katakana and hiragana"""
|
||||
me = MagicEncode()
|
||||
me.encode_str("カタカナ")
|
||||
me.encode_str("あいうえお")
|
||||
|
||||
@raises(CharCodeError)
|
||||
def test_magic_encode_codepage_sequence_unknown_key():
|
||||
"""tests whether MagicEncode.codepage_sequence raises the proper Exception with unknown charcode-names"""
|
||||
MagicEncode.codepage_sequence("something")
|
||||
class TestEncoder:
|
||||
|
||||
@raises(CharCodeError)
|
||||
def test_magic_encode_codepage_name_unknown_key():
|
||||
"""tests whether MagicEncode.codepage_name raises the proper Exception with unknown charcode-names"""
|
||||
MagicEncode.codepage_name("something")
|
||||
def test_can_encode(self):
|
||||
assert not Encoder({1: 'cp437'}).can_encode('cp437', u'€')
|
||||
assert Encoder({1: 'cp437'}).can_encode('cp437', u'á')
|
||||
assert not Encoder({1: 'foobar'}).can_encode('foobar', 'a')
|
||||
|
||||
def test_magic_encode_constants_getter():
|
||||
"""tests whether the constants are properly fetched"""
|
||||
for key in CHARCODE:
|
||||
name = CHARCODE[key][1]
|
||||
if name == '':
|
||||
assert_raises(CharCodeError, MagicEncode.codepage_name, key)
|
||||
else:
|
||||
assert name == MagicEncode.codepage_name(key)
|
||||
assert MagicEncode.codepage_sequence(key) == CHARCODE[key][0]
|
||||
def test_find_suitable_encoding(self):
|
||||
assert not Encoder({1: 'cp437'}).find_suitable_codespace(u'€')
|
||||
assert Encoder({1: 'cp858'}).find_suitable_codespace(u'€') == 'cp858'
|
||||
|
||||
@given(st.text())
|
||||
def test_magic_encode_force_encoding(text):
|
||||
"""test whether force_encoding works as expected"""
|
||||
me = MagicEncode()
|
||||
assert me.force_encoding is False
|
||||
me.set_encoding(encoding='PC850', force_encoding=True)
|
||||
assert me.encoding == 'PC850'
|
||||
assert me.force_encoding is True
|
||||
try:
|
||||
me.encode_text(text)
|
||||
except UnicodeEncodeError:
|
||||
# we discard these errors as they are to be expected
|
||||
# what we want to check here is, whether encoding or codepage will switch through some of the magic code
|
||||
# being called accidentally
|
||||
pass
|
||||
assert me.encoding == 'PC850'
|
||||
assert me.force_encoding is True
|
||||
@raises(ValueError)
|
||||
def test_get_encoding(self):
|
||||
Encoder({}).get_encoding('latin1')
|
||||
|
||||
|
||||
class TestMagicEncode:
|
||||
|
||||
class TestInit:
|
||||
|
||||
def test_disabled_requires_encoding(self, driver):
|
||||
with pytest.raises(Error):
|
||||
MagicEncode(driver, disabled=True)
|
||||
|
||||
class TestWriteWithEncoding:
|
||||
|
||||
def test_init_from_none(self, driver):
|
||||
encode = MagicEncode(driver, encoding=None)
|
||||
encode.write_with_encoding('cp858', '€ ist teuro.')
|
||||
assert driver.output == b'\x1bt\xd5 ist teuro.'
|
||||
|
||||
def test_change_from_another(self, driver):
|
||||
encode = MagicEncode(driver, encoding='cp437')
|
||||
encode.write_with_encoding('cp858', '€ ist teuro.')
|
||||
assert driver.output == b'\x1bt\xd5 ist teuro.'
|
||||
|
||||
def test_no_change(self, driver):
|
||||
encode = MagicEncode(driver, encoding='cp858')
|
||||
encode.write_with_encoding('cp858', '€ ist teuro.')
|
||||
assert driver.output == b'\xd5 ist teuro.'
|
||||
|
||||
class TestWrite:
|
||||
|
||||
def test_write(self, driver):
|
||||
encode = MagicEncode(driver)
|
||||
encode.write('€ ist teuro.')
|
||||
assert driver.output == b'\x1bt\xa4 ist teuro.'
|
||||
|
||||
def test_write_disabled(self, driver):
|
||||
encode = MagicEncode(driver, encoding='cp437', disabled=True)
|
||||
encode.write('€ ist teuro.')
|
||||
assert driver.output == b'? ist teuro.'
|
||||
|
||||
def test_write_no_codepage(self, driver):
|
||||
encode = MagicEncode(
|
||||
driver, defaultsymbol="_", encoder=Encoder({1: 'cp437'}),
|
||||
encoding='cp437')
|
||||
encode.write(u'€ ist teuro.')
|
||||
assert driver.output == b'_ ist teuro.'
|
||||
|
||||
class TestForceEncoding:
|
||||
|
||||
def test(self, driver):
|
||||
encode = MagicEncode(driver)
|
||||
encode.force_encoding('cp437')
|
||||
assert driver.output == b'\x1bt'
|
||||
|
||||
encode.write('€ ist teuro.')
|
||||
assert driver.output == b'\x1bt? ist teuro.'
|
||||
|
||||
|
||||
class TestKatakana:
|
||||
@given(st.text())
|
||||
@example("カタカナ")
|
||||
@example("あいうえお")
|
||||
@example("ハンカクカタカナ")
|
||||
def test_accept(self, text):
|
||||
encode_katakana(text)
|
||||
|
||||
def test_result(self):
|
||||
assert encode_katakana('カタカナ') == b'\xb6\xc0\xb6\xc5'
|
||||
assert encode_katakana("あいうえお") == b'\xb1\xb2\xb3\xb4\xb5'
|
||||
|
||||
|
||||
# TODO: idea for a unit test: generate hypothesis strings and feed them to encode_text
|
||||
|
|
Loading…
Reference in New Issue