diff --git a/src/escpos/capabilities.py b/src/escpos/capabilities.py index 3dd5c32..1330964 100644 --- a/src/escpos/capabilities.py +++ b/src/escpos/capabilities.py @@ -7,8 +7,9 @@ import yaml # Load external printer database with open(path.join(path.dirname(__file__), 'capabilities.json')) as f: CAPABILITIES = yaml.load(f) + PROFILES = CAPABILITIES['profiles'] -ENCODINGS = CAPABILITIES['encodings'] + class NotSupported(Exception): @@ -51,6 +52,12 @@ class BaseProfile(object): """ return self.features.get(feature) + def get_code_pages(self): + """Return the support code pages as a {name: index} dict. + """ + return {v.lower(): k for k, v in self.codePages.items()} + + def get_profile(name=None, **kwargs): """Get the profile by name; if no name is given, return the diff --git a/src/escpos/codepages.py b/src/escpos/codepages.py new file mode 100644 index 0000000..9666fb2 --- /dev/null +++ b/src/escpos/codepages.py @@ -0,0 +1,32 @@ +from .capabilities import CAPABILITIES + + +class CodePageManager: + """Holds information about all the code pages (as defined + in escpos-printer-db). + """ + + def __init__(self, data): + self.data = data + + def get_all(self): + return self.data.values() + + def encode(self, text, encoding, errors='strict'): + """Adds support for Japanese to the builtin str.encode(). + + TODO: Add support for custom code page data from + escpos-printer-db. + """ + # Python has not have this builtin? + if encoding.upper() == 'KATAKANA': + return encode_katakana(text) + + return text.encode(encoding, errors=errors) + + def get_encoding(self, encoding): + # resolve the encoding alias + return encoding.lower() + + +CodePages = CodePageManager(CAPABILITIES['encodings']) \ No newline at end of file diff --git a/src/escpos/constants.py b/src/escpos/constants.py index 2bec85c..c4e63af 100644 --- a/src/escpos/constants.py +++ b/src/escpos/constants.py @@ -123,101 +123,9 @@ LINESPACING_FUNCS = { 180: ESC + b'3', # line_spacing/180 of an inch, 0 <= line_spacing <= 255 } - +# Prefix to change the codepage. You need to attach a byte to indicate +# the codepage to use. We use escpos-printer-db as the data source. CODEPAGE_CHANGE = ESC + b'\x74' -# Char code table -# CHARCODE = { -# 'PC437': -# [ESC + b'\x74\x00', 'cp437'], # PC437 USA -# 'KATAKANA': -# [ESC + b'\x74\x01', ''], # KATAKANA (JAPAN) -# 'PC850': -# [ESC + b'\x74\x02', 'cp850'], # PC850 Multilingual -# 'PC860': -# [ESC + b'\x74\x03', 'cp860'], # PC860 Portuguese -# 'PC863': -# [ESC + b'\x74\x04', 'cp863'], # PC863 Canadian-French -# 'PC865': -# [ESC + b'\x74\x05', 'cp865'], # PC865 Nordic -# 'KANJI6': -# [ESC + b'\x74\x06', ''], # One-pass Kanji, Hiragana -# 'KANJI7': -# [ESC + b'\x74\x07', ''], # One-pass Kanji -# 'KANJI8': -# [ESC + b'\x74\x08', ''], # One-pass Kanji -# 'PC851': -# [ESC + b'\x74\x0b', 'cp851'], # PC851 Greek -# 'PC853': -# [ESC + b'\x74\x0c', 'cp853'], # PC853 Turkish -# 'PC857': -# [ESC + b'\x74\x0d', 'cp857'], # PC857 Turkish -# 'PC737': -# [ESC + b'\x74\x0e', 'cp737'], # PC737 Greek -# '8859_7': -# [ESC + b'\x74\x0f', 'iso8859_7'], # ISO8859-7 Greek -# 'WPC1252': -# [ESC + b'\x74\x10', 'cp1252'], # WPC1252 -# 'PC866': -# [ESC + b'\x74\x11', 'cp866'], # PC866 Cyrillic #2 -# 'PC852': -# [ESC + b'\x74\x12', 'cp852'], # PC852 Latin2 -# 'PC858': -# [ESC + b'\x74\x13', 'cp858'], # PC858 Euro -# 'KU42': -# [ESC + b'\x74\x14', ''], # KU42 Thai -# 'TIS11': -# [ESC + b'\x74\x15', ''], # TIS11 Thai -# 'TIS18': -# [ESC + b'\x74\x1a', ''], # TIS18 Thai -# 'TCVN3': -# [ESC + b'\x74\x1e', ''], # TCVN3 Vietnamese -# 'TCVN3B': -# [ESC + b'\x74\x1f', ''], # TCVN3 Vietnamese -# 'PC720': -# [ESC + b'\x74\x20', 'cp720'], # PC720 Arabic -# 'WPC775': -# [ESC + b'\x74\x21', ''], # WPC775 Baltic Rim -# 'PC855': -# [ESC + b'\x74\x22', 'cp855'], # PC855 Cyrillic -# 'PC861': -# [ESC + b'\x74\x23', 'cp861'], # PC861 Icelandic -# 'PC862': -# [ESC + b'\x74\x24', 'cp862'], # PC862 Hebrew -# 'PC864': -# [ESC + b'\x74\x25', 'cp864'], # PC864 Arabic -# 'PC869': -# [ESC + b'\x74\x26', 'cp869'], # PC869 Greek -# '8859_2': -# [ESC + b'\x74\x27', 'iso8859_2'], # ISO8859-2 Latin2 -# '8859_9': -# [ESC + b'\x74\x28', 'iso8859_9'], # ISO8859-2 Latin9 -# 'PC1098': -# [ESC + b'\x74\x29', 'cp1098'], # PC1098 Farsi -# 'PC1118': -# [ESC + b'\x74\x2a', 'cp1118'], # PC1118 Lithuanian -# 'PC1119': -# [ESC + b'\x74\x2b', 'cp1119'], # PC1119 Lithuanian -# 'PC1125': -# [ESC + b'\x74\x2c', 'cp1125'], # PC1125 Ukrainian -# 'WPC1250': -# [ESC + b'\x74\x2d', 'cp1250'], # WPC1250 Latin2 -# 'WPC1251': -# [ESC + b'\x74\x2e', 'cp1251'], # WPC1251 Cyrillic -# 'WPC1253': -# [ESC + b'\x74\x2f', 'cp1253'], # WPC1253 Greek -# 'WPC1254': -# [ESC + b'\x74\x30', 'cp1254'], # WPC1254 Turkish -# 'WPC1255': -# [ESC + b'\x74\x31', 'cp1255'], # WPC1255 Hebrew -# 'WPC1256': -# [ESC + b'\x74\x32', 'cp1256'], # WPC1256 Arabic -# 'WPC1257': -# [ESC + b'\x74\x33', 'cp1257'], # WPC1257 Baltic Rim -# 'WPC1258': -# [ESC + b'\x74\x34', 'cp1258'], # WPC1258 Vietnamese -# 'KZ1048': -# [ESC + b'\x74\x35', 'kz1048'], # KZ-1048 Kazakhstan -# } # Barcode format _SET_BARCODE_TXT_POS = lambda n: GS + b'H' + n diff --git a/src/escpos/katakana.py b/src/escpos/katakana.py new file mode 100644 index 0000000..7c2e2c7 --- /dev/null +++ b/src/escpos/katakana.py @@ -0,0 +1,104 @@ +# -*- coding: utf-8 -*- +"""Helpers to encode Japanese characters. + +I doubt that this currently works correctly. +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + + +try: + import jcconv +except ImportError: + jcconv = None + + +def encode_katakana(text): + """I don't think this quite works yet.""" + encoded = [] + for char in text: + if jcconv: + # try to convert japanese text to half-katakanas + char = jcconv.kata2half(jcconv.hira2kata(char)) + # TODO: "the conversion may result in multiple characters" + # When? What should we do about it? + + if char in TXT_ENC_KATAKANA_MAP: + encoded.append(TXT_ENC_KATAKANA_MAP[char]) + else: + pass + return b"".join(encoded) + + + +TXT_ENC_KATAKANA_MAP = { + # Maps UTF-8 Katakana symbols to KATAKANA Page Codes + + # Half-Width Katakanas + '。': b'\xa1', + '「': b'\xa2', + '」': b'\xa3', + '、': b'\xa4', + '・': b'\xa5', + 'ヲ': b'\xa6', + 'ァ': b'\xa7', + 'ィ': b'\xa8', + 'ゥ': b'\xa9', + 'ェ': b'\xaa', + 'ォ': b'\xab', + 'ャ': b'\xac', + 'ュ': b'\xad', + 'ョ': b'\xae', + 'ッ': b'\xaf', + 'ー': b'\xb0', + 'ア': b'\xb1', + 'イ': b'\xb2', + 'ウ': b'\xb3', + 'エ': b'\xb4', + 'オ': b'\xb5', + 'カ': b'\xb6', + 'キ': b'\xb7', + 'ク': b'\xb8', + 'ケ': b'\xb9', + 'コ': b'\xba', + 'サ': b'\xbb', + 'シ': b'\xbc', + 'ス': b'\xbd', + 'セ': b'\xbe', + 'ソ': b'\xbf', + 'タ': b'\xc0', + 'チ': b'\xc1', + 'ツ': b'\xc2', + 'テ': b'\xc3', + 'ト': b'\xc4', + 'ナ': b'\xc5', + 'ニ': b'\xc6', + 'ヌ': b'\xc7', + 'ネ': b'\xc8', + 'ノ': b'\xc9', + 'ハ': b'\xca', + 'ヒ': b'\xcb', + 'フ': b'\xcc', + 'ヘ': b'\xcd', + 'ホ': b'\xce', + 'マ': b'\xcf', + 'ミ': b'\xd0', + 'ム': b'\xd1', + 'メ': b'\xd2', + 'モ': b'\xd3', + 'ヤ': b'\xd4', + 'ユ': b'\xd5', + 'ヨ': b'\xd6', + 'ラ': b'\xd7', + 'リ': b'\xd8', + 'ル': b'\xd9', + 'レ': b'\xda', + 'ロ': b'\xdb', + 'ワ': b'\xdc', + 'ン': b'\xdd', + '゙': b'\xde', + '゚': b'\xdf', +} diff --git a/src/escpos/magicencode.py b/src/escpos/magicencode.py index ed8a4bc..8cfecf9 100644 --- a/src/escpos/magicencode.py +++ b/src/escpos/magicencode.py @@ -20,68 +20,16 @@ from __future__ import unicode_literals from .constants import CODEPAGE_CHANGE from .exceptions import CharCodeError, Error from .capabilities import get_profile +from .codepages import CodePages import copy import six -try: - import jcconv -except ImportError: - jcconv = None - - -def encode_katakana(text): - """I don't think this quite works yet.""" - encoded = [] - for char in text: - if jcconv: - # try to convert japanese text to half-katakanas - char = jcconv.kata2half(jcconv.hira2kata(char)) - # TODO: "the conversion may result in multiple characters" - # When? What should we do about it? - - if char in TXT_ENC_KATAKANA_MAP: - encoded.append(TXT_ENC_KATAKANA_MAP[char]) - else: - encoded.append(char) - print(encoded) - return b"".join(encoded) - - - -# TODO: When the capabilities.yml format is finished, this should be -# in the profile itself. -def get_encodings_from_profile(profile): - mapping = {k: v.lower() for k, v in profile.codePageMap.items()} - if hasattr(profile, 'codePages'): - code_pages = [n.lower() for n in profile.codePages] - return {k: v for k, v in mapping.items() if v in code_pages} - else: - return mapping - - -class CodePages: - def get_all(self): - return get_encodings_from_profile(get_profile()).values() - - def encode(self, text, encoding, errors='strict'): - # Python has not have this builtin? - if encoding.upper() == 'KATAKANA': - return encode_katakana(text) - - return text.encode(encoding, errors=errors) - - def get_encoding(self, encoding): - # resolve the encoding alias - return encoding.lower() - -code_pages = CodePages() - class Encoder(object): """Takes a list of available code spaces. Picks the right one for a given character. - Note: To determine the codespace, it needs to do the conversion, and + Note: To determine the code page, it needs to do the conversion, and thus already knows what the final byte in the target encoding would be. Nevertheless, the API of this class doesn't return the byte. @@ -94,36 +42,32 @@ class Encoder(object): 100000000 loops, best of 3: 0.0141 usec per loop """ - def __init__(self, codepages): - self.codepages = codepages - self.reverse = {v:k for k, v in codepages.items()} - self.available_encodings = set(codepages.values()) + def __init__(self, codepage_map): + self.codepages = codepage_map + self.available_encodings = set(codepage_map.keys()) self.used_encodings = set() def get_sequence(self, encoding): - return self.reverse[encoding] + return int(self.codepages[encoding]) def get_encoding(self, encoding): - """resolve aliases + """Given an encoding provided by the user, will return a + canonical encoding name; and also validate that the encoding + is supported. - check that the profile allows this encoding + TOOD: Support encoding aliases. """ - encoding = code_pages.get_encoding(encoding) - if not encoding in self.available_encodings: - raise ValueError('This encoding cannot be used for the current profile') + encoding = CodePages.get_encoding(encoding) + if not encoding in self.codepages: + raise ValueError(( + 'Encoding "{}" cannot be used for the current profile. ' + 'Valid encodings are: {}' + ).format(encoding, ','.join(self.codepages.keys()))) return encoding - def get_encodings(self): - """ - - remove the ones not supported - - order by used first, then others - - do not use a cache, because encode already is so fast - """ - return self.available_encodings - def can_encode(self, encoding, char): try: - encoded = code_pages.encode(char, encoding) + encoded = CodePages.encode(char, encoding) assert type(encoded) is bytes return encoded except LookupError: @@ -134,7 +78,7 @@ class Encoder(object): return True - def find_suitable_codespace(self, char): + def find_suitable_encoding(self, char): """The order of our search is a specific one: 1. code pages that we already tried before; there is a good @@ -150,9 +94,16 @@ class Encoder(object): that the code page we pick for this character is actually supported. - # XXX actually do speed up the search + # TODO actually do speed up the search """ - for encoding in self.get_encodings(): + """ + - remove the ones not supported + - order by used first, then others + - do not use a cache, because encode already is so fast + """ + sorted_encodings = self.codepages.keys() + + for encoding in sorted_encodings: if self.can_encode(encoding, char): # This encoding worked; at it to the set of used ones. self.used_encodings.add(encoding) @@ -160,14 +111,20 @@ class Encoder(object): class MagicEncode(object): - """ Magic Encode Class + """A helper that helps us to automatically switch to the right + code page to encode any given Unicode character. - It tries to automatically encode utf-8 input into the right coding. When encoding is impossible a configurable - symbol will be inserted. + This will consider the printers supported codepages, according + to the printer profile, and if a character cannot be encoded + with the current profile, it will attempt to find a suitable one. - encoding: If you know the current encoding of the printer when - initializing this class, set it here. If the current encoding is - unknown, the first character emitted will be a codepage switch. + If the printer does not support a suitable code page, it can + insert an error character. + + :param encoding: If you know the current encoding of the printer + when initializing this class, set it here. If the current + encoding is unknown, the first character emitted will be a + codepage switch. """ def __init__(self, driver, encoding=None, disabled=False, defaultsymbol='?', encoder=None): @@ -175,7 +132,7 @@ class MagicEncode(object): raise Error('If you disable magic encode, you need to define an encoding!') self.driver = driver - self.encoder = encoder or Encoder(get_encodings_from_profile(driver.profile)) + self.encoder = encoder or Encoder(driver.profile.get_code_pages()) self.encoding = self.encoder.get_encoding(encoding) if encoding else None self.defaultsymbol = defaultsymbol @@ -219,12 +176,12 @@ class MagicEncode(object): # We have to find another way to print this character. # See if any of the code pages that the printer profile supports # can encode this character. - codespace = self.encoder.find_suitable_codespace(char) - if not codespace: + encoding = self.encoder.find_suitable_encoding(char) + if not encoding: self._handle_character_failed(char) continue - self.write_with_encoding(codespace, char) + self.write_with_encoding(encoding, char) def _handle_character_failed(self, char): """Called when no codepage was found to render a character. @@ -239,8 +196,6 @@ class MagicEncode(object): type=type(text) )) - encoding = self.encoder.get_encoding(encoding) - # We always know the current code page; if the new codepage # is different, emit a change command. if encoding != self.encoding: @@ -251,78 +206,4 @@ class MagicEncode(object): )) if text: - self.driver._raw(code_pages.encode(text, encoding, errors="replace")) - - -# todo emoticons mit charmap encoden -# todo Escpos liste von unterdrückten charcodes mitgeben -# TODO Sichtbarkeit der Methode anpassen (Eigentlich braucht man nur die set_encode und die encode_text) - -TXT_ENC_KATAKANA_MAP = { - # Maps UTF-8 Katakana symbols to KATAKANA Page Codes - - # Half-Width Katakanas - '。': b'\xa1', - '「': b'\xa2', - '」': b'\xa3', - '、': b'\xa4', - '・': b'\xa5', - 'ヲ': b'\xa6', - 'ァ': b'\xa7', - 'ィ': b'\xa8', - 'ゥ': b'\xa9', - 'ェ': b'\xaa', - 'ォ': b'\xab', - 'ャ': b'\xac', - 'ュ': b'\xad', - 'ョ': b'\xae', - 'ッ': b'\xaf', - 'ー': b'\xb0', - 'ア': b'\xb1', - 'イ': b'\xb2', - 'ウ': b'\xb3', - 'エ': b'\xb4', - 'オ': b'\xb5', - 'カ': b'\xb6', - 'キ': b'\xb7', - 'ク': b'\xb8', - 'ケ': b'\xb9', - 'コ': b'\xba', - 'サ': b'\xbb', - 'シ': b'\xbc', - 'ス': b'\xbd', - 'セ': b'\xbe', - 'ソ': b'\xbf', - 'タ': b'\xc0', - 'チ': b'\xc1', - 'ツ': b'\xc2', - 'テ': b'\xc3', - 'ト': b'\xc4', - 'ナ': b'\xc5', - 'ニ': b'\xc6', - 'ヌ': b'\xc7', - 'ネ': b'\xc8', - 'ノ': b'\xc9', - 'ハ': b'\xca', - 'ヒ': b'\xcb', - 'フ': b'\xcc', - 'ヘ': b'\xcd', - 'ホ': b'\xce', - 'マ': b'\xcf', - 'ミ': b'\xd0', - 'ム': b'\xd1', - 'メ': b'\xd2', - 'モ': b'\xd3', - 'ヤ': b'\xd4', - 'ユ': b'\xd5', - 'ヨ': b'\xd6', - 'ラ': b'\xd7', - 'リ': b'\xd8', - 'ル': b'\xd9', - 'レ': b'\xda', - 'ロ': b'\xdb', - 'ワ': b'\xdc', - 'ン': b'\xdd', - '゙': b'\xde', - '゚': b'\xdf', -} + self.driver._raw(CodePages.encode(text, encoding, errors="replace")) diff --git a/test/test_function_text.py b/test/test_function_text.py index 5aac224..d4de426 100644 --- a/test/test_function_text.py +++ b/test/test_function_text.py @@ -12,23 +12,29 @@ from __future__ import division from __future__ import print_function from __future__ import unicode_literals +import pytest import mock -from hypothesis import given +from hypothesis import given, assume import hypothesis.strategies as st from escpos.printer import Dummy +def get_printer(): + return Dummy(magic_encode_args={'disabled': True, 'encoding': 'cp437'}) + + @given(text=st.text()) -def test_function_text_dies_ist_ein_test_lf(text): - """test the text printing function with simple string and compare output""" - instance = Dummy() - instance.magic.encode_text = mock.Mock() +def test_text(text): + """Test that text() calls the MagicEncode object. + """ + instance = get_printer() + instance.magic.write = mock.Mock() instance.text(text) - instance.magic.encode_text.assert_called_with(txt=text) + instance.magic.write.assert_called_with(text) def test_block_text(): - printer = Dummy() + printer = get_printer() printer.block_text( "All the presidents men were eating falafel for breakfast.", font='a') assert printer.output == \ diff --git a/test/test_magicencode.py b/test/test_magicencode.py index 24fb0db..60a5a66 100644 --- a/test/test_magicencode.py +++ b/test/test_magicencode.py @@ -17,7 +17,8 @@ import pytest from nose.tools import raises, assert_raises from hypothesis import given, example import hypothesis.strategies as st -from escpos.magicencode import MagicEncode, Encoder, encode_katakana +from escpos.magicencode import MagicEncode, Encoder +from escpos.katakana import encode_katakana from escpos.exceptions import CharCodeError, Error @@ -25,13 +26,13 @@ from escpos.exceptions import CharCodeError, Error class TestEncoder: def test_can_encode(self): - assert not Encoder({1: 'cp437'}).can_encode('cp437', u'€') - assert Encoder({1: 'cp437'}).can_encode('cp437', u'á') - assert not Encoder({1: 'foobar'}).can_encode('foobar', 'a') + assert not Encoder({'cp437': 1}).can_encode('cp437', u'€') + assert Encoder({'cp437': 1}).can_encode('cp437', u'á') + assert not Encoder({'foobar': 1}).can_encode('foobar', 'a') def test_find_suitable_encoding(self): - assert not Encoder({1: 'cp437'}).find_suitable_codespace(u'€') - assert Encoder({1: 'cp858'}).find_suitable_codespace(u'€') == 'cp858' + assert not Encoder({'cp437': 1}).find_suitable_encoding(u'€') + assert Encoder({'cp858': 1}).find_suitable_encoding(u'€') == 'cp858' @raises(ValueError) def test_get_encoding(self): @@ -51,12 +52,12 @@ class TestMagicEncode: def test_init_from_none(self, driver): encode = MagicEncode(driver, encoding=None) encode.write_with_encoding('cp858', '€ ist teuro.') - assert driver.output == b'\x1bt\xd5 ist teuro.' + assert driver.output == b'\x1bt\x13\xd5 ist teuro.' def test_change_from_another(self, driver): encode = MagicEncode(driver, encoding='cp437') encode.write_with_encoding('cp858', '€ ist teuro.') - assert driver.output == b'\x1bt\xd5 ist teuro.' + assert driver.output == b'\x1bt\x13\xd5 ist teuro.' def test_no_change(self, driver): encode = MagicEncode(driver, encoding='cp858') @@ -68,7 +69,7 @@ class TestMagicEncode: def test_write(self, driver): encode = MagicEncode(driver) encode.write('€ ist teuro.') - assert driver.output == b'\x1bt\xa4 ist teuro.' + assert driver.output == b'\x1bt\x0f\xa4 ist teuro.' def test_write_disabled(self, driver): encode = MagicEncode(driver, encoding='cp437', disabled=True) @@ -77,7 +78,7 @@ class TestMagicEncode: def test_write_no_codepage(self, driver): encode = MagicEncode( - driver, defaultsymbol="_", encoder=Encoder({1: 'cp437'}), + driver, defaultsymbol="_", encoder=Encoder({'cp437': 1}), encoding='cp437') encode.write(u'€ ist teuro.') assert driver.output == b'_ ist teuro.' @@ -87,10 +88,10 @@ class TestMagicEncode: def test(self, driver): encode = MagicEncode(driver) encode.force_encoding('cp437') - assert driver.output == b'\x1bt' + assert driver.output == b'\x1bt\x00' encode.write('€ ist teuro.') - assert driver.output == b'\x1bt? ist teuro.' + assert driver.output == b'\x1bt\x00? ist teuro.' class TestKatakana: