Merge pull request #1 from mike42/miracle2k-text-encoding

Text encoding conversion to lookup tables

This commit is contained in:
Michael Elsdörfer 2016-09-22 22:58:37 +02:00
commit a3ca2c2a16
13 changed files with 116 additions and 49 deletions

1
.gitignore vendored
View File

@ -5,6 +5,7 @@
$~ $~
.idea/ .idea/
.directory .directory
.cache/
# temporary data # temporary data
temp temp

3
.gitmodules vendored Normal file
View File

@ -0,0 +1,3 @@
[submodule "capabilities-data"]
path = capabilities-data
url = https://github.com/receipt-print-hq/escpos-printer-db.git

View File

@ -29,7 +29,7 @@ of every file of code:
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
from __future__ import unicode_literals from __future__ import unicode_literals
Furthermore please be aware of the differences between Python 2 and 3. For Furthermore please be aware of the differences between Python 2 and 3. For
example `this guide <https://docs.python.org/3/howto/pyporting.html>`_ is helpful. example `this guide <https://docs.python.org/3/howto/pyporting.html>`_ is helpful.
Special care has to be taken when dealing with strings and byte-strings. Please note Special care has to be taken when dealing with strings and byte-strings. Please note
@ -45,8 +45,7 @@ The checks by Landscape and QuantifiedCode that run on every PR will provide you
GIT GIT
^^^ ^^^
The master-branch contains code that has been released to PyPi. A release is marked with a tag The master-branch contains code that has been released to PyPi. A release is marked with a tag
corresponding to the version. Issues are closed when they have been resolved in a released version corresponding to the version. Issues are closed when they have been resolved in the development-branch.
of the package.
When you have a change to make, begin by creating a new branch from the HEAD of `python-escpos/development`. When you have a change to make, begin by creating a new branch from the HEAD of `python-escpos/development`.
Name your branch to indicate what you are trying to achieve. Good branch names might Name your branch to indicate what you are trying to achieve. Good branch names might

View File

@ -3,6 +3,7 @@ include *.txt
include COPYING include COPYING
include INSTALL include INSTALL
include tox.ini include tox.ini
include capabilities-data/dist/capabilities.json
recursive-include doc *.bat recursive-include doc *.bat
recursive-include doc *.ico recursive-include doc *.ico
recursive-include doc *.py recursive-include doc *.py

1
capabilities-data Submodule

@ -0,0 +1 @@
Subproject commit 8744f9397ef6b58aee502aa75ac1efad31c9f5d7

View File

@ -84,8 +84,8 @@ text("text")
Prints raw text. Raises ``TextError`` exception. Prints raw text. Raises ``TextError`` exception.
set("align", "font", "type", width, height, invert, smooth, flip) set("align", "font", "text_type", width, height, invert, smooth, flip)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Set text properties. Set text properties.
@ -96,7 +96,7 @@ Set text properties.
* RIGHT > > *Default:* left * RIGHT > > *Default:* left
* ``font`` type could be ``A`` or ``B``. *Default:* A * ``font`` type could be ``A`` or ``B``. *Default:* A
* ``type`` type could be ``B`` (Bold), ``U`` (Underline) or ``normal``. *Default:* normal * ``text_type`` type could be ``B`` (Bold), ``U`` (Underline) or ``normal``. *Default:* normal
* ``width`` is a numeric value, 1 is for regular size, and 2 is twice the standard size. *Default*: 1 * ``width`` is a numeric value, 1 is for regular size, and 2 is twice the standard size. *Default*: 1
* ``height`` is a numeric value, 1 is for regular size and 2 is twice the standard size. *Default*: 1 * ``height`` is a numeric value, 1 is for regular size and 2 is twice the standard size. *Default*: 1
* ``invert`` is a boolean value, True enables white on black printing. *Default*: False * ``invert`` is a boolean value, True enables white on black printing. *Default*: False

View File

@ -72,7 +72,7 @@ setup(
author='Manuel F Martinez and others', author='Manuel F Martinez and others',
author_email='manpaz@bashlinux.com', author_email='manpaz@bashlinux.com',
maintainer='Patrick Kanzler', maintainer='Patrick Kanzler',
maintainer_email='patrick.kanzler@fablab.fau.de', maintainer_email='dev@pkanzler.de',
keywords=[ keywords=[
'ESC/POS', 'ESC/POS',
'thermoprinter', 'thermoprinter',
@ -113,6 +113,7 @@ setup(
'pyyaml', 'pyyaml',
'argparse', 'argparse',
'argcomplete', 'argcomplete',
'future'
], ],
setup_requires=[ setup_requires=[
'setuptools_scm', 'setuptools_scm',

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1 @@
../../capabilities-data/dist/capabilities.json

View File

@ -58,7 +58,7 @@ class BaseProfile(object):
def get_code_pages(self): def get_code_pages(self):
"""Return the supported code pages as a {name: index} dict. """Return the supported code pages as a {name: index} dict.
""" """
return {v.lower(): k for k, v in self.codePages.items()} return {v: k for k, v in self.codePages.items()}

View File

@ -12,21 +12,11 @@ class CodePageManager:
def get_all(self): def get_all(self):
return self.data.values() return self.data.values()
def encode(self, text, encoding, errors='strict'): def get_encoding_name(self, encoding):
"""Adds support for Japanese to the builtin str.encode(). # TODO resolve the encoding alias
return encoding.upper()
TODO: Add support for custom code page data from
escpos-printer-db.
"""
# Python has not have this builtin?
if encoding.upper() == 'KATAKANA':
return encode_katakana(text)
return text.encode(encoding, errors=errors)
def get_encoding(self, encoding): def get_encoding(self, encoding):
# resolve the encoding alias return self.data[encoding]
return encoding.lower()
CodePages = CodePageManager(CAPABILITIES['encodings']) CodePages = CodePageManager(CAPABILITIES['encodings'])

View File

@ -17,6 +17,7 @@ from __future__ import division
from __future__ import print_function from __future__ import print_function
from __future__ import unicode_literals from __future__ import unicode_literals
from builtins import bytes
from .constants import CODEPAGE_CHANGE from .constants import CODEPAGE_CHANGE
from .exceptions import CharCodeError, Error from .exceptions import CharCodeError, Error
from .capabilities import get_profile from .capabilities import get_profile
@ -45,19 +46,20 @@ class Encoder(object):
def __init__(self, codepage_map): def __init__(self, codepage_map):
self.codepages = codepage_map self.codepages = codepage_map
self.available_encodings = set(codepage_map.keys()) self.available_encodings = set(codepage_map.keys())
self.available_characters = {}
self.used_encodings = set() self.used_encodings = set()
def get_sequence(self, encoding): def get_sequence(self, encoding):
return int(self.codepages[encoding]) return int(self.codepages[encoding])
def get_encoding(self, encoding): def get_encoding_name(self, encoding):
"""Given an encoding provided by the user, will return a """Given an encoding provided by the user, will return a
canonical encoding name; and also validate that the encoding canonical encoding name; and also validate that the encoding
is supported. is supported.
TODO: Support encoding aliases: pc437 instead of cp437. TODO: Support encoding aliases: pc437 instead of cp437.
""" """
encoding = CodePages.get_encoding(encoding) encoding = CodePages.get_encoding_name(encoding)
if not encoding in self.codepages: if not encoding in self.codepages:
raise ValueError(( raise ValueError((
'Encoding "{}" cannot be used for the current profile. ' 'Encoding "{}" cannot be used for the current profile. '
@ -65,18 +67,87 @@ class Encoder(object):
).format(encoding, ','.join(self.codepages.keys()))) ).format(encoding, ','.join(self.codepages.keys())))
return encoding return encoding
def _get_codepage_char_list(self, encoding):
"""Get codepage character list
Gets characters 128-255 for a given code page, as an array.
:param encoding: The name of the encoding. This must appear in the CodePage list
"""
codepage = CodePages.get_encoding(encoding)
if 'data' in codepage:
encodable_chars = list("".join(codepage['data']))
assert(len(encodable_chars) == 128)
return encodable_chars
elif 'python_encode' in codepage:
encodable_chars = [u" "] * 128
for i in range(0, 128):
codepoint = i + 128
try:
encodable_chars[i] = bytes([codepoint]).decode(codepage['python_encode'])
except UnicodeDecodeError:
# Non-encodable character, just skip it
pass
return encodable_chars
raise LookupError("Can't find a known encoding for {}".format(encoding))
def _get_codepage_char_map(self, encoding):
""" Get codepage character map
Process an encoding and return a map of UTF-characters to code points
in this encoding.
This is generated once only, and returned from a cache.
:param encoding: The name of the encoding.
"""
# Skip things that were loaded previously
if encoding in self.available_characters:
return self.available_characters[encoding]
codepage_char_list = self._get_codepage_char_list(encoding)
codepage_char_map = dict((utf8, i + 128) for (i, utf8) in enumerate(codepage_char_list))
self.available_characters[encoding] = codepage_char_map
return codepage_char_map
def can_encode(self, encoding, char): def can_encode(self, encoding, char):
"""Determine if a character is encodeable in the given code page.
:param encoding: The name of the encoding.
:param char: The character to attempt to encode.
"""
available_map = {}
try: try:
encoded = CodePages.encode(char, encoding) available_map = self._get_codepage_char_map(encoding)
assert type(encoded) is bytes
return encoded
except LookupError: except LookupError:
# We don't have this encoding
return False
except UnicodeEncodeError:
return False return False
return True # Decide whether this character is encodeable in this code page
is_ascii = ord(char) < 128
is_encodable = char in available_map
return is_ascii or is_encodable
def _encode_char(self, char, charmap, defaultchar):
""" Encode a single character with the given encoding map
:param char: char to encode
:param charmap: dictionary for mapping characters in this code page
"""
if ord(char) < 128:
return ord(char)
if char in charmap:
return charmap[char]
return ord(defaultchar)
def encode(self, text, encoding, defaultchar='?'):
""" Encode text under the given encoding
:param text: Text to encode
:param encoding: Encoding name to use (must be defined in capabilities)
:param defaultchar: Fallback for non-encodable characters
"""
codepage_char_map = self._get_codepage_char_map(encoding)
output_bytes = bytes([self._encode_char(char, codepage_char_map, defaultchar) for char in text])
return output_bytes
def __encoding_sort_func(self, item): def __encoding_sort_func(self, item):
key, index = item key, index = item
@ -151,7 +222,7 @@ class MagicEncode(object):
self.driver = driver self.driver = driver
self.encoder = encoder or Encoder(driver.profile.get_code_pages()) self.encoder = encoder or Encoder(driver.profile.get_code_pages())
self.encoding = self.encoder.get_encoding(encoding) if encoding else None self.encoding = self.encoder.get_encoding_name(encoding) if encoding else None
self.defaultsymbol = defaultsymbol self.defaultsymbol = defaultsymbol
self.disabled = disabled self.disabled = disabled
@ -216,4 +287,4 @@ class MagicEncode(object):
six.int2byte(self.encoder.get_sequence(encoding))) six.int2byte(self.encoder.get_sequence(encoding)))
if text: if text:
self.driver._raw(CodePages.encode(text, encoding, errors="replace")) self.driver._raw(self.encoder.encode(text, encoding))

View File

@ -20,7 +20,7 @@ from escpos.printer import Dummy
def get_printer(): def get_printer():
return Dummy(magic_encode_args={'disabled': True, 'encoding': 'cp437'}) return Dummy(magic_encode_args={'disabled': True, 'encoding': 'CP437'})
@given(text=st.text()) @given(text=st.text())

View File

@ -26,17 +26,17 @@ from escpos.exceptions import CharCodeError, Error
class TestEncoder: class TestEncoder:
def test_can_encode(self): def test_can_encode(self):
assert not Encoder({'cp437': 1}).can_encode('cp437', u'') assert not Encoder({'CP437': 1}).can_encode('CP437', u'')
assert Encoder({'cp437': 1}).can_encode('cp437', u'á') assert Encoder({'CP437': 1}).can_encode('CP437', u'á')
assert not Encoder({'foobar': 1}).can_encode('foobar', 'a') assert not Encoder({'foobar': 1}).can_encode('foobar', 'a')
def test_find_suitable_encoding(self): def test_find_suitable_encoding(self):
assert not Encoder({'cp437': 1}).find_suitable_encoding(u'') assert not Encoder({'CP437': 1}).find_suitable_encoding(u'')
assert Encoder({'cp858': 1}).find_suitable_encoding(u'') == 'cp858' assert Encoder({'CP858': 1}).find_suitable_encoding(u'') == 'CP858'
@raises(ValueError) @raises(ValueError)
def test_get_encoding(self): def test_get_encoding(self):
Encoder({}).get_encoding('latin1') Encoder({}).get_encoding_name('latin1')
class TestMagicEncode: class TestMagicEncode:
@ -51,17 +51,17 @@ class TestMagicEncode:
def test_init_from_none(self, driver): def test_init_from_none(self, driver):
encode = MagicEncode(driver, encoding=None) encode = MagicEncode(driver, encoding=None)
encode.write_with_encoding('cp858', '€ ist teuro.') encode.write_with_encoding('CP858', '€ ist teuro.')
assert driver.output == b'\x1bt\x13\xd5 ist teuro.' assert driver.output == b'\x1bt\x13\xd5 ist teuro.'
def test_change_from_another(self, driver): def test_change_from_another(self, driver):
encode = MagicEncode(driver, encoding='cp437') encode = MagicEncode(driver, encoding='CP437')
encode.write_with_encoding('cp858', '€ ist teuro.') encode.write_with_encoding('CP858', '€ ist teuro.')
assert driver.output == b'\x1bt\x13\xd5 ist teuro.' assert driver.output == b'\x1bt\x13\xd5 ist teuro.'
def test_no_change(self, driver): def test_no_change(self, driver):
encode = MagicEncode(driver, encoding='cp858') encode = MagicEncode(driver, encoding='CP858')
encode.write_with_encoding('cp858', '€ ist teuro.') encode.write_with_encoding('CP858', '€ ist teuro.')
assert driver.output == b'\xd5 ist teuro.' assert driver.output == b'\xd5 ist teuro.'
class TestWrite: class TestWrite:
@ -72,14 +72,14 @@ class TestMagicEncode:
assert driver.output == b'\x1bt\x0f\xa4 ist teuro.' assert driver.output == b'\x1bt\x0f\xa4 ist teuro.'
def test_write_disabled(self, driver): def test_write_disabled(self, driver):
encode = MagicEncode(driver, encoding='cp437', disabled=True) encode = MagicEncode(driver, encoding='CP437', disabled=True)
encode.write('€ ist teuro.') encode.write('€ ist teuro.')
assert driver.output == b'? ist teuro.' assert driver.output == b'? ist teuro.'
def test_write_no_codepage(self, driver): def test_write_no_codepage(self, driver):
encode = MagicEncode( encode = MagicEncode(
driver, defaultsymbol="_", encoder=Encoder({'cp437': 1}), driver, defaultsymbol="_", encoder=Encoder({'CP437': 1}),
encoding='cp437') encoding='CP437')
encode.write(u'€ ist teuro.') encode.write(u'€ ist teuro.')
assert driver.output == b'_ ist teuro.' assert driver.output == b'_ ist teuro.'
@ -87,7 +87,7 @@ class TestMagicEncode:
def test(self, driver): def test(self, driver):
encode = MagicEncode(driver) encode = MagicEncode(driver)
encode.force_encoding('cp437') encode.force_encoding('CP437')
assert driver.output == b'\x1bt\x00' assert driver.output == b'\x1bt\x00'
encode.write('€ ist teuro.') encode.write('€ ist teuro.')