"Merge pull request #1 from mike42/miracle2k-text-encoding\n\nText encoding conversion to lookup tables"

This commit is contained in:
Michael Elsdörfer 2016-09-22 22:58:37 +02:00
commit a3ca2c2a16
13 changed files with 116 additions and 49 deletions

1
.gitignore vendored
View File

@ -5,6 +5,7 @@
$~
.idea/
.directory
.cache/
# temporary data
temp

3
.gitmodules vendored Normal file
View File

@ -0,0 +1,3 @@
[submodule "capabilities-data"]
path = capabilities-data
url = https://github.com/receipt-print-hq/escpos-printer-db.git

View File

@ -45,8 +45,7 @@ The checks by Landscape and QuantifiedCode that run on every PR will provide you
GIT
^^^
The master-branch contains code that has been released to PyPi. A release is marked with a tag
corresponding to the version. Issues are closed when they have been resolved in a released version
of the package.
corresponding to the version. Issues are closed when they have been resolved in the development-branch.
When you have a change to make, begin by creating a new branch from the HEAD of `python-escpos/development`.
Name your branch to indicate what you are trying to achieve. Good branch names might

View File

@ -3,6 +3,7 @@ include *.txt
include COPYING
include INSTALL
include tox.ini
include capabilities-data/dist/capabilities.json
recursive-include doc *.bat
recursive-include doc *.ico
recursive-include doc *.py

1
capabilities-data Submodule

@ -0,0 +1 @@
Subproject commit 8744f9397ef6b58aee502aa75ac1efad31c9f5d7

View File

@ -84,8 +84,8 @@ text("text")
Prints raw text. Raises ``TextError`` exception.
set("align", "font", "type", width, height, invert, smooth, flip)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
set("align", "font", "text_type", width, height, invert, smooth, flip)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Set text properties.
@ -96,7 +96,7 @@ Set text properties.
* RIGHT > > *Default:* left
* ``font`` type could be ``A`` or ``B``. *Default:* A
* ``type`` type could be ``B`` (Bold), ``U`` (Underline) or ``normal``. *Default:* normal
* ``text_type`` type could be ``B`` (Bold), ``U`` (Underline) or ``normal``. *Default:* normal
* ``width`` is a numeric value, 1 is for regular size, and 2 is twice the standard size. *Default:* 1
* ``height`` is a numeric value, 1 is for regular size, and 2 is twice the standard size. *Default:* 1
* ``invert`` is a boolean value, True enables white on black printing. *Default:* False

View File

@ -72,7 +72,7 @@ setup(
author='Manuel F Martinez and others',
author_email='manpaz@bashlinux.com',
maintainer='Patrick Kanzler',
maintainer_email='patrick.kanzler@fablab.fau.de',
maintainer_email='dev@pkanzler.de',
keywords=[
'ESC/POS',
'thermoprinter',
@ -113,6 +113,7 @@ setup(
'pyyaml',
'argparse',
'argcomplete',
'future'
],
setup_requires=[
'setuptools_scm',

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1 @@
../../capabilities-data/dist/capabilities.json

View File

@ -58,7 +58,7 @@ class BaseProfile(object):
def get_code_pages(self):
"""Return the support code pages as a {name: index} dict.
"""
return {v.lower(): k for k, v in self.codePages.items()}
return {v: k for k, v in self.codePages.items()}

View File

@ -12,21 +12,11 @@ class CodePageManager:
def get_all(self):
return self.data.values()
def encode(self, text, encoding, errors='strict'):
"""Adds support for Japanese to the builtin str.encode().
TODO: Add support for custom code page data from
escpos-printer-db.
"""
# Python has not have this builtin?
if encoding.upper() == 'KATAKANA':
return encode_katakana(text)
return text.encode(encoding, errors=errors)
def get_encoding_name(self, encoding):
# TODO resolve the encoding alias
return encoding.upper()
def get_encoding(self, encoding):
# resolve the encoding alias
return encoding.lower()
return self.data[encoding]
CodePages = CodePageManager(CAPABILITIES['encodings'])

View File

@ -17,6 +17,7 @@ from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
from builtins import bytes
from .constants import CODEPAGE_CHANGE
from .exceptions import CharCodeError, Error
from .capabilities import get_profile
@ -45,19 +46,20 @@ class Encoder(object):
def __init__(self, codepage_map):
self.codepages = codepage_map
self.available_encodings = set(codepage_map.keys())
self.available_characters = {}
self.used_encodings = set()
def get_sequence(self, encoding):
return int(self.codepages[encoding])
def get_encoding(self, encoding):
def get_encoding_name(self, encoding):
"""Given an encoding provided by the user, will return a
canonical encoding name; and also validate that the encoding
is supported.
TODO: Support encoding aliases: pc437 instead of cp437.
"""
encoding = CodePages.get_encoding(encoding)
encoding = CodePages.get_encoding_name(encoding)
if not encoding in self.codepages:
raise ValueError((
'Encoding "{}" cannot be used for the current profile. '
@ -65,18 +67,87 @@ class Encoder(object):
).format(encoding, ','.join(self.codepages.keys())))
return encoding
def _get_codepage_char_list(self, encoding):
    """Get codepage character list

    Gets characters 128-255 for a given code page, as a list of 128
    entries. Entry ``i`` is the character for code point ``i + 128``.

    :param encoding: The name of the encoding. This must appear in the CodePage list
    :raises LookupError: if the code page entry has neither a ``data`` nor
        a ``python_encode`` key
    """
    codepage = CodePages.get_encoding(encoding)

    # Preferred source: an explicit character table shipped with the entry.
    if 'data' in codepage:
        table = list("".join(codepage['data']))
        assert len(table) == 128
        return table

    if 'python_encode' not in codepage:
        raise LookupError("Can't find a known encoding for {}".format(encoding))

    # Otherwise derive the table by decoding each single high byte with
    # the Python codec named in the entry.
    python_codec = codepage['python_encode']
    table = []
    for codepoint in range(128, 256):
        try:
            table.append(bytes([codepoint]).decode(python_codec))
        except UnicodeDecodeError:
            # Byte has no meaning in this codec; keep a space placeholder.
            table.append(u" ")
    return table
def _get_codepage_char_map(self, encoding):
""" Get codepage character map
Process an encoding and return a map of UTF-characters to code points
in this encoding.
This is generated once only, and returned from a cache.
:param encoding: The name of the encoding.
"""
# Skip things that were loaded previously
if encoding in self.available_characters:
return self.available_characters[encoding]
codepage_char_list = self._get_codepage_char_list(encoding)
codepage_char_map = dict((utf8, i + 128) for (i, utf8) in enumerate(codepage_char_list))
self.available_characters[encoding] = codepage_char_map
return codepage_char_map
def can_encode(self, encoding, char):
"""Determine if a character is encodeable in the given code page.
:param encoding: The name of the encoding.
:param char: The character to attempt to encode.
"""
available_map = {}
try:
encoded = CodePages.encode(char, encoding)
assert type(encoded) is bytes
return encoded
available_map = self._get_codepage_char_map(encoding)
except LookupError:
# We don't have this encoding
return False
except UnicodeEncodeError:
return False
return True
# Decide whether this character is encodeable in this code page
is_ascii = ord(char) < 128
is_encodable = char in available_map
return is_ascii or is_encodable
def _encode_char(self, char, charmap, defaultchar):
""" Encode a single character with the given encoding map
:param char: char to encode
:param charmap: dictionary for mapping characters in this code page
"""
if ord(char) < 128:
return ord(char)
if char in charmap:
return charmap[char]
return ord(defaultchar)
def encode(self, text, encoding, defaultchar='?'):
""" Encode text under the given encoding
:param text: Text to encode
:param encoding: Encoding name to use (must be defined in capabilities)
:param defaultchar: Fallback for non-encodable characters
"""
codepage_char_map = self._get_codepage_char_map(encoding)
output_bytes = bytes([self._encode_char(char, codepage_char_map, defaultchar) for char in text])
return output_bytes
def __encoding_sort_func(self, item):
key, index = item
@ -151,7 +222,7 @@ class MagicEncode(object):
self.driver = driver
self.encoder = encoder or Encoder(driver.profile.get_code_pages())
self.encoding = self.encoder.get_encoding(encoding) if encoding else None
self.encoding = self.encoder.get_encoding_name(encoding) if encoding else None
self.defaultsymbol = defaultsymbol
self.disabled = disabled
@ -216,4 +287,4 @@ class MagicEncode(object):
six.int2byte(self.encoder.get_sequence(encoding)))
if text:
self.driver._raw(CodePages.encode(text, encoding, errors="replace"))
self.driver._raw(self.encoder.encode(text, encoding))

View File

@ -20,7 +20,7 @@ from escpos.printer import Dummy
def get_printer():
return Dummy(magic_encode_args={'disabled': True, 'encoding': 'cp437'})
return Dummy(magic_encode_args={'disabled': True, 'encoding': 'CP437'})
@given(text=st.text())

View File

@ -26,17 +26,17 @@ from escpos.exceptions import CharCodeError, Error
class TestEncoder:
def test_can_encode(self):
assert not Encoder({'cp437': 1}).can_encode('cp437', u'')
assert Encoder({'cp437': 1}).can_encode('cp437', u'á')
assert not Encoder({'CP437': 1}).can_encode('CP437', u'')
assert Encoder({'CP437': 1}).can_encode('CP437', u'á')
assert not Encoder({'foobar': 1}).can_encode('foobar', 'a')
def test_find_suitable_encoding(self):
assert not Encoder({'cp437': 1}).find_suitable_encoding(u'')
assert Encoder({'cp858': 1}).find_suitable_encoding(u'') == 'cp858'
assert not Encoder({'CP437': 1}).find_suitable_encoding(u'')
assert Encoder({'CP858': 1}).find_suitable_encoding(u'') == 'CP858'
@raises(ValueError)
def test_get_encoding(self):
Encoder({}).get_encoding('latin1')
Encoder({}).get_encoding_name('latin1')
class TestMagicEncode:
@ -51,17 +51,17 @@ class TestMagicEncode:
def test_init_from_none(self, driver):
encode = MagicEncode(driver, encoding=None)
encode.write_with_encoding('cp858', '€ ist teuro.')
encode.write_with_encoding('CP858', '€ ist teuro.')
assert driver.output == b'\x1bt\x13\xd5 ist teuro.'
def test_change_from_another(self, driver):
encode = MagicEncode(driver, encoding='cp437')
encode.write_with_encoding('cp858', '€ ist teuro.')
encode = MagicEncode(driver, encoding='CP437')
encode.write_with_encoding('CP858', '€ ist teuro.')
assert driver.output == b'\x1bt\x13\xd5 ist teuro.'
def test_no_change(self, driver):
encode = MagicEncode(driver, encoding='cp858')
encode.write_with_encoding('cp858', '€ ist teuro.')
encode = MagicEncode(driver, encoding='CP858')
encode.write_with_encoding('CP858', '€ ist teuro.')
assert driver.output == b'\xd5 ist teuro.'
class TestWrite:
@ -72,14 +72,14 @@ class TestMagicEncode:
assert driver.output == b'\x1bt\x0f\xa4 ist teuro.'
def test_write_disabled(self, driver):
encode = MagicEncode(driver, encoding='cp437', disabled=True)
encode = MagicEncode(driver, encoding='CP437', disabled=True)
encode.write('€ ist teuro.')
assert driver.output == b'? ist teuro.'
def test_write_no_codepage(self, driver):
encode = MagicEncode(
driver, defaultsymbol="_", encoder=Encoder({'cp437': 1}),
encoding='cp437')
driver, defaultsymbol="_", encoder=Encoder({'CP437': 1}),
encoding='CP437')
encode.write(u'€ ist teuro.')
assert driver.output == b'_ ist teuro.'
@ -87,7 +87,7 @@ class TestMagicEncode:
def test(self, driver):
encode = MagicEncode(driver)
encode.force_encoding('cp437')
encode.force_encoding('CP437')
assert driver.output == b'\x1bt\x00'
encode.write('€ ist teuro.')