"Merge pull request #1 from mike42/miracle2k-text-encoding\n\nText encoding conversion to lookup tables"
This commit is contained in:
commit
a3ca2c2a16
|
@ -5,6 +5,7 @@
|
||||||
$~
|
$~
|
||||||
.idea/
|
.idea/
|
||||||
.directory
|
.directory
|
||||||
|
.cache/
|
||||||
|
|
||||||
# temporary data
|
# temporary data
|
||||||
temp
|
temp
|
||||||
|
|
|
@ -0,0 +1,3 @@
|
||||||
|
[submodule "capabilities-data"]
|
||||||
|
path = capabilities-data
|
||||||
|
url = https://github.com/receipt-print-hq/escpos-printer-db.git
|
|
@ -45,8 +45,7 @@ The checks by Landscape and QuantifiedCode that run on every PR will provide you
|
||||||
GIT
|
GIT
|
||||||
^^^
|
^^^
|
||||||
The master-branch contains code that has been released to PyPI. A release is marked with a tag
|
The master-branch contains code that has been released to PyPI. A release is marked with a tag
|
||||||
corresponding to the version. Issues are closed when they have been resolved in a released version
|
corresponding to the version. Issues are closed when they have been resolved in the development-branch.
|
||||||
of the package.
|
|
||||||
|
|
||||||
When you have a change to make, begin by creating a new branch from the HEAD of `python-escpos/development`.
|
When you have a change to make, begin by creating a new branch from the HEAD of `python-escpos/development`.
|
||||||
Name your branch to indicate what you are trying to achieve. Good branch names might
|
Name your branch to indicate what you are trying to achieve. Good branch names might
|
||||||
|
|
|
@ -3,6 +3,7 @@ include *.txt
|
||||||
include COPYING
|
include COPYING
|
||||||
include INSTALL
|
include INSTALL
|
||||||
include tox.ini
|
include tox.ini
|
||||||
|
include capabilities-data/dist/capabilities.json
|
||||||
recursive-include doc *.bat
|
recursive-include doc *.bat
|
||||||
recursive-include doc *.ico
|
recursive-include doc *.ico
|
||||||
recursive-include doc *.py
|
recursive-include doc *.py
|
||||||
|
|
|
@ -0,0 +1 @@
|
||||||
|
Subproject commit 8744f9397ef6b58aee502aa75ac1efad31c9f5d7
|
|
@ -84,8 +84,8 @@ text("text")
|
||||||
|
|
||||||
Prints raw text. Raises ``TextError`` exception.
|
Prints raw text. Raises ``TextError`` exception.
|
||||||
|
|
||||||
set("align", "font", "type", width, height, invert, smooth, flip)
|
set("align", "font", "text_type", width, height, invert, smooth, flip)
|
||||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||||
|
|
||||||
Set text properties.
|
Set text properties.
|
||||||
|
|
||||||
|
@ -96,7 +96,7 @@ Set text properties.
|
||||||
* RIGHT > > *Default:* left
|
* RIGHT > > *Default:* left
|
||||||
|
|
||||||
* ``font`` type could be ``A`` or ``B``. *Default:* A
|
* ``font`` type could be ``A`` or ``B``. *Default:* A
|
||||||
* ``type`` type could be ``B`` (Bold), ``U`` (Underline) or ``normal``. *Default:* normal
|
* ``text_type`` type could be ``B`` (Bold), ``U`` (Underline) or ``normal``. *Default:* normal
|
||||||
* ``width`` is a numeric value, 1 is for regular size, and 2 is twice the standard size. *Default*: 1
|
* ``width`` is a numeric value, 1 is for regular size, and 2 is twice the standard size. *Default*: 1
|
||||||
* ``height`` is a numeric value, 1 is for regular size and 2 is twice the standard size. *Default*: 1
|
* ``height`` is a numeric value, 1 is for regular size and 2 is twice the standard size. *Default*: 1
|
||||||
* ``invert`` is a boolean value, True enables white on black printing. *Default*: False
|
* ``invert`` is a boolean value, True enables white on black printing. *Default*: False
|
||||||
|
|
3
setup.py
3
setup.py
|
@ -72,7 +72,7 @@ setup(
|
||||||
author='Manuel F Martinez and others',
|
author='Manuel F Martinez and others',
|
||||||
author_email='manpaz@bashlinux.com',
|
author_email='manpaz@bashlinux.com',
|
||||||
maintainer='Patrick Kanzler',
|
maintainer='Patrick Kanzler',
|
||||||
maintainer_email='patrick.kanzler@fablab.fau.de',
|
maintainer_email='dev@pkanzler.de',
|
||||||
keywords=[
|
keywords=[
|
||||||
'ESC/POS',
|
'ESC/POS',
|
||||||
'thermoprinter',
|
'thermoprinter',
|
||||||
|
@ -113,6 +113,7 @@ setup(
|
||||||
'pyyaml',
|
'pyyaml',
|
||||||
'argparse',
|
'argparse',
|
||||||
'argcomplete',
|
'argcomplete',
|
||||||
|
'future'
|
||||||
],
|
],
|
||||||
setup_requires=[
|
setup_requires=[
|
||||||
'setuptools_scm',
|
'setuptools_scm',
|
||||||
|
|
File diff suppressed because one or more lines are too long
|
@ -0,0 +1 @@
|
||||||
|
../../capabilities-data/dist/capabilities.json
|
|
@ -58,7 +58,7 @@ class BaseProfile(object):
|
||||||
def get_code_pages(self):
|
def get_code_pages(self):
|
||||||
"""Return the supported code pages as a {name: index} dict.
|
"""Return the supported code pages as a {name: index} dict.
|
||||||
"""
|
"""
|
||||||
return {v.lower(): k for k, v in self.codePages.items()}
|
return {v: k for k, v in self.codePages.items()}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -12,21 +12,11 @@ class CodePageManager:
|
||||||
def get_all(self):
|
def get_all(self):
|
||||||
return self.data.values()
|
return self.data.values()
|
||||||
|
|
||||||
def encode(self, text, encoding, errors='strict'):
|
def get_encoding_name(self, encoding):
|
||||||
"""Adds support for Japanese to the builtin str.encode().
|
# TODO resolve the encoding alias
|
||||||
|
return encoding.upper()
|
||||||
TODO: Add support for custom code page data from
|
|
||||||
escpos-printer-db.
|
|
||||||
"""
|
|
||||||
# Python has not have this builtin?
|
|
||||||
if encoding.upper() == 'KATAKANA':
|
|
||||||
return encode_katakana(text)
|
|
||||||
|
|
||||||
return text.encode(encoding, errors=errors)
|
|
||||||
|
|
||||||
def get_encoding(self, encoding):
|
def get_encoding(self, encoding):
|
||||||
# resolve the encoding alias
|
return self.data[encoding]
|
||||||
return encoding.lower()
|
|
||||||
|
|
||||||
|
|
||||||
CodePages = CodePageManager(CAPABILITIES['encodings'])
|
CodePages = CodePageManager(CAPABILITIES['encodings'])
|
|
@ -17,6 +17,7 @@ from __future__ import division
|
||||||
from __future__ import print_function
|
from __future__ import print_function
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from builtins import bytes
|
||||||
from .constants import CODEPAGE_CHANGE
|
from .constants import CODEPAGE_CHANGE
|
||||||
from .exceptions import CharCodeError, Error
|
from .exceptions import CharCodeError, Error
|
||||||
from .capabilities import get_profile
|
from .capabilities import get_profile
|
||||||
|
@ -45,19 +46,20 @@ class Encoder(object):
|
||||||
def __init__(self, codepage_map):
|
def __init__(self, codepage_map):
|
||||||
self.codepages = codepage_map
|
self.codepages = codepage_map
|
||||||
self.available_encodings = set(codepage_map.keys())
|
self.available_encodings = set(codepage_map.keys())
|
||||||
|
self.available_characters = {}
|
||||||
self.used_encodings = set()
|
self.used_encodings = set()
|
||||||
|
|
||||||
def get_sequence(self, encoding):
|
def get_sequence(self, encoding):
|
||||||
return int(self.codepages[encoding])
|
return int(self.codepages[encoding])
|
||||||
|
|
||||||
def get_encoding(self, encoding):
|
def get_encoding_name(self, encoding):
|
||||||
"""Given an encoding provided by the user, will return a
|
"""Given an encoding provided by the user, will return a
|
||||||
canonical encoding name; and also validate that the encoding
|
canonical encoding name; and also validate that the encoding
|
||||||
is supported.
|
is supported.
|
||||||
|
|
||||||
TODO: Support encoding aliases: pc437 instead of cp437.
|
TODO: Support encoding aliases: pc437 instead of cp437.
|
||||||
"""
|
"""
|
||||||
encoding = CodePages.get_encoding(encoding)
|
encoding = CodePages.get_encoding_name(encoding)
|
||||||
if not encoding in self.codepages:
|
if not encoding in self.codepages:
|
||||||
raise ValueError((
|
raise ValueError((
|
||||||
'Encoding "{}" cannot be used for the current profile. '
|
'Encoding "{}" cannot be used for the current profile. '
|
||||||
|
@ -65,18 +67,87 @@ class Encoder(object):
|
||||||
).format(encoding, ','.join(self.codepages.keys())))
|
).format(encoding, ','.join(self.codepages.keys())))
|
||||||
return encoding
|
return encoding
|
||||||
|
|
||||||
|
def _get_codepage_char_list(self, encoding):
|
||||||
|
"""Get codepage character list
|
||||||
|
|
||||||
|
Gets characters 128-255 for a given code page, as an array.
|
||||||
|
|
||||||
|
:param encoding: The name of the encoding. This must appear in the CodePage list
|
||||||
|
"""
|
||||||
|
codepage = CodePages.get_encoding(encoding)
|
||||||
|
if 'data' in codepage:
|
||||||
|
encodable_chars = list("".join(codepage['data']))
|
||||||
|
assert(len(encodable_chars) == 128)
|
||||||
|
return encodable_chars
|
||||||
|
elif 'python_encode' in codepage:
|
||||||
|
encodable_chars = [u" "] * 128
|
||||||
|
for i in range(0, 128):
|
||||||
|
codepoint = i + 128
|
||||||
|
try:
|
||||||
|
encodable_chars[i] = bytes([codepoint]).decode(codepage['python_encode'])
|
||||||
|
except UnicodeDecodeError:
|
||||||
|
# Non-encodable character, just skip it
|
||||||
|
pass
|
||||||
|
return encodable_chars
|
||||||
|
raise LookupError("Can't find a known encoding for {}".format(encoding))
|
||||||
|
|
||||||
|
def _get_codepage_char_map(self, encoding):
|
||||||
|
""" Get codepage character map
|
||||||
|
|
||||||
|
Process an encoding and return a map of Unicode characters to code points
|
||||||
|
in this encoding.
|
||||||
|
|
||||||
|
This is generated once only, and returned from a cache.
|
||||||
|
|
||||||
|
:param encoding: The name of the encoding.
|
||||||
|
"""
|
||||||
|
# Skip things that were loaded previously
|
||||||
|
if encoding in self.available_characters:
|
||||||
|
return self.available_characters[encoding]
|
||||||
|
codepage_char_list = self._get_codepage_char_list(encoding)
|
||||||
|
codepage_char_map = dict((utf8, i + 128) for (i, utf8) in enumerate(codepage_char_list))
|
||||||
|
self.available_characters[encoding] = codepage_char_map
|
||||||
|
return codepage_char_map
|
||||||
|
|
||||||
def can_encode(self, encoding, char):
|
def can_encode(self, encoding, char):
|
||||||
|
"""Determine if a character is encodable in the given code page.
|
||||||
|
|
||||||
|
:param encoding: The name of the encoding.
|
||||||
|
:param char: The character to attempt to encode.
|
||||||
|
"""
|
||||||
|
available_map = {}
|
||||||
try:
|
try:
|
||||||
encoded = CodePages.encode(char, encoding)
|
available_map = self._get_codepage_char_map(encoding)
|
||||||
assert type(encoded) is bytes
|
|
||||||
return encoded
|
|
||||||
except LookupError:
|
except LookupError:
|
||||||
# We don't have this encoding
|
|
||||||
return False
|
|
||||||
except UnicodeEncodeError:
|
|
||||||
return False
|
return False
|
||||||
|
|
||||||
return True
|
# Decide whether this character is encodable in this code page
|
||||||
|
is_ascii = ord(char) < 128
|
||||||
|
is_encodable = char in available_map
|
||||||
|
return is_ascii or is_encodable
|
||||||
|
|
||||||
|
def _encode_char(self, char, charmap, defaultchar):
|
||||||
|
""" Encode a single character with the given encoding map
|
||||||
|
|
||||||
|
:param char: char to encode
|
||||||
|
:param charmap: dictionary for mapping characters in this code page
|
||||||
|
"""
|
||||||
|
if ord(char) < 128:
|
||||||
|
return ord(char)
|
||||||
|
if char in charmap:
|
||||||
|
return charmap[char]
|
||||||
|
return ord(defaultchar)
|
||||||
|
|
||||||
|
def encode(self, text, encoding, defaultchar='?'):
|
||||||
|
""" Encode text under the given encoding
|
||||||
|
|
||||||
|
:param text: Text to encode
|
||||||
|
:param encoding: Encoding name to use (must be defined in capabilities)
|
||||||
|
:param defaultchar: Fallback for non-encodable characters
|
||||||
|
"""
|
||||||
|
codepage_char_map = self._get_codepage_char_map(encoding)
|
||||||
|
output_bytes = bytes([self._encode_char(char, codepage_char_map, defaultchar) for char in text])
|
||||||
|
return output_bytes
|
||||||
|
|
||||||
def __encoding_sort_func(self, item):
|
def __encoding_sort_func(self, item):
|
||||||
key, index = item
|
key, index = item
|
||||||
|
@ -151,7 +222,7 @@ class MagicEncode(object):
|
||||||
self.driver = driver
|
self.driver = driver
|
||||||
self.encoder = encoder or Encoder(driver.profile.get_code_pages())
|
self.encoder = encoder or Encoder(driver.profile.get_code_pages())
|
||||||
|
|
||||||
self.encoding = self.encoder.get_encoding(encoding) if encoding else None
|
self.encoding = self.encoder.get_encoding_name(encoding) if encoding else None
|
||||||
self.defaultsymbol = defaultsymbol
|
self.defaultsymbol = defaultsymbol
|
||||||
self.disabled = disabled
|
self.disabled = disabled
|
||||||
|
|
||||||
|
@ -216,4 +287,4 @@ class MagicEncode(object):
|
||||||
six.int2byte(self.encoder.get_sequence(encoding)))
|
six.int2byte(self.encoder.get_sequence(encoding)))
|
||||||
|
|
||||||
if text:
|
if text:
|
||||||
self.driver._raw(CodePages.encode(text, encoding, errors="replace"))
|
self.driver._raw(self.encoder.encode(text, encoding))
|
||||||
|
|
|
@ -20,7 +20,7 @@ from escpos.printer import Dummy
|
||||||
|
|
||||||
|
|
||||||
def get_printer():
|
def get_printer():
|
||||||
return Dummy(magic_encode_args={'disabled': True, 'encoding': 'cp437'})
|
return Dummy(magic_encode_args={'disabled': True, 'encoding': 'CP437'})
|
||||||
|
|
||||||
|
|
||||||
@given(text=st.text())
|
@given(text=st.text())
|
||||||
|
|
|
@ -26,17 +26,17 @@ from escpos.exceptions import CharCodeError, Error
|
||||||
class TestEncoder:
|
class TestEncoder:
|
||||||
|
|
||||||
def test_can_encode(self):
|
def test_can_encode(self):
|
||||||
assert not Encoder({'cp437': 1}).can_encode('cp437', u'€')
|
assert not Encoder({'CP437': 1}).can_encode('CP437', u'€')
|
||||||
assert Encoder({'cp437': 1}).can_encode('cp437', u'á')
|
assert Encoder({'CP437': 1}).can_encode('CP437', u'á')
|
||||||
assert not Encoder({'foobar': 1}).can_encode('foobar', 'a')
|
assert not Encoder({'foobar': 1}).can_encode('foobar', 'a')
|
||||||
|
|
||||||
def test_find_suitable_encoding(self):
|
def test_find_suitable_encoding(self):
|
||||||
assert not Encoder({'cp437': 1}).find_suitable_encoding(u'€')
|
assert not Encoder({'CP437': 1}).find_suitable_encoding(u'€')
|
||||||
assert Encoder({'cp858': 1}).find_suitable_encoding(u'€') == 'cp858'
|
assert Encoder({'CP858': 1}).find_suitable_encoding(u'€') == 'CP858'
|
||||||
|
|
||||||
@raises(ValueError)
|
@raises(ValueError)
|
||||||
def test_get_encoding(self):
|
def test_get_encoding(self):
|
||||||
Encoder({}).get_encoding('latin1')
|
Encoder({}).get_encoding_name('latin1')
|
||||||
|
|
||||||
|
|
||||||
class TestMagicEncode:
|
class TestMagicEncode:
|
||||||
|
@ -51,17 +51,17 @@ class TestMagicEncode:
|
||||||
|
|
||||||
def test_init_from_none(self, driver):
|
def test_init_from_none(self, driver):
|
||||||
encode = MagicEncode(driver, encoding=None)
|
encode = MagicEncode(driver, encoding=None)
|
||||||
encode.write_with_encoding('cp858', '€ ist teuro.')
|
encode.write_with_encoding('CP858', '€ ist teuro.')
|
||||||
assert driver.output == b'\x1bt\x13\xd5 ist teuro.'
|
assert driver.output == b'\x1bt\x13\xd5 ist teuro.'
|
||||||
|
|
||||||
def test_change_from_another(self, driver):
|
def test_change_from_another(self, driver):
|
||||||
encode = MagicEncode(driver, encoding='cp437')
|
encode = MagicEncode(driver, encoding='CP437')
|
||||||
encode.write_with_encoding('cp858', '€ ist teuro.')
|
encode.write_with_encoding('CP858', '€ ist teuro.')
|
||||||
assert driver.output == b'\x1bt\x13\xd5 ist teuro.'
|
assert driver.output == b'\x1bt\x13\xd5 ist teuro.'
|
||||||
|
|
||||||
def test_no_change(self, driver):
|
def test_no_change(self, driver):
|
||||||
encode = MagicEncode(driver, encoding='cp858')
|
encode = MagicEncode(driver, encoding='CP858')
|
||||||
encode.write_with_encoding('cp858', '€ ist teuro.')
|
encode.write_with_encoding('CP858', '€ ist teuro.')
|
||||||
assert driver.output == b'\xd5 ist teuro.'
|
assert driver.output == b'\xd5 ist teuro.'
|
||||||
|
|
||||||
class TestWrite:
|
class TestWrite:
|
||||||
|
@ -72,14 +72,14 @@ class TestMagicEncode:
|
||||||
assert driver.output == b'\x1bt\x0f\xa4 ist teuro.'
|
assert driver.output == b'\x1bt\x0f\xa4 ist teuro.'
|
||||||
|
|
||||||
def test_write_disabled(self, driver):
|
def test_write_disabled(self, driver):
|
||||||
encode = MagicEncode(driver, encoding='cp437', disabled=True)
|
encode = MagicEncode(driver, encoding='CP437', disabled=True)
|
||||||
encode.write('€ ist teuro.')
|
encode.write('€ ist teuro.')
|
||||||
assert driver.output == b'? ist teuro.'
|
assert driver.output == b'? ist teuro.'
|
||||||
|
|
||||||
def test_write_no_codepage(self, driver):
|
def test_write_no_codepage(self, driver):
|
||||||
encode = MagicEncode(
|
encode = MagicEncode(
|
||||||
driver, defaultsymbol="_", encoder=Encoder({'cp437': 1}),
|
driver, defaultsymbol="_", encoder=Encoder({'CP437': 1}),
|
||||||
encoding='cp437')
|
encoding='CP437')
|
||||||
encode.write(u'€ ist teuro.')
|
encode.write(u'€ ist teuro.')
|
||||||
assert driver.output == b'_ ist teuro.'
|
assert driver.output == b'_ ist teuro.'
|
||||||
|
|
||||||
|
@ -87,7 +87,7 @@ class TestMagicEncode:
|
||||||
|
|
||||||
def test(self, driver):
|
def test(self, driver):
|
||||||
encode = MagicEncode(driver)
|
encode = MagicEncode(driver)
|
||||||
encode.force_encoding('cp437')
|
encode.force_encoding('CP437')
|
||||||
assert driver.output == b'\x1bt\x00'
|
assert driver.output == b'\x1bt\x00'
|
||||||
|
|
||||||
encode.write('€ ist teuro.')
|
encode.write('€ ist teuro.')
|
||||||
|
|
Loading…
Reference in New Issue