"Merge pull request #1 from mike42/miracle2k-text-encoding\n\nText encoding conversion to lookup tables"
This commit is contained in:
commit
a3ca2c2a16
|
@ -5,6 +5,7 @@
|
|||
$~
|
||||
.idea/
|
||||
.directory
|
||||
.cache/
|
||||
|
||||
# temporary data
|
||||
temp
|
||||
|
|
|
@ -0,0 +1,3 @@
|
|||
[submodule "capabilities-data"]
|
||||
path = capabilities-data
|
||||
url = https://github.com/receipt-print-hq/escpos-printer-db.git
|
|
@ -29,7 +29,7 @@ of every file of code:
|
|||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
from __future__ import unicode_literals
|
||||
|
||||
|
||||
Furthermore please be aware of the differences between Python 2 and 3. For
|
||||
example `this guide <https://docs.python.org/3/howto/pyporting.html>`_ is helpful.
|
||||
Special care has to be taken when dealing with strings and byte-strings. Please note
|
||||
|
@ -45,8 +45,7 @@ The checks by Landscape and QuantifiedCode that run on every PR will provide you
|
|||
GIT
|
||||
^^^
|
||||
The master-branch contains code that has been released to PyPI. A release is marked with a tag
|
||||
corresponding to the version. Issues are closed when they have been resolved in a released version
|
||||
of the package.
|
||||
corresponding to the version. Issues are closed when they have been resolved in the development-branch.
|
||||
|
||||
When you have a change to make, begin by creating a new branch from the HEAD of `python-escpos/development`.
|
||||
Name your branch to indicate what you are trying to achieve. Good branch names might
|
||||
|
|
|
@ -3,6 +3,7 @@ include *.txt
|
|||
include COPYING
|
||||
include INSTALL
|
||||
include tox.ini
|
||||
include capabilities-data/dist/capabilities.json
|
||||
recursive-include doc *.bat
|
||||
recursive-include doc *.ico
|
||||
recursive-include doc *.py
|
||||
|
|
|
@ -0,0 +1 @@
|
|||
Subproject commit 8744f9397ef6b58aee502aa75ac1efad31c9f5d7
|
|
@ -84,8 +84,8 @@ text("text")
|
|||
|
||||
Prints raw text. Raises ``TextError`` exception.
|
||||
|
||||
set("align", "font", "type", width, height, invert, smooth, flip)
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
set("align", "font", "text_type", width, height, invert, smooth, flip)
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
Set text properties.
|
||||
|
||||
|
@ -96,7 +96,7 @@ Set text properties.
|
|||
* RIGHT > > *Default:* left
|
||||
|
||||
* ``font`` type could be ``A`` or ``B``. *Default:* A
|
||||
* ``type`` type could be ``B`` (Bold), ``U`` (Underline) or ``normal``. *Default:* normal
|
||||
* ``text_type`` type could be ``B`` (Bold), ``U`` (Underline) or ``normal``. *Default:* normal
|
||||
* ``width`` is a numeric value, 1 is for regular size, and 2 is twice the standard size. *Default*: 1
|
||||
* ``height`` is a numeric value, 1 is for regular size and 2 is twice the standard size. *Default*: 1
|
||||
* ``invert`` is a boolean value, True enables white on black printing. *Default*: False
|
||||
|
|
3
setup.py
3
setup.py
|
@ -72,7 +72,7 @@ setup(
|
|||
author='Manuel F Martinez and others',
|
||||
author_email='manpaz@bashlinux.com',
|
||||
maintainer='Patrick Kanzler',
|
||||
maintainer_email='patrick.kanzler@fablab.fau.de',
|
||||
maintainer_email='dev@pkanzler.de',
|
||||
keywords=[
|
||||
'ESC/POS',
|
||||
'thermoprinter',
|
||||
|
@ -113,6 +113,7 @@ setup(
|
|||
'pyyaml',
|
||||
'argparse',
|
||||
'argcomplete',
|
||||
'future'
|
||||
],
|
||||
setup_requires=[
|
||||
'setuptools_scm',
|
||||
|
|
File diff suppressed because one or more lines are too long
|
@ -0,0 +1 @@
|
|||
../../capabilities-data/dist/capabilities.json
|
|
@ -58,7 +58,7 @@ class BaseProfile(object):
|
|||
def get_code_pages(self):
|
||||
"""Return the support code pages as a {name: index} dict.
|
||||
"""
|
||||
return {v.lower(): k for k, v in self.codePages.items()}
|
||||
return {v: k for k, v in self.codePages.items()}
|
||||
|
||||
|
||||
|
||||
|
|
|
@ -12,21 +12,11 @@ class CodePageManager:
|
|||
def get_all(self):
|
||||
return self.data.values()
|
||||
|
||||
def encode(self, text, encoding, errors='strict'):
|
||||
"""Adds support for Japanese to the builtin str.encode().
|
||||
|
||||
TODO: Add support for custom code page data from
|
||||
escpos-printer-db.
|
||||
"""
|
||||
# Python does not have this builtin?
|
||||
if encoding.upper() == 'KATAKANA':
|
||||
return encode_katakana(text)
|
||||
|
||||
return text.encode(encoding, errors=errors)
|
||||
def get_encoding_name(self, encoding):
|
||||
# TODO resolve the encoding alias
|
||||
return encoding.upper()
|
||||
|
||||
def get_encoding(self, encoding):
|
||||
# resolve the encoding alias
|
||||
return encoding.lower()
|
||||
|
||||
return self.data[encoding]
|
||||
|
||||
CodePages = CodePageManager(CAPABILITIES['encodings'])
|
|
@ -17,6 +17,7 @@ from __future__ import division
|
|||
from __future__ import print_function
|
||||
from __future__ import unicode_literals
|
||||
|
||||
from builtins import bytes
|
||||
from .constants import CODEPAGE_CHANGE
|
||||
from .exceptions import CharCodeError, Error
|
||||
from .capabilities import get_profile
|
||||
|
@ -45,19 +46,20 @@ class Encoder(object):
|
|||
def __init__(self, codepage_map):
|
||||
self.codepages = codepage_map
|
||||
self.available_encodings = set(codepage_map.keys())
|
||||
self.available_characters = {}
|
||||
self.used_encodings = set()
|
||||
|
||||
def get_sequence(self, encoding):
|
||||
return int(self.codepages[encoding])
|
||||
|
||||
def get_encoding(self, encoding):
|
||||
def get_encoding_name(self, encoding):
|
||||
"""Given an encoding provided by the user, will return a
|
||||
canonical encoding name; and also validate that the encoding
|
||||
is supported.
|
||||
|
||||
TODO: Support encoding aliases: pc437 instead of cp437.
|
||||
"""
|
||||
encoding = CodePages.get_encoding(encoding)
|
||||
encoding = CodePages.get_encoding_name(encoding)
|
||||
if not encoding in self.codepages:
|
||||
raise ValueError((
|
||||
'Encoding "{}" cannot be used for the current profile. '
|
||||
|
@ -65,18 +67,87 @@ class Encoder(object):
|
|||
).format(encoding, ','.join(self.codepages.keys())))
|
||||
return encoding
|
||||
|
||||
def _get_codepage_char_list(self, encoding):
|
||||
"""Get codepage character list
|
||||
|
||||
Gets characters 128-255 for a given code page, as an array.
|
||||
|
||||
:param encoding: The name of the encoding. This must appear in the CodePage list
|
||||
"""
|
||||
codepage = CodePages.get_encoding(encoding)
|
||||
if 'data' in codepage:
|
||||
encodable_chars = list("".join(codepage['data']))
|
||||
assert(len(encodable_chars) == 128)
|
||||
return encodable_chars
|
||||
elif 'python_encode' in codepage:
|
||||
encodable_chars = [u" "] * 128
|
||||
for i in range(0, 128):
|
||||
codepoint = i + 128
|
||||
try:
|
||||
encodable_chars[i] = bytes([codepoint]).decode(codepage['python_encode'])
|
||||
except UnicodeDecodeError:
|
||||
# Non-encodable character, just skip it
|
||||
pass
|
||||
return encodable_chars
|
||||
raise LookupError("Can't find a known encoding for {}".format(encoding))
|
||||
|
||||
def _get_codepage_char_map(self, encoding):
|
||||
""" Get codepage character map
|
||||
|
||||
Process an encoding and return a map of Unicode characters to code points
|
||||
in this encoding.
|
||||
|
||||
This is generated once only, and returned from a cache.
|
||||
|
||||
:param encoding: The name of the encoding.
|
||||
"""
|
||||
# Skip things that were loaded previously
|
||||
if encoding in self.available_characters:
|
||||
return self.available_characters[encoding]
|
||||
codepage_char_list = self._get_codepage_char_list(encoding)
|
||||
codepage_char_map = dict((utf8, i + 128) for (i, utf8) in enumerate(codepage_char_list))
|
||||
self.available_characters[encoding] = codepage_char_map
|
||||
return codepage_char_map
|
||||
|
||||
def can_encode(self, encoding, char):
|
||||
"""Determine if a character is encodeable in the given code page.
|
||||
|
||||
:param encoding: The name of the encoding.
|
||||
:param char: The character to attempt to encode.
|
||||
"""
|
||||
available_map = {}
|
||||
try:
|
||||
encoded = CodePages.encode(char, encoding)
|
||||
assert type(encoded) is bytes
|
||||
return encoded
|
||||
available_map = self._get_codepage_char_map(encoding)
|
||||
except LookupError:
|
||||
# We don't have this encoding
|
||||
return False
|
||||
except UnicodeEncodeError:
|
||||
return False
|
||||
|
||||
return True
|
||||
# Decide whether this character is encodable in this code page
|
||||
is_ascii = ord(char) < 128
|
||||
is_encodable = char in available_map
|
||||
return is_ascii or is_encodable
|
||||
|
||||
def _encode_char(self, char, charmap, defaultchar):
|
||||
""" Encode a single character with the given encoding map
|
||||
|
||||
:param char: char to encode
|
||||
:param charmap: dictionary for mapping characters in this code page
|
||||
"""
|
||||
if ord(char) < 128:
|
||||
return ord(char)
|
||||
if char in charmap:
|
||||
return charmap[char]
|
||||
return ord(defaultchar)
|
||||
|
||||
def encode(self, text, encoding, defaultchar='?'):
|
||||
""" Encode text under the given encoding
|
||||
|
||||
:param text: Text to encode
|
||||
:param encoding: Encoding name to use (must be defined in capabilities)
|
||||
:param defaultchar: Fallback for non-encodable characters
|
||||
"""
|
||||
codepage_char_map = self._get_codepage_char_map(encoding)
|
||||
output_bytes = bytes([self._encode_char(char, codepage_char_map, defaultchar) for char in text])
|
||||
return output_bytes
|
||||
|
||||
def __encoding_sort_func(self, item):
|
||||
key, index = item
|
||||
|
@ -151,7 +222,7 @@ class MagicEncode(object):
|
|||
self.driver = driver
|
||||
self.encoder = encoder or Encoder(driver.profile.get_code_pages())
|
||||
|
||||
self.encoding = self.encoder.get_encoding(encoding) if encoding else None
|
||||
self.encoding = self.encoder.get_encoding_name(encoding) if encoding else None
|
||||
self.defaultsymbol = defaultsymbol
|
||||
self.disabled = disabled
|
||||
|
||||
|
@ -216,4 +287,4 @@ class MagicEncode(object):
|
|||
six.int2byte(self.encoder.get_sequence(encoding)))
|
||||
|
||||
if text:
|
||||
self.driver._raw(CodePages.encode(text, encoding, errors="replace"))
|
||||
self.driver._raw(self.encoder.encode(text, encoding))
|
||||
|
|
|
@ -20,7 +20,7 @@ from escpos.printer import Dummy
|
|||
|
||||
|
||||
def get_printer():
|
||||
return Dummy(magic_encode_args={'disabled': True, 'encoding': 'cp437'})
|
||||
return Dummy(magic_encode_args={'disabled': True, 'encoding': 'CP437'})
|
||||
|
||||
|
||||
@given(text=st.text())
|
||||
|
|
|
@ -26,17 +26,17 @@ from escpos.exceptions import CharCodeError, Error
|
|||
class TestEncoder:
|
||||
|
||||
def test_can_encode(self):
|
||||
assert not Encoder({'cp437': 1}).can_encode('cp437', u'€')
|
||||
assert Encoder({'cp437': 1}).can_encode('cp437', u'á')
|
||||
assert not Encoder({'CP437': 1}).can_encode('CP437', u'€')
|
||||
assert Encoder({'CP437': 1}).can_encode('CP437', u'á')
|
||||
assert not Encoder({'foobar': 1}).can_encode('foobar', 'a')
|
||||
|
||||
def test_find_suitable_encoding(self):
|
||||
assert not Encoder({'cp437': 1}).find_suitable_encoding(u'€')
|
||||
assert Encoder({'cp858': 1}).find_suitable_encoding(u'€') == 'cp858'
|
||||
assert not Encoder({'CP437': 1}).find_suitable_encoding(u'€')
|
||||
assert Encoder({'CP858': 1}).find_suitable_encoding(u'€') == 'CP858'
|
||||
|
||||
@raises(ValueError)
|
||||
def test_get_encoding(self):
|
||||
Encoder({}).get_encoding('latin1')
|
||||
Encoder({}).get_encoding_name('latin1')
|
||||
|
||||
|
||||
class TestMagicEncode:
|
||||
|
@ -51,17 +51,17 @@ class TestMagicEncode:
|
|||
|
||||
def test_init_from_none(self, driver):
|
||||
encode = MagicEncode(driver, encoding=None)
|
||||
encode.write_with_encoding('cp858', '€ ist teuro.')
|
||||
encode.write_with_encoding('CP858', '€ ist teuro.')
|
||||
assert driver.output == b'\x1bt\x13\xd5 ist teuro.'
|
||||
|
||||
def test_change_from_another(self, driver):
|
||||
encode = MagicEncode(driver, encoding='cp437')
|
||||
encode.write_with_encoding('cp858', '€ ist teuro.')
|
||||
encode = MagicEncode(driver, encoding='CP437')
|
||||
encode.write_with_encoding('CP858', '€ ist teuro.')
|
||||
assert driver.output == b'\x1bt\x13\xd5 ist teuro.'
|
||||
|
||||
def test_no_change(self, driver):
|
||||
encode = MagicEncode(driver, encoding='cp858')
|
||||
encode.write_with_encoding('cp858', '€ ist teuro.')
|
||||
encode = MagicEncode(driver, encoding='CP858')
|
||||
encode.write_with_encoding('CP858', '€ ist teuro.')
|
||||
assert driver.output == b'\xd5 ist teuro.'
|
||||
|
||||
class TestWrite:
|
||||
|
@ -72,14 +72,14 @@ class TestMagicEncode:
|
|||
assert driver.output == b'\x1bt\x0f\xa4 ist teuro.'
|
||||
|
||||
def test_write_disabled(self, driver):
|
||||
encode = MagicEncode(driver, encoding='cp437', disabled=True)
|
||||
encode = MagicEncode(driver, encoding='CP437', disabled=True)
|
||||
encode.write('€ ist teuro.')
|
||||
assert driver.output == b'? ist teuro.'
|
||||
|
||||
def test_write_no_codepage(self, driver):
|
||||
encode = MagicEncode(
|
||||
driver, defaultsymbol="_", encoder=Encoder({'cp437': 1}),
|
||||
encoding='cp437')
|
||||
driver, defaultsymbol="_", encoder=Encoder({'CP437': 1}),
|
||||
encoding='CP437')
|
||||
encode.write(u'€ ist teuro.')
|
||||
assert driver.output == b'_ ist teuro.'
|
||||
|
||||
|
@ -87,7 +87,7 @@ class TestMagicEncode:
|
|||
|
||||
def test(self, driver):
|
||||
encode = MagicEncode(driver)
|
||||
encode.force_encoding('cp437')
|
||||
encode.force_encoding('CP437')
|
||||
assert driver.output == b'\x1bt\x00'
|
||||
|
||||
encode.write('€ ist teuro.')
|
||||
|
|
Loading…
Reference in New Issue