re-work encoder to consult the capabilities database and use custom code pages or python encoder as necessary

This commit is contained in:
Michael Billington 2016-09-11 21:03:55 +10:00
parent d9a6960f07
commit 9a65945fcd
4 changed files with 50 additions and 32 deletions

View File

@ -58,7 +58,7 @@ class BaseProfile(object):
def get_code_pages(self): def get_code_pages(self):
"""Return the support code pages as a {name: index} dict. """Return the support code pages as a {name: index} dict.
""" """
return {v.lower(): k for k, v in self.codePages.items()} return {v: k for k, v in self.codePages.items()}

View File

@ -12,21 +12,11 @@ class CodePageManager:
def get_all(self): def get_all(self):
return self.data.values() return self.data.values()
def encode(self, text, encoding, errors='strict'): def get_encoding_name(self, encoding):
"""Adds support for Japanese to the builtin str.encode(). # TODO resolve the encoding alias
return encoding.upper()
TODO: Add support for custom code page data from
escpos-printer-db.
"""
# Python has not have this builtin?
if encoding.upper() == 'KATAKANA':
return encode_katakana(text)
return text.encode(encoding, errors=errors)
def get_encoding(self, encoding): def get_encoding(self, encoding):
# resolve the encoding alias return self.data[encoding]
return encoding.lower()
CodePages = CodePageManager(CAPABILITIES['encodings']) CodePages = CodePageManager(CAPABILITIES['encodings'])

View File

@ -52,14 +52,14 @@ class Encoder(object):
def get_sequence(self, encoding): def get_sequence(self, encoding):
return int(self.codepages[encoding]) return int(self.codepages[encoding])
def get_encoding(self, encoding): def get_encoding_name(self, encoding):
"""Given an encoding provided by the user, will return a """Given an encoding provided by the user, will return a
canonical encoding name; and also validate that the encoding canonical encoding name; and also validate that the encoding
is supported. is supported.
TODO: Support encoding aliases: pc437 instead of cp437. TODO: Support encoding aliases: pc437 instead of cp437.
""" """
encoding = CodePages.get_encoding(encoding) encoding = CodePages.get_encoding_name(encoding)
if not encoding in self.codepages: if not encoding in self.codepages:
raise ValueError(( raise ValueError((
'Encoding "{}" cannot be used for the current profile. ' 'Encoding "{}" cannot be used for the current profile. '
@ -72,19 +72,24 @@ class Encoder(object):
Gets characters 128-255 for a given code page, as an array. Gets characters 128-255 for a given code page, as an array.
:param encoding: The name of the encoding. This must be a valid python encoding. :param encoding: The name of the encoding. This must appear in the CodePage list
""" """
# Compute the encodable characters as an array (this is the format codepage = CodePages.get_encoding(encoding)
# that for non-standard codings come in) if 'data' in codepage:
encodable_chars = [u" "] * 128 encodable_chars = list("".join(codepage['data']))
for i in range(0, 128): assert(len(encodable_chars) == 128)
codepoint = i + 128 return encodable_chars
try: elif 'python_encode' in codepage:
encodable_chars[i] = bytes([codepoint]).decode(encoding) encodable_chars = [u" "] * 128
except UnicodeDecodeError: for i in range(0, 128):
# Non-encodable character codepoint = i + 128
pass try:
return encodable_chars encodable_chars[i] = bytes([codepoint]).decode(codepage['python_encode'])
except UnicodeDecodeError:
# Non-encodable character, just skip it
pass
return encodable_chars
raise LookupError("Can't find a known encoding for {}".format(encoding))
def _get_codepage_char_map(self, encoding): def _get_codepage_char_map(self, encoding):
""" Get codepage character map """ Get codepage character map
@ -121,6 +126,29 @@ class Encoder(object):
is_encodable = char in available_map is_encodable = char in available_map
return is_ascii or is_encodable return is_ascii or is_encodable
def _encode_char(self, char, charmap):
""" Encode a single character with the given encoding map
:param char: char to encode
:param charmap: dictionary for mapping characters in this code page
"""
if char in charmap:
return charmap[char]
if ord(char) < 128:
return ord(char)
return ord('?')
def encode(self, text, encoding, defaultchar='?'):
    """ Encode text under the given encoding

    Each character is looked up in the character map stored for
    ``encoding``; characters with a code point below 128 that are not in
    the map are emitted as that code point, and everything else is
    replaced by ``defaultchar``.

    :param text: Text to encode
    :param encoding: Encoding name to use (must be defined in capabilities)
    :param defaultchar: Fallback for non-encodable characters. Must be a
        single character, since it is passed to ``ord()``.
    :returns: The encoded text as a byte string
    :raises KeyError: if ``encoding`` has no entry in
        ``self.available_characters``
    """
    charmap = self.available_characters[encoding]
    fallback = ord(defaultchar)
    encoded = bytearray()
    for char in text:
        if char in charmap:
            encoded.append(charmap[char])
            continue
        codepoint = ord(char)
        encoded.append(codepoint if codepoint < 128 else fallback)
    # Going through bytearray gives the same byte string on Python 2 and 3;
    # bytes([97]) on Python 2 would yield the text "[97]".
    return bytes(encoded)
def __encoding_sort_func(self, item): def __encoding_sort_func(self, item):
key, index = item key, index = item
return ( return (
@ -194,7 +222,7 @@ class MagicEncode(object):
self.driver = driver self.driver = driver
self.encoder = encoder or Encoder(driver.profile.get_code_pages()) self.encoder = encoder or Encoder(driver.profile.get_code_pages())
self.encoding = self.encoder.get_encoding(encoding) if encoding else None self.encoding = self.encoder.get_encoding_name(encoding) if encoding else None
self.defaultsymbol = defaultsymbol self.defaultsymbol = defaultsymbol
self.disabled = disabled self.disabled = disabled
@ -259,4 +287,4 @@ class MagicEncode(object):
six.int2byte(self.encoder.get_sequence(encoding))) six.int2byte(self.encoder.get_sequence(encoding)))
if text: if text:
self.driver._raw(CodePages.encode(text, encoding, errors="replace")) self.driver._raw(self.encoder.encode(text, encoding))

View File

@ -36,7 +36,7 @@ class TestEncoder:
@raises(ValueError) @raises(ValueError)
def test_get_encoding(self): def test_get_encoding(self):
Encoder({}).get_encoding('latin1') Encoder({}).get_encoding_name('latin1')
class TestMagicEncode: class TestMagicEncode: