2016-07-23 20:16:11 +00:00
|
|
|
#!/usr/bin/python
|
|
|
|
# -*- coding: utf-8 -*-
|
|
|
|
""" Magic Encode
|
|
|
|
|
|
|
|
This module tries to convert a UTF-8 string to an encoded string for the printer.
|
|
|
|
It uses trial and error in order to guess the right codepage.
|
|
|
|
The code is based on the encoding-code in py-xml-escpos by @fvdsn.
|
|
|
|
|
|
|
|
:author: `Patrick Kanzler <dev@pkanzler.de>`_
|
|
|
|
:organization: `python-escpos <https://github.com/python-escpos>`_
|
|
|
|
:copyright: Copyright (c) 2016 Patrick Kanzler and Frédéric van der Essen
|
|
|
|
:license: GNU GPL v3
|
|
|
|
"""
|
|
|
|
|
|
|
|
from __future__ import absolute_import
|
|
|
|
from __future__ import division
|
|
|
|
from __future__ import print_function
|
|
|
|
from __future__ import unicode_literals
|
|
|
|
|
2016-08-27 09:09:08 +00:00
|
|
|
from .constants import CODEPAGE_CHANGE
|
2016-07-23 20:16:11 +00:00
|
|
|
from .exceptions import CharCodeError, Error
|
2016-08-27 09:09:08 +00:00
|
|
|
from .capabilities import get_profile
|
2016-08-30 15:05:31 +00:00
|
|
|
from .codepages import CodePages
|
2016-07-23 20:16:11 +00:00
|
|
|
import copy
|
|
|
|
import six
|
|
|
|
|
2016-08-27 09:09:08 +00:00
|
|
|
|
|
|
|
class Encoder(object):
    """Takes a list of available code spaces. Picks the right one for a
    given character.

    Note: To determine the code page, it needs to do the conversion, and
    thus already knows what the final byte in the target encoding would
    be. Nevertheless, the API of this class doesn't return the byte.

    The caller has to do the character conversion itself.

        $ python -m timeit -s "{u'ö':'a'}.get(u'ö')"
        100000000 loops, best of 3: 0.0133 usec per loop

        $ python -m timeit -s "u'ö'.encode('latin1')"
        100000000 loops, best of 3: 0.0141 usec per loop
    """

    def __init__(self, codepage_map):
        """Store the code pages this encoder may choose from.

        :param codepage_map: mapping of encoding name to a value that
            ``int()`` accepts (see :meth:`get_sequence`).
        """
        self.codepages = codepage_map
        self.available_encodings = set(codepage_map)
        # Encodings that were successfully picked at least once; they are
        # preferred by find_suitable_encoding().
        self.used_encodings = set()

    def get_sequence(self, encoding):
        """Return the number stored for ``encoding`` as an ``int``.

        :raises KeyError: if ``encoding`` is not in the code page map.
        """
        return int(self.codepages[encoding])

    def get_encoding(self, encoding):
        """Given an encoding provided by the user, will return a
        canonical encoding name; and also validate that the encoding
        is supported.

        TODO: Support encoding aliases.

        :raises ValueError: if the canonical name is not part of the
            code page map of this encoder.
        """
        encoding = CodePages.get_encoding(encoding)
        if encoding not in self.codepages:
            raise ValueError((
                'Encoding "{}" cannot be used for the current profile. '
                'Valid encodings are: {}'
            ).format(encoding, ','.join(self.codepages.keys())))
        return encoding

    def can_encode(self, encoding, char):
        """Tell whether ``char`` can be represented in ``encoding``.

        :returns: ``True`` if the conversion succeeds, ``False`` if the
            encoding is unknown or cannot represent the character.
        """
        try:
            CodePages.encode(char, encoding)
        except (LookupError, UnicodeEncodeError):
            # LookupError: we don't have this encoding.
            # UnicodeEncodeError: the character is not in this code page.
            return False
        return True

    def _encodings_by_priority(self):
        """Return the encoding names in the order they should be tried.

        Already used encodings come first, then the remaining ones; both
        groups are ordered by ascending slot number. The name is the last
        sort criterion so the result is deterministic.
        """
        return sorted(
            self.codepages,
            key=lambda name: (
                name not in self.used_encodings,
                self.get_sequence(name),
                name,
            ))

    def find_suitable_encoding(self, char):
        """Return the first encoding able to encode ``char``.

        The order of our search is a specific one:

        1. code pages that we already tried before; there is a good
           chance they might work again, reducing the search space,
           and by re-using already used encodings we might also
           reduce the number of codepage change instructions we have
           to send. Still, any performance gains will presumably be
           fairly minor.

        2. code pages in lower ESCPOS slots first. Presumably, they
           are more likely to be supported, so if a printer profile
           is missing or incomplete, we might increase our chance
           that the code page we pick for this character is actually
           supported.

        No cache is used, because encoding a character is already fast.

        :returns: the encoding name, or ``None`` if no available code
            page can encode the character.
        """
        for encoding in self._encodings_by_priority():
            if self.can_encode(encoding, char):
                # This encoding worked; add it to the set of used ones.
                self.used_encodings.add(encoding)
                return encoding
        return None
|
|
|
|
|
|
|
|
|
2016-07-23 20:16:11 +00:00
|
|
|
class MagicEncode(object):
    """A helper that helps us to automatically switch to the right
    code page to encode any given Unicode character.

    This will consider the printers supported codepages, according
    to the printer profile, and if a character cannot be encoded
    with the current profile, it will attempt to find a suitable one.

    If the printer does not support a suitable code page, it can
    insert an error character.

    :param driver: object whose ``_raw()`` method receives the bytes to
        send; its ``profile.get_code_pages()`` is used to build the
        encoder when none is passed in.
    :param encoding: If you know the current encoding of the printer
        when initializing this class, set it here. If the current
        encoding is unknown, the first character emitted will be a
        codepage switch.
    :param disabled: if true, never switch code pages automatically;
        requires ``encoding`` to be set.
    :param defaultsymbol: text written in place of a character that no
        code page can encode.
    :param encoder: encoder object to use instead of a new
        :class:`Encoder` built from the driver profile.
    :raises Error: if ``disabled`` is set without an ``encoding``.
    """

    def __init__(self, driver, encoding=None, disabled=False,
                 defaultsymbol='?', encoder=None):
        if disabled and not encoding:
            raise Error('If you disable magic encode, you need to define an encoding!')

        self.driver = driver
        self.encoder = encoder or Encoder(driver.profile.get_code_pages())

        self.encoding = self.encoder.get_encoding(encoding) if encoding else None
        self.defaultsymbol = defaultsymbol
        self.disabled = disabled

    def force_encoding(self, encoding):
        """Sets a fixed encoding. The change is emitted right away.

        From now on, this buffer will not switch the code page anymore.
        However, it will still keep track of the current code page.

        Passing a false value (e.g. ``None``) re-enables automatic
        switching instead.
        """
        if not encoding:
            self.disabled = False
        else:
            self.write_with_encoding(encoding, None)
            self.disabled = True

    def write(self, text):
        """Write the text, automatically switching encodings.
        """
        if self.disabled:
            self.write_with_encoding(self.encoding, text)
            return

        # TODO: Currently this very simple loop means we send every
        # character individually to the printer. We can probably
        # improve performance by searching the text for the first
        # character that cannot be rendered using the current code
        # page, and then sending all of those characters at once.
        # Or, should a lower-level buffer be responsible for that?

        for char in text:
            # See if the current code page works for this character;
            # staying on it avoids emitting a code page change.
            if self.encoding and self.encoder.can_encode(self.encoding, char):
                self.write_with_encoding(self.encoding, char)
                continue

            # We have to find another way to print this character.
            # See if any of the code pages that the printer profile supports
            # can encode this character.
            encoding = self.encoder.find_suitable_encoding(char)
            if not encoding:
                self._handle_character_failed(char)
                continue

            self.write_with_encoding(encoding, char)

    def _handle_character_failed(self, char):
        """Called when no codepage was found to render a character.
        """
        # If the failing character is itself part of the default symbol,
        # writing the default symbol would fail in the same way and recurse
        # without end; drop the character instead.
        if char in self.defaultsymbol:
            return

        # Writing the default symbol via write() allows us to avoid
        # unnecessary codepage switches.
        self.write(self.defaultsymbol)

    def write_with_encoding(self, encoding, text):
        """Send ``text`` to the driver encoded with ``encoding``.

        A code page change command is emitted first when ``encoding``
        differs from the current one. ``text`` may be ``None`` (or empty)
        to only switch the code page.

        :raises Error: if ``text`` is neither ``None`` nor unicode text.
        """
        if text is not None and not isinstance(text, six.text_type):
            raise Error("The supplied text has to be unicode, but is of type {type}.".format(
                type=type(text)
            ))

        # We always know the current code page; if the new codepage
        # is different, emit a change command.
        if encoding != self.encoding:
            self.encoding = encoding
            # Plain concatenation: bytes have no .format() on Python 3.
            self.driver._raw(
                CODEPAGE_CHANGE +
                six.int2byte(self.encoder.get_sequence(encoding)))

        if text:
            self.driver._raw(CodePages.encode(text, encoding, errors="replace"))
|