python-escpos/src/escpos/magicencode.py

251 lines
8.2 KiB
Python
Raw Normal View History

#!/usr/bin/python
# -*- coding: utf-8 -*-
""" Magic Encode
This module tries to convert a UTF-8 string to an encoded string for the printer.
It uses trial and error in order to guess the right codepage.
The code is based on the encoding-code in py-xml-escpos by @fvdsn.
:author: `Patrick Kanzler <dev@pkanzler.de>`_
:organization: `python-escpos <https://github.com/python-escpos>`_
:copyright: Copyright (c) 2016 Patrick Kanzler and Frédéric van der Essen
:license: GNU GPL v3
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
from .constants import CHARCODE
from .exceptions import CharCodeError, Error
import copy
import six
try:
import jcconv
except ImportError:
jcconv = None
class MagicEncode(object):
    """Encode unicode text into printer code pages by trial and error.

    The encoder remembers the code page that was used last and keeps using it
    as long as the characters can be represented in it. When a character
    cannot be encoded, the other entries of ``CHARCODE`` are tried one after
    another; if none of them works, a configurable replacement symbol is
    emitted instead.
    """

    def __init__(self, startencoding='PC437', force_encoding=False, defaultsymbol=b'', defaultencoding='PC437'):
        """Create an encoder.

        :param startencoding: key of ``CHARCODE`` that is tried first
        :param force_encoding: if true, :meth:`encode_str` never switches the code page
        :param defaultsymbol: binary string emitted for characters that no code page can encode
        :param defaultencoding: key of ``CHARCODE`` selected when a character could not be encoded
        :raises CharCodeError: if one of the encodings is unknown or has no python codec name
        :raises Error: if ``defaultsymbol`` is not a binary string
        """
        # codepage_name() raises if the code page is unknown or unusable
        MagicEncode.codepage_name(startencoding)
        MagicEncode.codepage_name(defaultencoding)
        if not isinstance(defaultsymbol, six.binary_type):
            raise Error("The supplied symbol {sym} has to be a binary string".format(sym=defaultsymbol))
        self.encoding = startencoding
        self.defaultsymbol = defaultsymbol
        self.defaultencoding = defaultencoding
        self.force_encoding = force_encoding
        # number of characters added by the katakana conversion; initialised
        # here so that encode_str()/encode_char() also work without a
        # preceding encode_text() call
        self.extra_chars = 0

    def set_encoding(self, encoding='PC437', force_encoding=False):
        """Set an encoding (normally not used).

        This function should normally not be used since it manipulates the
        automagic behaviour. However, if you want to force a certain codepage,
        then you can use this function.

        :param encoding: must be a valid encoding from CHARCODE
        :param force_encoding: whether the encoding should not be changed automatically
        :raises CharCodeError: if ``encoding`` is unknown or has no python codec name
        """
        self.codepage_name(encoding)
        self.encoding = encoding
        self.force_encoding = force_encoding

    @staticmethod
    def codepage_sequence(codepage):
        """Return the first element of the ``CHARCODE`` entry (the codepage-sequence).

        :raises CharCodeError: if ``codepage`` is not a key of ``CHARCODE``
        """
        try:
            return CHARCODE[codepage][0]
        except KeyError:
            raise CharCodeError("The encoding {enc} is unknown.".format(enc=codepage))

    @staticmethod
    def codepage_name(codepage):
        """Return the second element of the ``CHARCODE`` entry (the python codec name).

        :raises CharCodeError: if ``codepage`` is not a key of ``CHARCODE`` or
            its codec name is the empty string
        """
        try:
            name = CHARCODE[codepage][1]
        except KeyError:
            raise CharCodeError("The encoding {enc} is unknown.".format(enc=codepage))
        if name == '':
            raise CharCodeError("The codepage {enc} does not have a connected python-codepage".format(enc=codepage))
        return name

    def encode_char(self, char):
        """Encode a single unicode character.

        The result is the encoded character, prefixed with the code page
        change sequence whenever a code page other than ``self.encoding`` had
        to be selected (``self.encoding`` is updated in that case).

        :param char: unicode character
        :return: byte-string for the printer
        :raises Error: if ``char`` is not unicode text
        """
        if not isinstance(char, six.text_type):
            raise Error("The supplied text has to be unicode, but is of type {type}.".format(
                type=type(char)
            ))
        encoded = b''
        # reuse the last encoding to prevent code page switches at every character
        encoding = self.encoding
        # shallow copy: failed candidates are removed without touching CHARCODE
        remaining = copy.copy(CHARCODE)
        while True:  # try the candidates until one succeeds or none is left
            try:
                if encoding == 'KATAKANA':  # Japanese characters
                    if jcconv:
                        # try to convert japanese text to half-katakanas
                        kata = jcconv.kata2half(jcconv.hira2kata(char))
                        if kata != char:
                            # the conversion may result in multiple characters
                            self.extra_chars += len(kata) - 1
                            return self.encode_str(kata)
                    else:
                        kata = char
                    if kata not in TXT_ENC_KATAKANA_MAP:
                        raise ValueError()
                    encoded = TXT_ENC_KATAKANA_MAP[kata]
                    break
                try:
                    # an unencodable character raises UnicodeEncodeError,
                    # which is a ValueError and is handled by the outer clause
                    encoded = char.encode(MagicEncode.codepage_name(encoding))
                except LookupError:
                    raise ValueError("The encoding {enc} seems to not exist in Python".format(enc=encoding))
                except CharCodeError:
                    raise ValueError("The encoding {enc} is not fully configured in constants".format(
                        enc=encoding
                    ))
                break
            except ValueError:  # the encoding failed, select another one and retry
                remaining.pop(encoding, None)
                if remaining:
                    encoding = next(iter(remaining))
                else:
                    # could not encode, output error character
                    encoding = self.defaultencoding
                    encoded = self.defaultsymbol
                    break
        if encoding != self.encoding:
            # if the encoding changed, remember it and prefix the character
            # with the esc-pos encoding change sequence
            self.encoding = encoding
            encoded = self.codepage_sequence(encoding) + encoded
        return encoded

    def encode_str(self, txt):
        """Encode a unicode string.

        The output always starts with the code page sequence of the current
        encoding to make sure the right codepage is set in the printer.

        :param txt: unicode text
        :return: byte-string for the printer
        """
        # the prefix has to be computed first: encode_char() may change self.encoding
        parts = [self.codepage_sequence(self.encoding)]
        if self.force_encoding:
            parts.append(txt.encode(self.codepage_name(self.encoding)))
        else:
            parts.extend(self.encode_char(c) for c in txt)
        return b''.join(parts)

    @staticmethod
    def _strip_double_spaces(data, count):
        """Replace at most ``count`` double spaces in ``data`` by single spaces.

        :param data: byte-string
        :param count: maximum number of spaces to remove
        :return: the shortened byte-string
        """
        double_space = b'\x20\x20'  # bytes literal: str.find arguments fail on bytes in Python 3
        while count > 0:
            pos = data.find(double_space)
            if pos == -1:
                break
            data = data[:pos] + data[pos + 1:]
            count -= 1
        return data

    def encode_text(self, txt):
        """Return a byte-string with encoded text.

        :param txt: text that shall be encoded
        :return: byte-string for the printer, or ``None`` if ``txt`` is empty
        """
        if not txt:
            return
        self.extra_chars = 0
        encoded = self.encode_str(txt)
        # if the utf-8 -> codepage conversion inserted extra characters,
        # remove double spaces to try to restore the original string length
        # and prevent printing alignment issues
        trimmed = self._strip_double_spaces(encoded, self.extra_chars)
        # keep the counter at the number of extra characters still uncompensated
        self.extra_chars -= len(encoded) - len(trimmed)
        return trimmed
# TODO: encode emoticons with a charmap
# TODO: pass a list of suppressed charcodes to Escpos
# TODO: adjust the visibility of the methods (actually only set_encoding and encode_text are needed)
TXT_ENC_KATAKANA_MAP = {
    # Maps half-width katakana symbols to KATAKANA page codes.
    # The keys are written as \u escapes (the Unicode halfwidth forms
    # U+FF61..U+FF9F) so that they survive re-encoding of this file; they are
    # text literals because the module enables unicode_literals.
    # Half-Width Katakanas
    '\uff61': b'\xa1',  # halfwidth ideographic full stop
    '\uff62': b'\xa2',  # halfwidth left corner bracket
    '\uff63': b'\xa3',  # halfwidth right corner bracket
    '\uff64': b'\xa4',  # halfwidth ideographic comma
    '\uff65': b'\xa5',  # halfwidth katakana middle dot
    '\uff66': b'\xa6',  # WO
    '\uff67': b'\xa7',  # small A
    '\uff68': b'\xa8',  # small I
    '\uff69': b'\xa9',  # small U
    '\uff6a': b'\xaa',  # small E
    '\uff6b': b'\xab',  # small O
    '\uff6c': b'\xac',  # small YA
    '\uff6d': b'\xad',  # small YU
    '\uff6e': b'\xae',  # small YO
    '\uff6f': b'\xaf',  # small TU
    '\uff70': b'\xb0',  # prolonged sound mark
    '\uff71': b'\xb1',  # A
    '\uff72': b'\xb2',  # I
    '\uff73': b'\xb3',  # U
    '\uff74': b'\xb4',  # E
    '\uff75': b'\xb5',  # O
    '\uff76': b'\xb6',  # KA
    '\uff77': b'\xb7',  # KI
    '\uff78': b'\xb8',  # KU
    '\uff79': b'\xb9',  # KE
    '\uff7a': b'\xba',  # KO
    '\uff7b': b'\xbb',  # SA
    '\uff7c': b'\xbc',  # SI
    '\uff7d': b'\xbd',  # SU
    '\uff7e': b'\xbe',  # SE
    '\uff7f': b'\xbf',  # SO
    '\uff80': b'\xc0',  # TA
    '\uff81': b'\xc1',  # TI
    '\uff82': b'\xc2',  # TU
    '\uff83': b'\xc3',  # TE
    '\uff84': b'\xc4',  # TO
    '\uff85': b'\xc5',  # NA
    '\uff86': b'\xc6',  # NI
    '\uff87': b'\xc7',  # NU
    '\uff88': b'\xc8',  # NE
    '\uff89': b'\xc9',  # NO
    '\uff8a': b'\xca',  # HA
    '\uff8b': b'\xcb',  # HI
    '\uff8c': b'\xcc',  # HU
    '\uff8d': b'\xcd',  # HE
    '\uff8e': b'\xce',  # HO
    '\uff8f': b'\xcf',  # MA
    '\uff90': b'\xd0',  # MI
    '\uff91': b'\xd1',  # MU
    '\uff92': b'\xd2',  # ME
    '\uff93': b'\xd3',  # MO
    '\uff94': b'\xd4',  # YA
    '\uff95': b'\xd5',  # YU
    '\uff96': b'\xd6',  # YO
    '\uff97': b'\xd7',  # RA
    '\uff98': b'\xd8',  # RI
    '\uff99': b'\xd9',  # RU
    '\uff9a': b'\xda',  # RE
    '\uff9b': b'\xdb',  # RO
    '\uff9c': b'\xdc',  # WA
    '\uff9d': b'\xdd',  # N
    '\uff9e': b'\xde',  # halfwidth katakana voiced sound mark
    '\uff9f': b'\xdf',  # halfwidth katakana semi-voiced sound mark
}