2016-07-23 20:16:11 +00:00
|
|
|
#!/usr/bin/python
|
|
|
|
# -*- coding: utf-8 -*-
|
|
|
|
""" Magic Encode
|
|
|
|
|
|
|
|
This module tries to convert a UTF-8 string to an encoded string for the printer.
|
|
|
|
It uses trial and error in order to guess the right codepage.
|
|
|
|
The code is based on the encoding-code in py-xml-escpos by @fvdsn.
|
|
|
|
|
|
|
|
:author: `Patrick Kanzler <dev@pkanzler.de>`_
|
|
|
|
:organization: `python-escpos <https://github.com/python-escpos>`_
|
|
|
|
:copyright: Copyright (c) 2016 Patrick Kanzler and Frédéric van der Essen
|
|
|
|
:license: GNU GPL v3
|
|
|
|
"""
|
|
|
|
|
|
|
|
from __future__ import absolute_import
|
|
|
|
from __future__ import division
|
|
|
|
from __future__ import print_function
|
|
|
|
from __future__ import unicode_literals
|
|
|
|
|
|
|
|
from .constants import CHARCODE
|
|
|
|
from .exceptions import CharCodeError, Error
|
|
|
|
import copy
|
|
|
|
import six
|
|
|
|
|
|
|
|
try:
|
|
|
|
import jcconv
|
|
|
|
except ImportError:
|
|
|
|
jcconv = None
|
|
|
|
|
|
|
|
class MagicEncode(object):
    """ Magic Encode Class

    It tries to automatically encode unicode input into the right coding. When encoding is impossible a configurable
    symbol will be inserted.

    The instance remembers the last codepage that was used (``self.encoding``) so that consecutive characters
    do not each trigger a codepage switch.
    """

    def __init__(self, startencoding='PC437', force_encoding=False, defaultsymbol=b'', defaultencoding='PC437'):
        """creates the encoder

        :param startencoding: key of CHARCODE that is tried first
        :param force_encoding: if True, :meth:`encode_str` encodes the whole text with the current codepage
            instead of searching a codepage for every character
        :param defaultsymbol: binary string that is emitted for a character that no codepage can encode
        :param defaultencoding: key of CHARCODE that is selected when a character could not be encoded
        :raises CharCodeError: if one of the encodings is unknown or has no connected python-codepage
        :raises Error: if `defaultsymbol` is not a binary string
        """
        # running these functions makes sure that the encoding is suitable
        MagicEncode.codepage_name(startencoding)
        MagicEncode.codepage_name(defaultencoding)

        if not isinstance(defaultsymbol, six.binary_type):
            raise Error("The supplied symbol {sym} has to be a binary string".format(sym=defaultsymbol))

        self.encoding = startencoding
        self.defaultsymbol = defaultsymbol
        self.defaultencoding = defaultencoding
        self.force_encoding = force_encoding
        # Number of additional characters created by the katakana conversion in encode_char.
        # It is initialised here so that encode_char and encode_str can be called without encode_text.
        self.extra_chars = 0

    def set_encoding(self, encoding='PC437', force_encoding=False):
        """sets an encoding (normally not used)

        This function should normally not be used since it manipulates the automagic behaviour. However, if you want to
        force a certain codepage, then you can use this function.

        :param encoding: must be a valid encoding from CHARCODE
        :param force_encoding: whether the encoding should not be changed automatically
        :raises CharCodeError: if the encoding is unknown or has no connected python-codepage
        """
        # validates the encoding; raises before any state is changed
        self.codepage_name(encoding)
        self.encoding = encoding
        self.force_encoding = force_encoding

    @staticmethod
    def codepage_sequence(codepage):
        """returns the corresponding codepage-sequence

        :param codepage: key of CHARCODE
        :raises CharCodeError: if the codepage is not a key of CHARCODE
        """
        try:
            return CHARCODE[codepage][0]
        except KeyError:
            raise CharCodeError("The encoding {enc} is unknown.".format(enc=codepage))

    @staticmethod
    def codepage_name(codepage):
        """returns the corresponding codepage-name (for python)

        :param codepage: key of CHARCODE
        :raises CharCodeError: if the codepage is not a key of CHARCODE or if its python-codepage is empty
        """
        try:
            name = CHARCODE[codepage][1]
        except KeyError:
            raise CharCodeError("The encoding {enc} is unknown.".format(enc=codepage))
        if name == '':
            raise CharCodeError("The codepage {enc} does not have a connected python-codepage".format(enc=codepage))
        return name

    def encode_char(self, char):
        """
        Encodes a single unicode character into a sequence of
        esc-pos code page change instructions and character declarations

        The current codepage is tried first, then every other entry of CHARCODE. If no codepage can encode the
        character, the default symbol is returned and the default encoding is selected.

        :param char: unicode character that shall be encoded
        :return: byte-string; prefixed with the codepage-sequence if the codepage had to be changed
        :raises Error: if `char` is not a unicode string
        """
        if not isinstance(char, six.text_type):
            raise Error("The supplied text has to be unicode, but is of type {type}.".format(
                type=type(char)
            ))
        encoded = b''
        encoding = self.encoding  # we reuse the last encoding to prevent code page switches at every character
        remaining = copy.copy(CHARCODE)  # codepages that have not been tried yet

        while True:  # Trying all encoding until one succeeds
            try:
                if encoding == 'KATAKANA':  # Japanese characters
                    if jcconv:
                        # try to convert japanese text to half-katakanas
                        kata = jcconv.kata2half(jcconv.hira2kata(char))
                        if kata != char:
                            self.extra_chars += len(kata) - 1
                            # the conversion may result in multiple characters
                            return self.encode_str(kata)
                    else:
                        kata = char

                    if kata in TXT_ENC_KATAKANA_MAP:
                        encoded = TXT_ENC_KATAKANA_MAP[kata]
                        break
                    else:
                        raise ValueError()
                else:
                    try:
                        enc_name = MagicEncode.codepage_name(encoding)
                        # a UnicodeEncodeError is a ValueError and therefore triggers the retry below
                        encoded = char.encode(enc_name)
                    except LookupError:
                        raise ValueError("The encoding {enc} seems to not exist in Python".format(enc=encoding))
                    except CharCodeError:
                        raise ValueError("The encoding {enc} is not fully configured in constants".format(
                            enc=encoding
                        ))
                    break

            except ValueError:  # the encoding failed, select another one and retry
                remaining.pop(encoding, None)
                if remaining:
                    encoding = next(iter(remaining))
                else:
                    encoding = self.defaultencoding
                    encoded = self.defaultsymbol  # could not encode, output error character
                    break

        if encoding != self.encoding:
            # if the encoding changed, remember it and prefix the character with
            # the esc-pos encoding change sequence
            self.encoding = encoding
            encoded = CHARCODE[encoding][0] + encoded

        return encoded

    def encode_str(self, txt):
        """encodes a unicode string without any post-processing

        The result always starts with the codepage-sequence of the current encoding.

        :param txt: unicode text that shall be encoded
        :return: byte-string containing codepage-sequences and encoded characters
        """
        # make sure the right codepage is set in the printer
        encoded = self.codepage_sequence(self.encoding)
        if self.force_encoding:
            encoded += txt.encode(self.codepage_name(self.encoding))
        else:
            for c in txt:
                encoded += self.encode_char(c)
        return encoded

    def encode_text(self, txt):
        """returns a byte-string with encoded text

        :param txt: text that shall be encoded
        :return: byte-string for the printer; None if `txt` is empty
        """
        if not txt:
            return

        self.extra_chars = 0

        txt = self.encode_str(txt)

        # if the utf-8 -> codepage conversion inserted extra characters,
        # remove double spaces to try to restore the original string length
        # and prevent printing alignment issues
        while self.extra_chars > 0:
            # txt is a byte-string here, so the needle has to be a byte-string as well
            dspace = txt.find(b'  ')
            if dspace > 0:
                txt = txt[:dspace] + txt[dspace+1:]
                self.extra_chars -= 1
            else:
                break

        return txt
|
|
|
|
|
|
|
|
|
|
|
|
# TODO: encode emoticons with a charmap
|
|
|
|
# TODO: pass a list of suppressed charcodes to Escpos
|
2016-07-25 15:25:13 +00:00
|
|
|
# TODO: adjust the visibility of the methods (actually only set_encoding and encode_text are needed)
|
2016-07-23 20:16:11 +00:00
|
|
|
|
|
|
|
# Maps Katakana symbols to KATAKANA Page Codes.
#
# The symbols are listed in the order of their page codes: the first symbol
# is mapped to b'\xa1' and every following symbol to the next byte value,
# so that the last one is mapped to b'\xdf'.
TXT_ENC_KATAKANA_MAP = {
    symbol: bytes(bytearray((0xa1 + offset,)))
    for offset, symbol in enumerate((
        # Half-Width Katakanas
        # 0xa1 - 0xaf
        '。', '「', '」', '、', '・', 'ヲ', 'ァ', 'ィ',
        'ゥ', 'ェ', 'ォ', 'ャ', 'ュ', 'ョ', 'ッ',
        # 0xb0 - 0xbf
        'ー', 'ア', 'イ', 'ウ', 'エ', 'オ', 'カ', 'キ',
        'ク', 'ケ', 'コ', 'サ', 'シ', 'ス', 'セ', 'ソ',
        # 0xc0 - 0xcf
        'タ', 'チ', 'ツ', 'テ', 'ト', 'ナ', 'ニ', 'ヌ',
        'ネ', 'ノ', 'ハ', 'ヒ', 'フ', 'ヘ', 'ホ', 'マ',
        # 0xd0 - 0xdf
        'ミ', 'ム', 'メ', 'モ', 'ヤ', 'ユ', 'ヨ', 'ラ',
        'リ', 'ル', 'レ', 'ロ', 'ワ', 'ン', '゙', '゚',
    ))
}
|