# -*- coding: utf-8 -*-
"""Helpers to encode Japanese characters.

Text is converted to the single-byte codes listed in
``TXT_ENC_KATAKANA_MAP`` (0xA1-0xDF). Hiragana can only be encoded when the
optional ``jaconv`` package is installed, because it is used to turn hiragana
into katakana first.
"""

import unicodedata

try:
    import jaconv
except ImportError:
    jaconv = None


def encode_katakana(text, errors="ignore"):
    """Encode Japanese text using ``TXT_ENC_KATAKANA_MAP``.

    Every character is decomposed with Unicode NFKD before it is looked up.
    This way half-width katakana, full-width katakana and voiced/semi-voiced
    kana are all reduced to a base kana optionally followed by a sound mark
    (U+3099 / U+309A), and each of those parts is encoded as one byte.

    :param text: the text to encode
    :param errors: ``"ignore"`` (default) silently drops everything that has
        no entry in ``TXT_ENC_KATAKANA_MAP``; ``"strict"`` raises
        ``ValueError`` for the first such character instead
    :returns: the encoded bytes (empty for empty input)
    :raises ValueError: if ``errors`` is not a supported mode, or if
        ``errors="strict"`` and a character cannot be encoded
    """
    if errors not in ("ignore", "strict"):
        raise ValueError("unsupported error handling mode: %r" % (errors,))

    if jaconv:
        # Hiragana has no entries in the map; convert it to katakana first.
        text = jaconv.hira2kata(text)

    encoded = []
    for char in text:
        # One input character may decompose into several code points
        # (e.g. a voiced kana becomes base kana + voiced sound mark), so
        # every resulting code point is looked up on its own.
        for unit in unicodedata.normalize("NFKD", char):
            code = TXT_ENC_KATAKANA_MAP.get(unit)
            if code is not None:
                encoded.append(code)
            elif errors == "strict":
                raise ValueError(
                    "character %r cannot be encoded as katakana" % (char,)
                )
    return b"".join(encoded)


# Maps katakana symbols to the single-byte codes of the KATAKANA code page.
# Keys are the NFKD-normalised forms of the symbols, which is what
# encode_katakana() looks up.
# TODO: does this really have to be hardcoded?
TXT_ENC_KATAKANA_MAP = {
    # Punctuation
    "。": b"\xa1", "「": b"\xa2", "」": b"\xa3", "、": b"\xa4", "・": b"\xa5",
    # Wo, small kana and the prolonged sound mark
    "ヲ": b"\xa6",
    "ァ": b"\xa7", "ィ": b"\xa8", "ゥ": b"\xa9", "ェ": b"\xaa", "ォ": b"\xab",
    "ャ": b"\xac", "ュ": b"\xad", "ョ": b"\xae", "ッ": b"\xaf",
    "ー": b"\xb0",
    # Regular kana
    "ア": b"\xb1", "イ": b"\xb2", "ウ": b"\xb3", "エ": b"\xb4", "オ": b"\xb5",
    "カ": b"\xb6", "キ": b"\xb7", "ク": b"\xb8", "ケ": b"\xb9", "コ": b"\xba",
    "サ": b"\xbb", "シ": b"\xbc", "ス": b"\xbd", "セ": b"\xbe", "ソ": b"\xbf",
    "タ": b"\xc0", "チ": b"\xc1", "ツ": b"\xc2", "テ": b"\xc3", "ト": b"\xc4",
    "ナ": b"\xc5", "ニ": b"\xc6", "ヌ": b"\xc7", "ネ": b"\xc8", "ノ": b"\xc9",
    "ハ": b"\xca", "ヒ": b"\xcb", "フ": b"\xcc", "ヘ": b"\xcd", "ホ": b"\xce",
    "マ": b"\xcf", "ミ": b"\xd0", "ム": b"\xd1", "メ": b"\xd2", "モ": b"\xd3",
    "ヤ": b"\xd4", "ユ": b"\xd5", "ヨ": b"\xd6",
    "ラ": b"\xd7", "リ": b"\xd8", "ル": b"\xd9", "レ": b"\xda", "ロ": b"\xdb",
    "ワ": b"\xdc", "ン": b"\xdd",
    # Voiced and semi-voiced sound marks, written as escapes because the
    # characters themselves are combining marks that are easy to misread.
    "\u3099": b"\xde",
    "\u309a": b"\xdf",
}