use jaconv instead of jcconv for japanese chars

jaconv is available for more Python versions and seems to be more
professional. Apart from that, I added jaconv to the test requirements
but not to the install requirements. (If the katakana encoding really
works, we can later add it as a real dependency.)
This commit is contained in:
Patrick Kanzler 2016-09-27 20:26:22 +02:00
parent b795c02dd4
commit 1b2f509758
No known key found for this signature in database
GPG Key ID: F07F07153306FCEF
4 changed files with 14 additions and 9 deletions

View File

@ -118,7 +118,7 @@ setup(
setup_requires=[ setup_requires=[
'setuptools_scm', 'setuptools_scm',
], ],
tests_require=['tox', 'pytest', 'pytest-cov', 'pytest-mock', 'nose', 'scripttest', 'mock', 'hypothesis'], tests_require=['jaconv', 'tox', 'pytest', 'pytest-cov', 'pytest-mock', 'nose', 'scripttest', 'mock', 'hypothesis'],
cmdclass={'test': Tox}, cmdclass={'test': Tox},
entry_points={ entry_points={
'console_scripts': [ 'console_scripts': [

View File

@ -11,24 +11,27 @@ from __future__ import unicode_literals
try: try:
import jcconv import jaconv
except ImportError: except ImportError:
jcconv = None jaconv = None
def encode_katakana(text): def encode_katakana(text):
"""I don't think this quite works yet.""" """I don't think this quite works yet."""
encoded = [] encoded = []
for char in text: for char in text:
if jcconv: if jaconv:
# try to convert japanese text to half-katakanas # try to convert japanese text to half-katakanas
char = jcconv.kata2half(jcconv.hira2kata(char)) char = jaconv.z2h(jaconv.hira2kata(char))
# TODO: "the conversion may result in multiple characters" # TODO: "the conversion may result in multiple characters"
# When? What should we do about it? # If that really can happen (I am not really sure), then the string would have to be split and every single
# character has to be passed through the following lines.
if char in TXT_ENC_KATAKANA_MAP: if char in TXT_ENC_KATAKANA_MAP:
encoded.append(TXT_ENC_KATAKANA_MAP[char]) encoded.append(TXT_ENC_KATAKANA_MAP[char])
else: else:
# TODO: doesn't this discard everything that is not in the map? Can we be sure that the input contains only
# encodable characters? We could at least throw an exception if encoding is not possible.
pass pass
return b"".join(encoded) return b"".join(encoded)
@ -36,6 +39,7 @@ def encode_katakana(text):
TXT_ENC_KATAKANA_MAP = { TXT_ENC_KATAKANA_MAP = {
# Maps UTF-8 Katakana symbols to KATAKANA Page Codes # Maps UTF-8 Katakana symbols to KATAKANA Page Codes
# TODO: does this really have to be hardcoded?
# Half-Width Katakanas # Half-Width Katakanas
'｡': b'\xa1', '｡': b'\xa1',

View File

@ -95,12 +95,12 @@ class TestMagicEncode:
try: try:
import jcconv import jaconv
except ImportError: except ImportError:
jcconv = None jaconv = None
@pytest.mark.skipif(not jcconv, reason="jcconv not installed") @pytest.mark.skipif(not jaconv, reason="jaconv not installed")
class TestKatakana: class TestKatakana:
@given(st.text()) @given(st.text())
@example("カタカナ") @example("カタカナ")

View File

@ -3,6 +3,7 @@ envlist = py27, py34, py35, docs
[testenv] [testenv]
deps = nose deps = nose
jaconv
coverage coverage
scripttest scripttest
mock mock