use jaconv instead of jcconv for japanese chars
jaconv is available for more Python versions and seems to be more professional. Apart from that, I added jaconv to the test requirements but not to the regular requirements. (If the katakana support really works, we can add it as a real dependency later.)
This commit is contained in:
parent
b795c02dd4
commit
1b2f509758
2
setup.py
2
setup.py
|
@ -118,7 +118,7 @@ setup(
|
|||
setup_requires=[
|
||||
'setuptools_scm',
|
||||
],
|
||||
tests_require=['tox', 'pytest', 'pytest-cov', 'pytest-mock', 'nose', 'scripttest', 'mock', 'hypothesis'],
|
||||
tests_require=['jaconv', 'tox', 'pytest', 'pytest-cov', 'pytest-mock', 'nose', 'scripttest', 'mock', 'hypothesis'],
|
||||
cmdclass={'test': Tox},
|
||||
entry_points={
|
||||
'console_scripts': [
|
||||
|
|
|
@ -11,24 +11,27 @@ from __future__ import unicode_literals
|
|||
|
||||
|
||||
try:
|
||||
import jcconv
|
||||
import jaconv
|
||||
except ImportError:
|
||||
jcconv = None
|
||||
jaconv = None
|
||||
|
||||
|
||||
def encode_katakana(text):
|
||||
"""I don't think this quite works yet."""
|
||||
encoded = []
|
||||
for char in text:
|
||||
if jcconv:
|
||||
if jaconv:
|
||||
# try to convert japanese text to half-katakanas
|
||||
char = jcconv.kata2half(jcconv.hira2kata(char))
|
||||
char = jaconv.z2h(jaconv.hira2kata(char))
|
||||
# TODO: "the conversion may result in multiple characters"
|
||||
# When? What should we do about it?
|
||||
# If that really can happen (I am not really sure), then the string would have to be split and every single
|
||||
# character has to be passed through the following lines.
|
||||
|
||||
if char in TXT_ENC_KATAKANA_MAP:
|
||||
encoded.append(TXT_ENC_KATAKANA_MAP[char])
|
||||
else:
|
||||
# TODO: doesn't this discard everything that is not in the map? Can we be sure that the input contains only
|
||||
# encodable characters? We could at least throw an exception if encoding is not possible.
|
||||
pass
|
||||
return b"".join(encoded)
|
||||
|
||||
|
@ -36,6 +39,7 @@ def encode_katakana(text):
|
|||
|
||||
TXT_ENC_KATAKANA_MAP = {
|
||||
# Maps UTF-8 Katakana symbols to KATAKANA Page Codes
|
||||
# TODO: does this really have to be hardcoded?
|
||||
|
||||
# Half-Width Katakanas
|
||||
'。': b'\xa1',
|
||||
|
|
|
@ -95,12 +95,12 @@ class TestMagicEncode:
|
|||
|
||||
|
||||
try:
|
||||
import jcconv
|
||||
import jaconv
|
||||
except ImportError:
|
||||
jcconv = None
|
||||
jaconv = None
|
||||
|
||||
|
||||
@pytest.mark.skipif(not jcconv, reason="jcconv not installed")
|
||||
@pytest.mark.skipif(not jaconv, reason="jaconv not installed")
|
||||
class TestKatakana:
|
||||
@given(st.text())
|
||||
@example("カタカナ")
|
||||
|
|
Loading…
Reference in New Issue