Use jaconv instead of jcconv for Japanese chars
jaconv is available for more Python versions and seems to be more professional. Apart from that, I added jaconv to the test requirements but not to the install requirements. (If the katakana support really works, we can add it as a real dependency later.)
This commit is contained in:
parent
b795c02dd4
commit
1b2f509758
2
setup.py
2
setup.py
|
@ -118,7 +118,7 @@ setup(
|
||||||
setup_requires=[
|
setup_requires=[
|
||||||
'setuptools_scm',
|
'setuptools_scm',
|
||||||
],
|
],
|
||||||
tests_require=['tox', 'pytest', 'pytest-cov', 'pytest-mock', 'nose', 'scripttest', 'mock', 'hypothesis'],
|
tests_require=['jaconv', 'tox', 'pytest', 'pytest-cov', 'pytest-mock', 'nose', 'scripttest', 'mock', 'hypothesis'],
|
||||||
cmdclass={'test': Tox},
|
cmdclass={'test': Tox},
|
||||||
entry_points={
|
entry_points={
|
||||||
'console_scripts': [
|
'console_scripts': [
|
||||||
|
|
|
@ -11,24 +11,27 @@ from __future__ import unicode_literals
|
||||||
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
import jcconv
|
import jaconv
|
||||||
except ImportError:
|
except ImportError:
|
||||||
jcconv = None
|
jaconv = None
|
||||||
|
|
||||||
|
|
||||||
def encode_katakana(text):
|
def encode_katakana(text):
|
||||||
"""I don't think this quite works yet."""
|
"""I don't think this quite works yet."""
|
||||||
encoded = []
|
encoded = []
|
||||||
for char in text:
|
for char in text:
|
||||||
if jcconv:
|
if jaconv:
|
||||||
# try to convert japanese text to half-katakanas
|
# try to convert japanese text to half-katakanas
|
||||||
char = jcconv.kata2half(jcconv.hira2kata(char))
|
char = jaconv.z2h(jaconv.hira2kata(char))
|
||||||
# TODO: "the conversion may result in multiple characters"
|
# TODO: "the conversion may result in multiple characters"
|
||||||
# When? What should we do about it?
|
# If that really can happen (I am not really sure), then the string would have to be split and every single
|
||||||
|
# character has to be passed through the following lines.
|
||||||
|
|
||||||
if char in TXT_ENC_KATAKANA_MAP:
|
if char in TXT_ENC_KATAKANA_MAP:
|
||||||
encoded.append(TXT_ENC_KATAKANA_MAP[char])
|
encoded.append(TXT_ENC_KATAKANA_MAP[char])
|
||||||
else:
|
else:
|
||||||
|
# TODO: doesn't this discard everything that is not in the map? Can we be sure that the input contains only
|
||||||
|
# encodable characters? We could at least throw an exception if encoding is not possible.
|
||||||
pass
|
pass
|
||||||
return b"".join(encoded)
|
return b"".join(encoded)
|
||||||
|
|
||||||
|
@ -36,6 +39,7 @@ def encode_katakana(text):
|
||||||
|
|
||||||
TXT_ENC_KATAKANA_MAP = {
|
TXT_ENC_KATAKANA_MAP = {
|
||||||
# Maps UTF-8 Katakana symbols to KATAKANA Page Codes
|
# Maps UTF-8 Katakana symbols to KATAKANA Page Codes
|
||||||
|
# TODO: does this really have to be hardcoded?
|
||||||
|
|
||||||
# Half-Width Katakanas
|
# Half-Width Katakanas
|
||||||
'。': b'\xa1',
|
'。': b'\xa1',
|
||||||
|
|
|
@ -95,12 +95,12 @@ class TestMagicEncode:
|
||||||
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
import jcconv
|
import jaconv
|
||||||
except ImportError:
|
except ImportError:
|
||||||
jcconv = None
|
jaconv = None
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.skipif(not jcconv, reason="jcconv not installed")
|
@pytest.mark.skipif(not jaconv, reason="jaconv not installed")
|
||||||
class TestKatakana:
|
class TestKatakana:
|
||||||
@given(st.text())
|
@given(st.text())
|
||||||
@example("カタカナ")
|
@example("カタカナ")
|
||||||
|
|
Loading…
Reference in New Issue