nzbToMedia/libs/common/jellyfish/test.py
Labrys of Knossos 56c6773c6b Update vendored beets to 1.6.0
Updates colorama to 0.4.6
Adds confuse version 1.7.0
Updates jellyfish to 0.9.0
Adds mediafile 0.10.1
Updates munkres to 1.1.4
Updates musicbrainzngs to 0.7.1
Updates mutagen to 1.46.0
Updates pyyaml to 6.0
Updates unidecode to 1.3.6
2022-11-29 00:44:48 -05:00

239 lines
7.0 KiB
Python

# -*- coding: utf-8 -*-
import csv
import platform
import pytest
# Keyword arguments passed to the open() calls that read the CSV test data.
open_kwargs = dict(encoding="utf8")
def assertAlmostEqual(a, b, places=3):
    """Assert that ``a`` and ``b`` differ by less than ``0.1 ** places``.

    :param a: first number.
    :param b: second number.
    :param places: exponent applied to 0.1 to obtain the tolerance.
    :raises AssertionError: if the absolute difference is not below the
        tolerance.
    """
    tolerance = 0.1 ** places
    difference = abs(a - b)
    assert difference < tolerance
# The pure-Python implementation is always tested; the "c" implementation is
# added only when running on CPython.
implementations = ["python"]
if platform.python_implementation() == "CPython":
    implementations.append("c")
@pytest.fixture(params=implementations)
def jf(request):
    """Return the jellyfish module selected by the current fixture param.

    ``"python"`` selects ``jellyfish._jellyfish``; any other param selects
    ``jellyfish.cjellyfish``.  Imports are done lazily so that only the
    requested implementation is loaded.
    """
    if request.param == "python":
        from jellyfish import _jellyfish

        return _jellyfish

    from jellyfish import cjellyfish

    return cjellyfish
def _load_data(name):
    """Yield each row of ``testdata/<name>.csv`` as parsed by ``csv.reader``.

    The path is relative to the current working directory.  The file is
    opened with ``newline=""`` as the ``csv`` module documentation requires,
    so line endings (including newlines embedded in quoted fields) are
    handled by the CSV reader instead of being translated by the text layer.

    :param name: base name, without extension, of the CSV file to read.
    """
    with open("testdata/{}.csv".format(name), newline="", **open_kwargs) as f:
        yield from csv.reader(f)
@pytest.mark.parametrize("s1,s2,value", _load_data("jaro_winkler"), ids=str)
def test_jaro_winkler_similarity(jf, s1, s2, value):
    """Jaro-Winkler similarity of each CSV pair matches to three places."""
    expected = float(value)  # CSV fields arrive as text
    actual = jf.jaro_winkler_similarity(s1, s2)
    assertAlmostEqual(actual, expected, places=3)
@pytest.mark.parametrize("s1,s2,value", _load_data("jaro_winkler_longtol"), ids=str)
def test_jaro_winkler_similarity_longtol(jf, s1, s2, value):
    """Jaro-Winkler similarity with the third argument set to True.

    The third positional argument is presumably the long-tolerance flag,
    judging by the data file name.
    """
    expected = float(value)  # CSV fields arrive as text
    actual = jf.jaro_winkler_similarity(s1, s2, True)
    assertAlmostEqual(actual, expected, places=3)
def test_jaro_winkler_deprecation(jf):
    """The legacy ``jaro_winkler`` name still works but warns as deprecated."""
    # NOTE: the jf fixture is requested but not used, so this test simply
    # runs once per parametrized implementation.
    # backwards compatibility function
    from jellyfish import jaro_winkler as legacy_jaro_winkler

    with pytest.deprecated_call():
        score = legacy_jaro_winkler("a", "a")
        assert score == 1
def test_jaro_distance_deprecation():
    """The legacy ``jaro_distance`` name still works but warns as deprecated."""
    # backwards compatibility function
    from jellyfish import jaro_distance as legacy_jaro_distance

    with pytest.deprecated_call():
        score = legacy_jaro_distance("a", "a")
        assert score == 1
@pytest.mark.parametrize("s1,s2,value", _load_data("jaro_distance"), ids=str)
def test_jaro_similarity(jf, s1, s2, value):
    """Jaro similarity of each CSV pair matches to three places."""
    expected = float(value)  # CSV fields arrive as text
    actual = jf.jaro_similarity(s1, s2)
    assertAlmostEqual(actual, expected, places=3)
@pytest.mark.parametrize("s1,s2,value", _load_data("hamming"), ids=str)
def test_hamming_distance(jf, s1, s2, value):
    """Hamming distance of each CSV pair equals the listed integer."""
    expected = int(value)  # CSV fields arrive as text
    assert jf.hamming_distance(s1, s2) == expected
@pytest.mark.parametrize("s1,s2,value", _load_data("levenshtein"), ids=str)
def test_levenshtein_distance(jf, s1, s2, value):
    """Levenshtein distance of each CSV pair equals the listed integer."""
    expected = int(value)  # CSV fields arrive as text
    assert jf.levenshtein_distance(s1, s2) == expected
@pytest.mark.parametrize("s1,s2,value", _load_data("damerau_levenshtein"), ids=str)
def test_damerau_levenshtein_distance(jf, s1, s2, value):
    """Damerau-Levenshtein distance of each CSV pair equals the listed integer."""
    expected = int(value)  # CSV fields arrive as text
    assert jf.damerau_levenshtein_distance(s1, s2) == expected
@pytest.mark.parametrize("s1,code", _load_data("soundex"), ids=str)
def test_soundex(jf, s1, code):
    """Each input in the soundex data file encodes to its listed code."""
    encoded = jf.soundex(s1)
    assert encoded == code
@pytest.mark.parametrize("s1,code", _load_data("metaphone"), ids=str)
def test_metaphone(jf, s1, code):
    """Each input in the metaphone data file encodes to its listed code."""
    encoded = jf.metaphone(s1)
    assert encoded == code
@pytest.mark.parametrize("s1,s2", _load_data("nysiis"), ids=str)
def test_nysiis(jf, s1, s2):
    """Each input in the nysiis data file encodes to its listed value."""
    encoded = jf.nysiis(s1)
    assert encoded == s2
@pytest.mark.parametrize("s1,s2", _load_data("match_rating_codex"), ids=str)
def test_match_rating_codex(jf, s1, s2):
    """Each input in the match_rating_codex data file maps to its listed codex."""
    codex = jf.match_rating_codex(s1)
    assert codex == s2
@pytest.mark.parametrize("s1,s2,value", _load_data("match_rating_comparison"), ids=str)
def test_match_rating_comparison(jf, s1, s2, value):
    """Match-rating comparison of each CSV pair is exactly True, False or None."""
    # The CSV stores the outcome as text; map it back to the real singleton
    # so the identity check below is meaningful.
    outcomes = {"True": True, "False": False, "None": None}
    expected = outcomes[value]
    assert jf.match_rating_comparison(s1, s2) is expected
# use non-parameterized version for speed
# @pytest.mark.parametrize("a,b", _load_data('porter'), ids=str)
# def test_porter_stem(jf, a, b):
# assert jf.porter_stem(a) == b
def test_porter_stem(jf):
    """Check ``porter_stem`` against every word/stem row of the porter data.

    Kept as one looping test rather than a parametrized one (for speed, per
    the comment preceding this function).  Rows are read through the shared
    ``_load_data`` helper instead of duplicating the file handling here.
    """
    for word, stem in _load_data("porter"):
        assert jf.porter_stem(word) == stem
# These tests import the C extension directly, so they are only defined on
# CPython.
if platform.python_implementation() == "CPython":

    def test_match_rating_comparison_segfault():
        """Compare 100 SHA-1 hex digests pairwise with the C implementation."""
        import hashlib
        from jellyfish import cjellyfish as jf

        digests = []
        for number in range(100):
            raw = str(number).encode("ascii")
            digests.append(u"{}".format(hashlib.sha1(raw).hexdigest()))

        # this segfaulted on 0.1.2
        grid = [
            [jf.match_rating_comparison(first, second) for first in digests]
            for second in digests
        ]
        assert grid

    def test_damerau_levenshtein_unicode_segfault():
        """Both implementations handle non-ASCII input and agree on distance 14."""
        # test that unicode works in C & Python versions now
        from jellyfish.cjellyfish import damerau_levenshtein_distance as c_dl
        from jellyfish._jellyfish import damerau_levenshtein_distance as py_dl

        latin = u"mylifeoutdoors"
        cyrillic = u"нахлыст"
        # C implementation first, then pure Python; each in both argument orders.
        for distance in (c_dl, py_dl):
            assert distance(latin, cyrillic) == 14
            assert distance(cyrillic, latin) == 14
def test_jaro_winkler_long_tolerance(jf):
    """``long_tolerance`` changes the Jaro-Winkler score of two long strings."""
    left = u"two long strings"
    right = u"two long stringz"

    without_tolerance = jf.jaro_winkler_similarity(left, right, long_tolerance=False)
    with_tolerance = jf.jaro_winkler_similarity(left, right, long_tolerance=True)

    # make sure long_tolerance does something: the two expected scores differ
    assertAlmostEqual(without_tolerance, 0.975)
    assertAlmostEqual(with_tolerance, 0.984)
def test_damerau_levenshtein_distance_type(jf):
    """``damerau_levenshtein_distance`` accepts str and rejects bytes.

    Identical text inputs must give a distance of 0; bytes inputs must raise
    a ``TypeError`` whose message contains "expected".
    """
    # Assert the result rather than merely calling the function, so the
    # str-input path is actually checked like in the sibling *_type tests.
    assert jf.damerau_levenshtein_distance(u"abc", u"abc") == 0
    with pytest.raises(TypeError) as exc:
        jf.damerau_levenshtein_distance(b"abc", b"abc")
    assert "expected" in str(exc.value)
def test_levenshtein_distance_type(jf):
    """``levenshtein_distance`` accepts str and rejects bytes with TypeError."""
    distance = jf.levenshtein_distance
    assert distance(u"abc", u"abc") == 0
    with pytest.raises(TypeError) as excinfo:
        distance(b"abc", b"abc")
    # The error message must contain the word "expected".
    message = str(excinfo.value)
    assert "expected" in message
def test_jaro_similarity_type(jf):
    """``jaro_similarity`` accepts str and rejects bytes with TypeError."""
    similarity = jf.jaro_similarity
    assert similarity(u"abc", u"abc") == 1
    with pytest.raises(TypeError) as excinfo:
        similarity(b"abc", b"abc")
    # The error message must contain the word "expected".
    message = str(excinfo.value)
    assert "expected" in message
def test_jaro_winkler_type(jf):
    """``jaro_winkler_similarity`` accepts str and rejects bytes with TypeError."""
    similarity = jf.jaro_winkler_similarity
    assert similarity(u"abc", u"abc") == 1
    with pytest.raises(TypeError) as excinfo:
        similarity(b"abc", b"abc")
    # The error message must contain the word "expected".
    message = str(excinfo.value)
    assert "expected" in message
def test_mra_comparison_type(jf):
    """``match_rating_comparison`` accepts str and rejects bytes with TypeError."""
    compare = jf.match_rating_comparison
    assert compare(u"abc", u"abc") is True
    with pytest.raises(TypeError) as excinfo:
        compare(b"abc", b"abc")
    # The error message must contain the word "expected".
    message = str(excinfo.value)
    assert "expected" in message
def test_hamming_type(jf):
    """``hamming_distance`` accepts str and rejects bytes with TypeError."""
    distance = jf.hamming_distance
    assert distance(u"abc", u"abc") == 0
    with pytest.raises(TypeError) as excinfo:
        distance(b"abc", b"abc")
    # The error message must contain the word "expected".
    message = str(excinfo.value)
    assert "expected" in message
def test_soundex_type(jf):
    """``soundex`` accepts str and rejects bytes with TypeError."""
    soundex = jf.soundex
    assert soundex(u"ABC") == "A120"
    with pytest.raises(TypeError) as excinfo:
        soundex(b"ABC")
    # The error message must contain the word "expected".
    message = str(excinfo.value)
    assert "expected" in message
def test_metaphone_type(jf):
    """``metaphone`` accepts str and rejects bytes with TypeError."""
    metaphone = jf.metaphone
    assert metaphone(u"abc") == "ABK"
    with pytest.raises(TypeError) as excinfo:
        metaphone(b"abc")
    # The error message must contain the word "expected".
    message = str(excinfo.value)
    assert "expected" in message
def test_nysiis_type(jf):
    """``nysiis`` accepts str and rejects bytes with TypeError."""
    nysiis = jf.nysiis
    assert nysiis(u"abc") == "ABC"
    with pytest.raises(TypeError) as excinfo:
        nysiis(b"abc")
    # The error message must contain the word "expected".
    message = str(excinfo.value)
    assert "expected" in message
def test_mr_codex_type(jf):
    """``match_rating_codex`` accepts str and rejects bytes with TypeError."""
    codex = jf.match_rating_codex
    assert codex(u"abc") == "ABC"
    with pytest.raises(TypeError) as excinfo:
        codex(b"abc")
    # The error message must contain the word "expected".
    message = str(excinfo.value)
    assert "expected" in message
def test_porter_type(jf):
    """``porter_stem`` accepts str and rejects bytes with TypeError."""
    stem = jf.porter_stem
    assert stem(u"abc") == "abc"
    with pytest.raises(TypeError) as excinfo:
        stem(b"abc")
    # The error message must contain the word "expected".
    message = str(excinfo.value)
    assert "expected" in message