mirror of
https://github.com/clinton-hall/nzbToMedia.git
synced 2024-11-14 17:40:24 -08:00
56c6773c6b
Updates colorama to 0.4.6 Adds confuse version 1.7.0 Updates jellyfish to 0.9.0 Adds mediafile 0.10.1 Updates munkres to 1.1.4 Updates musicbrainzngs to 0.7.1 Updates mutagen to 1.46.0 Updates pyyaml to 6.0 Updates unidecode to 1.3.6
239 lines
7.0 KiB
Python
239 lines
7.0 KiB
Python
# -*- coding: utf-8 -*-
|
|
import csv
|
|
import platform
|
|
import pytest
|
|
|
|
# Keyword arguments passed to every open() call on the CSV fixtures in this module.
open_kwargs = dict(encoding="utf8")
|
|
|
|
|
|
def assertAlmostEqual(a, b, places=3):
    """Assert that ``a`` and ``b`` differ by strictly less than ``0.1 ** places``.

    Raises AssertionError when the absolute difference reaches the tolerance.
    """
    tolerance = 0.1 ** places
    difference = abs(a - b)
    assert difference < tolerance
|
|
|
|
|
|
# The pure-Python implementation is always exercised; the "c" entry is added
# only when running on CPython.
implementations = ["python"]
if platform.python_implementation() == "CPython":
    implementations.append("c")
|
|
|
|
|
|
@pytest.fixture(params=implementations)
def jf(request):
    """Return the jellyfish module selected by the current fixture parameter.

    ``"python"`` selects ``jellyfish._jellyfish``; any other parameter selects
    ``jellyfish.cjellyfish``. The import is deferred until the fixture runs.
    """
    if request.param != "python":
        from jellyfish import cjellyfish as module
    else:
        from jellyfish import _jellyfish as module
    return module
|
|
|
|
|
|
def _load_data(name):
    """Yield every row of ``testdata/<name>.csv`` as a list of strings.

    The path is relative to the current working directory, and the file is
    opened with the module-level ``open_kwargs``.
    """
    # The csv module documentation asks for newline="" on files handed to
    # csv.reader so that the reader itself, not the text layer, interprets
    # line endings (including ones embedded in quoted fields).
    with open("testdata/{}.csv".format(name), newline="", **open_kwargs) as f:
        yield from csv.reader(f)
|
|
|
|
|
|
@pytest.mark.parametrize("s1,s2,value", _load_data("jaro_winkler"), ids=str)
def test_jaro_winkler_similarity(jf, s1, s2, value):
    """Compare jaro_winkler_similarity with each row of the ``jaro_winkler`` data."""
    expected = float(value)
    actual = jf.jaro_winkler_similarity(s1, s2)
    assertAlmostEqual(actual, expected, places=3)
|
|
|
|
|
|
@pytest.mark.parametrize("s1,s2,value", _load_data("jaro_winkler_longtol"), ids=str)
def test_jaro_winkler_similarity_longtol(jf, s1, s2, value):
    """Compare jaro_winkler_similarity with the ``jaro_winkler_longtol`` data.

    The function is called with ``True`` as its third positional argument
    (presumably ``long_tolerance`` -- confirm against the library signature).
    """
    expected = float(value)
    actual = jf.jaro_winkler_similarity(s1, s2, True)
    assertAlmostEqual(actual, expected, places=3)
|
|
|
|
|
|
def test_jaro_winkler_deprecation(jf):
    """Calling the legacy ``jaro_winkler`` alias must emit a deprecation warning.

    The ``jf`` fixture is requested but not used in the body.
    """
    # backwards compatibility function
    from jellyfish import jaro_winkler as legacy_jaro_winkler

    with pytest.deprecated_call():
        score = legacy_jaro_winkler("a", "a")
        assert score == 1
|
|
|
|
|
|
def test_jaro_distance_deprecation():
    """Calling the legacy ``jaro_distance`` alias must emit a deprecation warning."""
    # backwards compatibility function
    from jellyfish import jaro_distance as legacy_jaro_distance

    with pytest.deprecated_call():
        score = legacy_jaro_distance("a", "a")
        assert score == 1
|
|
|
|
|
|
@pytest.mark.parametrize("s1,s2,value", _load_data("jaro_distance"), ids=str)
def test_jaro_similarity(jf, s1, s2, value):
    """Compare jaro_similarity with each row of the ``jaro_distance`` data."""
    expected = float(value)
    actual = jf.jaro_similarity(s1, s2)
    assertAlmostEqual(actual, expected, places=3)
|
|
|
|
|
|
@pytest.mark.parametrize("s1,s2,value", _load_data("hamming"), ids=str)
def test_hamming_distance(jf, s1, s2, value):
    """Compare hamming_distance with each row of the ``hamming`` data."""
    expected = int(value)
    distance = jf.hamming_distance(s1, s2)
    assert distance == expected
|
|
|
|
|
|
@pytest.mark.parametrize("s1,s2,value", _load_data("levenshtein"), ids=str)
def test_levenshtein_distance(jf, s1, s2, value):
    """Compare levenshtein_distance with each row of the ``levenshtein`` data."""
    expected = int(value)
    distance = jf.levenshtein_distance(s1, s2)
    assert distance == expected
|
|
|
|
|
|
@pytest.mark.parametrize("s1,s2,value", _load_data("damerau_levenshtein"), ids=str)
def test_damerau_levenshtein_distance(jf, s1, s2, value):
    """Compare damerau_levenshtein_distance with the ``damerau_levenshtein`` data."""
    expected = int(value)
    distance = jf.damerau_levenshtein_distance(s1, s2)
    assert distance == expected
|
|
|
|
|
|
@pytest.mark.parametrize("s1,code", _load_data("soundex"), ids=str)
def test_soundex(jf, s1, code):
    """Compare soundex output with each row of the ``soundex`` data."""
    encoded = jf.soundex(s1)
    assert encoded == code
|
|
|
|
|
|
@pytest.mark.parametrize("s1,code", _load_data("metaphone"), ids=str)
def test_metaphone(jf, s1, code):
    """Compare metaphone output with each row of the ``metaphone`` data."""
    encoded = jf.metaphone(s1)
    assert encoded == code
|
|
|
|
|
|
@pytest.mark.parametrize("s1,s2", _load_data("nysiis"), ids=str)
def test_nysiis(jf, s1, s2):
    """Compare nysiis output with each row of the ``nysiis`` data."""
    encoded = jf.nysiis(s1)
    assert encoded == s2
|
|
|
|
|
|
@pytest.mark.parametrize("s1,s2", _load_data("match_rating_codex"), ids=str)
def test_match_rating_codex(jf, s1, s2):
    """Compare match_rating_codex output with the ``match_rating_codex`` data."""
    encoded = jf.match_rating_codex(s1)
    assert encoded == s2
|
|
|
|
|
|
@pytest.mark.parametrize("s1,s2,value", _load_data("match_rating_comparison"), ids=str)
def test_match_rating_comparison(jf, s1, s2, value):
    """Compare match_rating_comparison with the ``match_rating_comparison`` data.

    The CSV stores the expected result as the text ``True``, ``False`` or
    ``None``; any other text raises KeyError during the lookup.
    """
    literals = {"True": True, "False": False, "None": None}
    expected = literals[value]
    outcome = jf.match_rating_comparison(s1, s2)
    # Identity comparison keeps True/False/None distinct from other truthy or
    # falsy return values.
    assert outcome is expected
|
|
|
|
|
|
# use non-parameterized version for speed
|
|
# @pytest.mark.parametrize("a,b", _load_data('porter'), ids=str)
|
|
# def test_porter_stem(jf, a, b):
|
|
# assert jf.porter_stem(a) == b
|
|
|
|
|
|
def test_porter_stem(jf):
    """Check porter_stem against every (word, stem) row of ``testdata/porter.csv``.

    All rows are checked inside a single test rather than one parametrized
    case per row. The path is relative to the current working directory.
    """
    # The csv module documentation asks for newline="" on files handed to
    # csv.reader so the reader itself interprets line endings.
    with open("testdata/porter.csv", newline="", **open_kwargs) as f:
        for word, stem in csv.reader(f):
            assert jf.porter_stem(word) == stem
|
|
|
|
|
|
# These regression tests import the C extension directly, so they are only
# defined when running on CPython.
if platform.python_implementation() == "CPython":

    def test_match_rating_comparison_segfault():
        """Run the C match_rating_comparison over all pairs of 100 SHA-1 hex digests."""
        import hashlib

        from jellyfish import cjellyfish as jf

        sha1s = []
        for v in range(100):
            digest = hashlib.sha1(str(v).encode("ascii")).hexdigest()
            sha1s.append(u"{}".format(digest))

        # this segfaulted on 0.1.2
        results = [
            [jf.match_rating_comparison(h1, h2) for h1 in sha1s] for h2 in sha1s
        ]
        assert results

    def test_damerau_levenshtein_unicode_segfault():
        """Check both Damerau-Levenshtein implementations on a non-ASCII pair."""
        # test that unicode works in C & Python versions now
        from jellyfish.cjellyfish import damerau_levenshtein_distance as c_dl
        from jellyfish._jellyfish import damerau_levenshtein_distance as py_dl

        s1 = u"mylifeoutdoors"
        s2 = u"нахлыст"

        # C implementation first, then pure Python; both argument orders each.
        for distance in (c_dl, py_dl):
            assert distance(s1, s2) == 14
            assert distance(s2, s1) == 14
|
|
|
|
|
|
def test_jaro_winkler_long_tolerance(jf):
    """Check that ``long_tolerance`` changes the Jaro-Winkler score for one pair."""
    first, second = u"two long strings", u"two long stringz"
    no_lt = jf.jaro_winkler_similarity(first, second, long_tolerance=False)
    with_lt = jf.jaro_winkler_similarity(first, second, long_tolerance=True)

    # make sure long_tolerance does something
    assertAlmostEqual(no_lt, 0.975)
    assertAlmostEqual(with_lt, 0.984)
|
|
|
|
|
|
def test_damerau_levenshtein_distance_type(jf):
    """damerau_levenshtein_distance accepts text and rejects bytes.

    Bytes input must raise a TypeError whose message contains ``expected``.
    """
    # Assert the result of the valid call, as the other *_type tests do,
    # instead of discarding it; identical strings have distance 0.
    assert jf.damerau_levenshtein_distance(u"abc", u"abc") == 0
    with pytest.raises(TypeError) as exc:
        jf.damerau_levenshtein_distance(b"abc", b"abc")
    assert "expected" in str(exc.value)
|
|
|
|
|
|
def test_levenshtein_distance_type(jf):
    """levenshtein_distance accepts text and rejects bytes with a TypeError."""
    distance = jf.levenshtein_distance
    assert distance(u"abc", u"abc") == 0
    with pytest.raises(TypeError) as excinfo:
        distance(b"abc", b"abc")
    message = str(excinfo.value)
    assert "expected" in message
|
|
|
|
|
|
def test_jaro_similarity_type(jf):
    """jaro_similarity accepts text and rejects bytes with a TypeError."""
    similarity = jf.jaro_similarity
    assert similarity(u"abc", u"abc") == 1
    with pytest.raises(TypeError) as excinfo:
        similarity(b"abc", b"abc")
    message = str(excinfo.value)
    assert "expected" in message
|
|
|
|
|
|
def test_jaro_winkler_type(jf):
    """jaro_winkler_similarity accepts text and rejects bytes with a TypeError."""
    similarity = jf.jaro_winkler_similarity
    assert similarity(u"abc", u"abc") == 1
    with pytest.raises(TypeError) as excinfo:
        similarity(b"abc", b"abc")
    message = str(excinfo.value)
    assert "expected" in message
|
|
|
|
|
|
def test_mra_comparison_type(jf):
    """match_rating_comparison accepts text and rejects bytes with a TypeError."""
    compare = jf.match_rating_comparison
    assert compare(u"abc", u"abc") is True
    with pytest.raises(TypeError) as excinfo:
        compare(b"abc", b"abc")
    message = str(excinfo.value)
    assert "expected" in message
|
|
|
|
|
|
def test_hamming_type(jf):
    """hamming_distance accepts text and rejects bytes with a TypeError."""
    distance = jf.hamming_distance
    assert distance(u"abc", u"abc") == 0
    with pytest.raises(TypeError) as excinfo:
        distance(b"abc", b"abc")
    message = str(excinfo.value)
    assert "expected" in message
|
|
|
|
|
|
def test_soundex_type(jf):
    """soundex accepts text and rejects bytes with a TypeError."""
    encode = jf.soundex
    assert encode(u"ABC") == "A120"
    with pytest.raises(TypeError) as excinfo:
        encode(b"ABC")
    message = str(excinfo.value)
    assert "expected" in message
|
|
|
|
|
|
def test_metaphone_type(jf):
    """metaphone accepts text and rejects bytes with a TypeError."""
    encode = jf.metaphone
    assert encode(u"abc") == "ABK"
    with pytest.raises(TypeError) as excinfo:
        encode(b"abc")
    message = str(excinfo.value)
    assert "expected" in message
|
|
|
|
|
|
def test_nysiis_type(jf):
    """nysiis accepts text and rejects bytes with a TypeError."""
    encode = jf.nysiis
    assert encode(u"abc") == "ABC"
    with pytest.raises(TypeError) as excinfo:
        encode(b"abc")
    message = str(excinfo.value)
    assert "expected" in message
|
|
|
|
|
|
def test_mr_codex_type(jf):
    """match_rating_codex accepts text and rejects bytes with a TypeError."""
    encode = jf.match_rating_codex
    assert encode(u"abc") == "ABC"
    with pytest.raises(TypeError) as excinfo:
        encode(b"abc")
    message = str(excinfo.value)
    assert "expected" in message
|
|
|
|
|
|
def test_porter_type(jf):
    """porter_stem accepts text and rejects bytes with a TypeError."""
    stem = jf.porter_stem
    assert stem(u"abc") == "abc"
    with pytest.raises(TypeError) as excinfo:
        stem(b"abc")
    message = str(excinfo.value)
    assert "expected" in message
|