mirror of
https://github.com/clinton-hall/nzbToMedia.git
synced 2024-12-22 20:11:26 -08:00
115 lines
3.8 KiB
Python
115 lines
3.8 KiB
Python
#!/usr/bin/env python
|
||
# -*- coding: utf-8 -*-
|
||
"""
|
||
country property
|
||
"""
|
||
# pylint: disable=no-member
|
||
import babelfish
|
||
|
||
from rebulk import Rebulk
|
||
from ..common.pattern import is_disabled
|
||
from ..common.words import iter_words
|
||
|
||
|
||
def country(config, common_words):
|
||
"""
|
||
Builder for rebulk object.
|
||
|
||
:param config: rule configuration
|
||
:type config: dict
|
||
:param common_words: common words
|
||
:type common_words: set
|
||
:return: Created Rebulk object
|
||
:rtype: Rebulk
|
||
"""
|
||
rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'country'))
|
||
rebulk = rebulk.defaults(name='country')
|
||
|
||
def find_countries(string, context=None):
|
||
"""
|
||
Find countries in given string.
|
||
"""
|
||
allowed_countries = context.get('allowed_countries') if context else None
|
||
return CountryFinder(allowed_countries, common_words).find(string)
|
||
|
||
rebulk.functional(find_countries,
|
||
# Prefer language and any other property over country if not US or GB.
|
||
conflict_solver=lambda match, other: match
|
||
if other.name != 'language' or match.value not in (babelfish.Country('US'),
|
||
babelfish.Country('GB'))
|
||
else other,
|
||
properties={'country': [None]},
|
||
disabled=lambda context: not context.get('allowed_countries'))
|
||
|
||
babelfish.country_converters['guessit'] = GuessitCountryConverter(config['synonyms'])
|
||
|
||
return rebulk
|
||
|
||
|
||
class GuessitCountryConverter(babelfish.CountryReverseConverter): # pylint: disable=missing-docstring
|
||
def __init__(self, synonyms):
|
||
self.guessit_exceptions = {}
|
||
|
||
for alpha2, synlist in synonyms.items():
|
||
for syn in synlist:
|
||
self.guessit_exceptions[syn.lower()] = alpha2
|
||
|
||
@property
|
||
def codes(self): # pylint: disable=missing-docstring
|
||
return (babelfish.country_converters['name'].codes |
|
||
frozenset(babelfish.COUNTRIES.values()) |
|
||
frozenset(self.guessit_exceptions.keys()))
|
||
|
||
def convert(self, alpha2):
|
||
if alpha2 == 'GB':
|
||
return 'UK'
|
||
return str(babelfish.Country(alpha2))
|
||
|
||
def reverse(self, name): # pylint:disable=arguments-differ
|
||
# exceptions come first, as they need to override a potential match
|
||
# with any of the other guessers
|
||
try:
|
||
return self.guessit_exceptions[name.lower()]
|
||
except KeyError:
|
||
pass
|
||
|
||
try:
|
||
return babelfish.Country(name.upper()).alpha2
|
||
except ValueError:
|
||
pass
|
||
|
||
for conv in [babelfish.Country.fromname]:
|
||
try:
|
||
return conv(name).alpha2
|
||
except babelfish.CountryReverseError:
|
||
pass
|
||
|
||
raise babelfish.CountryReverseError(name)
|
||
|
||
|
||
class CountryFinder(object):
|
||
"""Helper class to search and return country matches."""
|
||
|
||
def __init__(self, allowed_countries, common_words):
|
||
self.allowed_countries = {l.lower() for l in allowed_countries or []}
|
||
self.common_words = common_words
|
||
|
||
def find(self, string):
|
||
"""Return all matches for country."""
|
||
for word_match in iter_words(string.strip().lower()):
|
||
word = word_match.value
|
||
if word.lower() in self.common_words:
|
||
continue
|
||
|
||
try:
|
||
country_object = babelfish.Country.fromguessit(word)
|
||
if (country_object.name.lower() in self.allowed_countries or
|
||
country_object.alpha2.lower() in self.allowed_countries):
|
||
yield self._to_rebulk_match(word_match, country_object)
|
||
except babelfish.Error:
|
||
continue
|
||
|
||
@classmethod
|
||
def _to_rebulk_match(cls, word, value):
|
||
return word.span[0], word.span[1], {'value': value}
|