nzbToMedia/libs/common/guessit/rules/properties/title.py

#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
title property
"""

from rebulk import Rebulk, Rule, AppendMatch, RemoveMatch, AppendTags
from rebulk.formatters import formatters

from .film import FilmTitleRule
from .language import (
    SubtitlePrefixLanguageRule,
    SubtitleSuffixLanguageRule,
    SubtitleExtensionRule,
    NON_SPECIFIC_LANGUAGES
)
from ..common import seps, title_seps
from ..common.comparators import marker_sorted
from ..common.expected import build_expected_function
from ..common.formatters import cleanup, reorder_title
from ..common.pattern import is_disabled
from ..common.validators import seps_surround


def title(config):  # pylint:disable=unused-argument
    """
    Builder for rebulk object.

    :param config: rule configuration
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    rebulk = Rebulk(disabled=lambda context: is_disabled(context, 'title'))
    rebulk.rules(TitleFromPosition, PreferTitleWithYear)

    expected_title = build_expected_function('expected_title')

    rebulk.functional(expected_title, name='title', tags=['expected', 'title'],
                      validator=seps_surround,
                      formatter=formatters(cleanup, reorder_title),
                      conflict_solver=lambda match, other: other,
                      disabled=lambda context: not context.get('expected_title'))

    return rebulk


class TitleBaseRule(Rule):
    """
    Add title match in existing matches
    """
    # pylint:disable=no-self-use,unused-argument
    consequence = [AppendMatch, RemoveMatch]

    def __init__(self, match_name, match_tags=None, alternative_match_name=None):
        super(TitleBaseRule, self).__init__()
        self.match_name = match_name
        self.match_tags = match_tags
        self.alternative_match_name = alternative_match_name

    def hole_filter(self, hole, matches):
        """
        Filter holes for titles.
        :param hole:
        :type hole:
        :param matches:
        :type matches:
        :return:
        :rtype:
        """
        return True

    def filepart_filter(self, filepart, matches):
        """
        Filter filepart for titles.
        :param filepart:
        :type filepart:
        :param matches:
        :type matches:
        :return:
        :rtype:
        """
        return True

    def holes_process(self, holes, matches):
        """
        process holes
        :param holes:
        :type holes:
        :param matches:
        :type matches:
        :return:
        :rtype:
        """
        cropped_holes = []
        group_markers = matches.markers.named('group')
        for group_marker in group_markers:
            path_marker = matches.markers.at_match(group_marker, predicate=lambda m: m.name == 'path', index=0)
            if path_marker and path_marker.span == group_marker.span:
                group_markers.remove(group_marker)

        for hole in holes:
            cropped_holes.extend(hole.crop(group_markers))

        return cropped_holes

    @staticmethod
    def is_ignored(match):
        """
        Ignore matches when scanning for title (hole).

        Full word language and countries won't be ignored if they are uppercase.
        """
        return not (len(match) > 3 and match.raw.isupper()) and match.name in ('language', 'country', 'episode_details')

    def should_keep(self, match, to_keep, matches, filepart, hole, starting):
        """
        Check if this match should be accepted when ending or starting a hole.
        :param match:
        :type match:
        :param to_keep:
        :type to_keep: list[Match]
        :param matches:
        :type matches: Matches
        :param hole: the filepart match
        :type hole: Match
        :param hole: the hole match
        :type hole: Match
        :param starting: true if match is starting the hole
        :type starting: bool
        :return:
        :rtype:
        """
        if match.name in ('language', 'country'):
            # Keep language if exactly matching the hole.
            if len(hole.value) == len(match.raw):
                return True

            # Keep language if other languages exists in the filepart.
            outside_matches = filepart.crop(hole)
            other_languages = []
            for outside in outside_matches:
                other_languages.extend(matches.range(outside.start, outside.end,
                                                     lambda c_match: c_match.name == match.name and
                                                     c_match not in to_keep and
                                                     c_match.value not in NON_SPECIFIC_LANGUAGES))

            if not other_languages and (not starting or len(match.raw) <= 3):
                return True

        return False

    def should_remove(self, match, matches, filepart, hole, context):
        """
        Check if this match should be removed after beeing ignored.
        :param match:
        :param matches:
        :param filepart:
        :param hole:
        :return:
        """
        if context.get('type') == 'episode' and match.name == 'episode_details':
            return match.start >= hole.start and match.end <= hole.end
        return True

    def check_titles_in_filepart(self, filepart, matches, context):  # pylint:disable=inconsistent-return-statements
        """
        Find title in filepart (ignoring language)
        """
        # pylint:disable=too-many-locals,too-many-branches,too-many-statements
        start, end = filepart.span

        holes = matches.holes(start, end + 1, formatter=formatters(cleanup, reorder_title),
                              ignore=self.is_ignored,
                              predicate=lambda m: m.value)

        holes = self.holes_process(holes, matches)

        for hole in holes:
            if not hole or (self.hole_filter and not self.hole_filter(hole, matches)):
                continue

            to_remove = []
            to_keep = []

            ignored_matches = matches.range(hole.start, hole.end, self.is_ignored)

            if ignored_matches:
                for ignored_match in reversed(ignored_matches):
                    # pylint:disable=undefined-loop-variable, cell-var-from-loop
                    trailing = matches.chain_before(hole.end, seps, predicate=lambda m: m == ignored_match)
                    if trailing:
                        should_keep = self.should_keep(ignored_match, to_keep, matches, filepart, hole, False)
                        if should_keep:
                            # pylint:disable=unpacking-non-sequence
                            try:
                                append, crop = should_keep
                            except TypeError:
                                append, crop = should_keep, should_keep
                            if append:
                                to_keep.append(ignored_match)
                            if crop:
                                hole.end = ignored_match.start

                for ignored_match in ignored_matches:
                    if ignored_match not in to_keep:
                        starting = matches.chain_after(hole.start, seps,
                                                       predicate=lambda m: m == ignored_match)
                        if starting:
                            should_keep = self.should_keep(ignored_match, to_keep, matches, filepart, hole, True)
                            if should_keep:
                                # pylint:disable=unpacking-non-sequence
                                try:
                                    append, crop = should_keep
                                except TypeError:
                                    append, crop = should_keep, should_keep
                                if append:
                                    to_keep.append(ignored_match)
                                if crop:
                                    hole.start = ignored_match.end

            for match in ignored_matches:
                if self.should_remove(match, matches, filepart, hole, context):
                    to_remove.append(match)
            for keep_match in to_keep:
                if keep_match in to_remove:
                    to_remove.remove(keep_match)

            if hole and hole.value:
                hole.name = self.match_name
                hole.tags = self.match_tags
                if self.alternative_match_name:
                    # Split and keep values that can be a title
                    titles = hole.split(title_seps, lambda m: m.value)
                    for title_match in list(titles[1:]):
                        previous_title = titles[titles.index(title_match) - 1]
                        separator = matches.input_string[previous_title.end:title_match.start]
                        if len(separator) == 1 and separator == '-' \
                                and previous_title.raw[-1] not in seps \
                                and title_match.raw[0] not in seps:
                            titles[titles.index(title_match) - 1].end = title_match.end
                            titles.remove(title_match)
                        else:
                            title_match.name = self.alternative_match_name

                else:
                    titles = [hole]
                return titles, to_remove

    def when(self, matches, context):
        ret = []
        to_remove = []

        if matches.named(self.match_name, lambda match: 'expected' in match.tags):
            return False

        fileparts = [filepart for filepart in list(marker_sorted(matches.markers.named('path'), matches))
                     if not self.filepart_filter or self.filepart_filter(filepart, matches)]

        # Priorize fileparts containing the year
        years_fileparts = []
        for filepart in fileparts:
            year_match = matches.range(filepart.start, filepart.end, lambda match: match.name == 'year', 0)
            if year_match:
                years_fileparts.append(filepart)

        for filepart in fileparts:
            try:
                years_fileparts.remove(filepart)
            except ValueError:
                pass
            titles = self.check_titles_in_filepart(filepart, matches, context)
            if titles:
                titles, to_remove_c = titles
                ret.extend(titles)
                to_remove.extend(to_remove_c)
                break

        # Add title match in all fileparts containing the year.
        for filepart in years_fileparts:
            titles = self.check_titles_in_filepart(filepart, matches, context)
            if titles:
                # pylint:disable=unbalanced-tuple-unpacking
                titles, to_remove_c = titles
                ret.extend(titles)
                to_remove.extend(to_remove_c)

        if ret or to_remove:
            return ret, to_remove
        return False


class TitleFromPosition(TitleBaseRule):
    """
    Add title match in existing matches
    """
    dependency = [FilmTitleRule, SubtitlePrefixLanguageRule, SubtitleSuffixLanguageRule, SubtitleExtensionRule]

    properties = {'title': [None], 'alternative_title': [None]}

    def __init__(self):
        super(TitleFromPosition, self).__init__('title', ['title'], 'alternative_title')

    def enabled(self, context):
        return not is_disabled(context, 'alternative_title')


class PreferTitleWithYear(Rule):
    """
    Prefer title where filepart contains year.
    """
    dependency = TitleFromPosition
    consequence = [RemoveMatch, AppendTags(['equivalent-ignore'])]

    properties = {'title': [None]}

    def when(self, matches, context):
        with_year_in_group = []
        with_year = []
        titles = matches.named('title')

        for title_match in titles:
            filepart = matches.markers.at_match(title_match, lambda marker: marker.name == 'path', 0)
            if filepart:
                year_match = matches.range(filepart.start, filepart.end, lambda match: match.name == 'year', 0)
                if year_match:
                    group = matches.markers.at_match(year_match, lambda m: m.name == 'group')
                    if group:
                        with_year_in_group.append(title_match)
                    else:
                        with_year.append(title_match)

        to_tag = []
        if with_year_in_group:
            title_values = {title_match.value for title_match in with_year_in_group}
            to_tag.extend(with_year_in_group)
        elif with_year:
            title_values = {title_match.value for title_match in with_year}
            to_tag.extend(with_year)
        else:
            title_values = {title_match.value for title_match in titles}

        to_remove = []
        for title_match in titles:
            if title_match.value not in title_values:
                to_remove.append(title_match)
        if to_remove or to_tag:
            return to_remove, to_tag
        return False