mirror of
https://github.com/clinton-hall/nzbToMedia.git
synced 2025-01-26 21:02:59 -08:00
1111074dc3
Dependencies: * PyYAML 3.11 * Unidecode 0.4.19 * beets 1.3.18 * colorama 0.3.7 * enum34 1.1.6 * jellyfish 0.5.4 * munkres 1.0.7 * musicbrainzngs 0.6 * mutagen 1.32
174 lines
5.6 KiB
Python
174 lines
5.6 KiB
Python
# -*- coding: utf-8 -*-
|
|
# This file is part of beets.
|
|
# Copyright 2016, Jan-Erik Dahlin
|
|
#
|
|
# Permission is hereby granted, free of charge, to any person obtaining
|
|
# a copy of this software and associated documentation files (the
|
|
# "Software"), to deal in the Software without restriction, including
|
|
# without limitation the rights to use, copy, modify, merge, publish,
|
|
# distribute, sublicense, and/or sell copies of the Software, and to
|
|
# permit persons to whom the Software is furnished to do so, subject to
|
|
# the following conditions:
|
|
#
|
|
# The above copyright notice and this permission notice shall be
|
|
# included in all copies or substantial portions of the Software.
|
|
|
|
"""If the title is empty, try to extract track and title from the
|
|
filename.
|
|
"""
|
|
from __future__ import division, absolute_import, print_function
|
|
|
|
from beets import plugins
|
|
from beets.util import displayable_path
|
|
import os
|
|
import re
|
|
|
|
|
|
# Filename field extraction patterns.
|
|
PATTERNS = [
    # "01 - Track 01" and "01": do nothing. Neither pattern has a named
    # group, so a match on them carries no field values to extract.
    r'^(\d+)\s*-\s*track\s*\d$',
    r'^\d+$',

    # Useful patterns.

    # Artist, title and a trailing tag, optionally led by a track number.
    r'^(?P<artist>.+)-(?P<title>.+)-(?P<tag>.*)$',
    r'^(?P<track>\d+)\s*-(?P<artist>.+)-(?P<title>.+)-(?P<tag>.*)$',
    r'^(?P<track>\d+)\s(?P<artist>.+)-(?P<title>.+)-(?P<tag>.*)$',

    # Artist and title, optionally led by a track number.
    r'^(?P<artist>.+)-(?P<title>.+)$',
    r'^(?P<track>\d+)\.\s*(?P<artist>.+)-(?P<title>.+)$',
    r'^(?P<track>\d+)\s*-\s*(?P<artist>.+)-(?P<title>.+)$',
    r'^(?P<track>\d+)\s*-(?P<artist>.+)-(?P<title>.+)$',
    r'^(?P<track>\d+)\s(?P<artist>.+)-(?P<title>.+)$',

    # Title only, optionally led by a track number.
    r'^(?P<title>.+)$',
    r'^(?P<track>\d+)\.\s*(?P<title>.+)$',
    r'^(?P<track>\d+)\s*-\s*(?P<title>.+)$',
    r'^(?P<track>\d+)\s(?P<title>.+)$',

    # "<title> by <artist>".
    r'^(?P<title>.+) by (?P<artist>.+)$',
]
|
|
|
|
# Titles considered "empty" and in need of replacement.
|
|
BAD_TITLE_PATTERNS = [
    # The empty string.
    r'^$',
    # Optional number and dash, then "track" followed by digits.
    r'\d+?\s?-?\s*track\s*\d+',
]
|
|
|
|
|
|
def equal(seq):
    """Return True when `seq` holds at most one distinct element.

    The elements are collected into a set, so an empty sequence also
    counts as equal.
    """
    distinct = set(seq)
    return len(distinct) < 2
|
|
|
|
|
|
def equal_fields(matchdict, field):
    """Check whether every value dict in `matchdict` carries the same
    value under `field`. (A field that never varies is probably not
    the title.)
    """
    field_values = [fields[field] for fields in matchdict.values()]
    return equal(field_values)
|
|
|
|
|
|
def all_matches(names, pattern):
    """Match every filename in the item/filename mapping `names`
    against `pattern` (case-insensitively, anchored at the start).

    Returns a dictionary mapping each item to the dict of named
    subpattern values from its match. Returns None as soon as one
    filename fails to match or the match has no named groups.
    """
    extracted = {}
    for item, name in names.items():
        found = re.match(pattern, name, re.IGNORECASE)
        fields = found.groupdict() if found else None
        if not fields:
            # Either the regex does not apply or it has no named
            # capture groups, so no information can be extracted
            # from this filename.
            return None
        extracted[item] = fields
    return extracted
|
|
|
|
|
|
def bad_title(title):
    """Tell whether `title` is "bad" (empty or otherwise meaningless)
    and should be replaced.

    A title is bad when any entry of BAD_TITLE_PATTERNS matches it at
    the start, ignoring case.
    """
    return any(
        re.match(bad_pattern, title, re.IGNORECASE)
        for bad_pattern in BAD_TITLE_PATTERNS
    )
|
|
|
|
|
|
def apply_matches(d):
    """Given a mapping from items to field dicts, apply the fields to
    the objects.

    `d` maps each item to the dict of named groups extracted from its
    filename. Items are modified in place: `artist` is filled in when
    empty, `title` is replaced when `bad_title` considers it bad, and
    `track` is set when it is 0. Nothing is returned.

    The function gives up without touching any item when a "tag" field
    is present but differs between filenames, or when both the
    "artist" and "title" fields vary. An empty mapping is a no-op.
    """
    # Nothing to apply for an empty mapping.
    if not d:
        return

    # `unicode` only exists on Python 2; fall back to `str` elsewhere.
    try:
        text_type = unicode  # noqa: F821
    except NameError:
        text_type = str

    # Use an arbitrary entry as the reference for the available fields.
    # `next(iter(...))` works on both Python 2 lists and Python 3 dict
    # views, which cannot be indexed.
    some_map = next(iter(d.values()))
    keys = some_map.keys()

    # Only proceed if the "tag" field is equal across all filenames.
    if 'tag' in keys and not equal_fields(d, 'tag'):
        return

    # Given both an "artist" and "title" field, assume that one is
    # *actually* the artist, which must be uniform, and use the other
    # for the title. This, of course, won't work for VA albums.
    if 'artist' in keys:
        if equal_fields(d, 'artist'):
            artist = some_map['artist']
            title_field = 'title'
        elif equal_fields(d, 'title'):
            # The groups are swapped: the uniform "title" group holds
            # the artist and the "artist" group holds the title.
            artist = some_map['title']
            title_field = 'artist'
        else:
            # Both vary. Abort.
            return

        # Never overwrite an artist that is already set.
        for item in d:
            if not item.artist:
                item.artist = artist

    # No artist field: remaining field is the title.
    else:
        title_field = 'title'

    # Apply the title and track.
    for item in d:
        if bad_title(item.title):
            item.title = text_type(d[item][title_field])
        if 'track' in d[item] and item.track == 0:
            item.track = int(d[item]['track'])
|
|
|
|
|
|
# Plugin structure and hook into import process.
|
|
|
|
class FromFilenamePlugin(plugins.BeetsPlugin):
    """Plugin that registers `filename_task` as a listener for the
    'import_task_start' event.
    """

    def __init__(self):
        super(FromFilenamePlugin, self).__init__()
        event, handler = 'import_task_start', filename_task
        self.register_listener(event, handler)
|
|
|
|
|
|
def filename_task(task, session):
    """Inspect the items of an import task and, when at least one of
    them has a bad title, try to recover fields from the filenames.

    Each regex in PATTERNS is tried in list order against the base
    filenames (no directory, no extension). Whenever every filename
    matches the same regex, the extracted fields are handed to
    `apply_matches`, which decides which group holds the title.
    `session` is accepted but not used here.
    """
    if task.is_album:
        items = task.items
    else:
        items = [task.item]

    # Look for suspicious (empty or meaningless) titles; every item is
    # checked.
    suspicious = [item for item in items if bad_title(item.title)]
    if not suspicious:
        return

    # Map each item to its base filename (no path or extension).
    names = {}
    for item in items:
        basename = os.path.basename(displayable_path(item.path))
        names[item] = os.path.splitext(basename)[0]

    # Look for useful information in the filenames.
    for pattern in PATTERNS:
        matched = all_matches(names, pattern)
        if matched:
            apply_matches(matched)
|