nzbToMedia/libs/common/beetsplug/fromfilename.py
Labrys of Knossos 56c6773c6b Update vendored beets to 1.6.0
Updates colorama to 0.4.6
Adds confuse version 1.7.0
Updates jellyfish to 0.9.0
Adds mediafile 0.10.1
Updates munkres to 1.1.4
Updates musicbrainzngs to 0.7.1
Updates mutagen to 1.46.0
Updates pyyaml to 6.0
Updates unidecode to 1.3.6
2022-11-29 00:44:48 -05:00

163 lines
5.1 KiB
Python

# This file is part of beets.
# Copyright 2016, Jan-Erik Dahlin
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
"""If the title is empty, try to extract track and title from the
filename.
"""
from beets import plugins
from beets.util import displayable_path
import os
import re
# Filename field extraction patterns.
PATTERNS = [
# Useful patterns.
r'^(?P<artist>.+)[\-_](?P<title>.+)[\-_](?P<tag>.*)$',
r'^(?P<track>\d+)[\s.\-_]+(?P<artist>.+)[\-_](?P<title>.+)[\-_](?P<tag>.*)$',
r'^(?P<artist>.+)[\-_](?P<title>.+)$',
r'^(?P<track>\d+)[\s.\-_]+(?P<artist>.+)[\-_](?P<title>.+)$',
r'^(?P<title>.+)$',
r'^(?P<track>\d+)[\s.\-_]+(?P<title>.+)$',
r'^(?P<track>\d+)\s+(?P<title>.+)$',
r'^(?P<title>.+) by (?P<artist>.+)$',
r'^(?P<track>\d+).*$',
]
# Titles considered "empty" and in need of replacement.
BAD_TITLE_PATTERNS = [
r'^$',
]
def equal(seq):
"""Determine whether a sequence holds identical elements.
"""
return len(set(seq)) <= 1
def equal_fields(matchdict, field):
"""Do all items in `matchdict`, whose values are dictionaries, have
the same value for `field`? (If they do, the field is probably not
the title.)
"""
return equal(m[field] for m in matchdict.values())
def all_matches(names, pattern):
"""If all the filenames in the item/filename mapping match the
pattern, return a dictionary mapping the items to dictionaries
giving the value for each named subpattern in the match. Otherwise,
return None.
"""
matches = {}
for item, name in names.items():
m = re.match(pattern, name, re.IGNORECASE)
if m and m.groupdict():
# Only yield a match when the regex applies *and* has
# capture groups. Otherwise, no information can be extracted
# from the filename.
matches[item] = m.groupdict()
else:
return None
return matches
def bad_title(title):
"""Determine whether a given title is "bad" (empty or otherwise
meaningless) and in need of replacement.
"""
for pat in BAD_TITLE_PATTERNS:
if re.match(pat, title, re.IGNORECASE):
return True
return False
def apply_matches(d):
"""Given a mapping from items to field dicts, apply the fields to
the objects.
"""
some_map = list(d.values())[0]
keys = some_map.keys()
# Only proceed if the "tag" field is equal across all filenames.
if 'tag' in keys and not equal_fields(d, 'tag'):
return
# Given both an "artist" and "title" field, assume that one is
# *actually* the artist, which must be uniform, and use the other
# for the title. This, of course, won't work for VA albums.
if 'artist' in keys:
if equal_fields(d, 'artist'):
artist = some_map['artist']
title_field = 'title'
elif equal_fields(d, 'title'):
artist = some_map['title']
title_field = 'artist'
else:
# Both vary. Abort.
return
for item in d:
if not item.artist:
item.artist = artist
# No artist field: remaining field is the title.
else:
title_field = 'title'
# Apply the title and track.
for item in d:
if bad_title(item.title):
item.title = str(d[item][title_field])
if 'track' in d[item] and item.track == 0:
item.track = int(d[item]['track'])
# Plugin structure and hook into import process.
class FromFilenamePlugin(plugins.BeetsPlugin):
def __init__(self):
super().__init__()
self.register_listener('import_task_start', filename_task)
def filename_task(task, session):
"""Examine each item in the task to see if we can extract a title
from the filename. Try to match all filenames to a number of
regexps, starting with the most complex patterns and successively
trying less complex patterns. As soon as all filenames match the
same regex we can make an educated guess of which part of the
regex that contains the title.
"""
items = task.items if task.is_album else [task.item]
# Look for suspicious (empty or meaningless) titles.
missing_titles = sum(bad_title(i.title) for i in items)
if missing_titles:
# Get the base filenames (no path or extension).
names = {}
for item in items:
path = displayable_path(item.path)
name, _ = os.path.splitext(os.path.basename(path))
names[item] = name
# Look for useful information in the filenames.
for pattern in PATTERNS:
d = all_matches(names, pattern)
if d:
apply_matches(d)