plexpy/lib/musicbrainzngs/mbxml.py
2019-10-05 23:27:16 -07:00

818 lines
28 KiB
Python

# This file is part of the musicbrainzngs library
# Copyright (C) Alastair Porter, Adrian Sampson, and others
# This file is distributed under a BSD-2-Clause type license.
# See the COPYING file for more information.
import re
import xml.etree.ElementTree as ET
import logging
from . import util
def fixtag(tag, namespaces):
# given a decorated tag (of the form {uri}tag), return prefixed
# tag and namespace declaration, if any
if isinstance(tag, ET.QName):
tag = tag.text
namespace_uri, tag = tag[1:].split("}", 1)
prefix = namespaces.get(namespace_uri)
if prefix is None:
prefix = "ns%d" % len(namespaces)
namespaces[namespace_uri] = prefix
if prefix == "xml":
xmlns = None
else:
xmlns = ("xmlns:%s" % prefix, namespace_uri)
else:
xmlns = None
return "%s:%s" % (prefix, tag), xmlns
NS_MAP = {"http://musicbrainz.org/ns/mmd-2.0#": "ws2",
"http://musicbrainz.org/ns/ext#-2.0": "ext"}
_log = logging.getLogger("musicbrainzngs")
def get_error_message(error):
""" Given an error XML message from the webservice containing
<error><text>x</text><text>y</text></error>, return a list
of [x, y]"""
try:
tree = util.bytes_to_elementtree(error)
root = tree.getroot()
errors = []
if root.tag == "error":
for ch in root:
if ch.tag == "text":
errors.append(ch.text)
return errors
except ET.ParseError:
return None
def make_artist_credit(artists):
names = []
for artist in artists:
if isinstance(artist, dict):
if "name" in artist:
names.append(artist.get("name", ""))
else:
names.append(artist.get("artist", {}).get("name", ""))
else:
names.append(artist)
return "".join(names)
def parse_elements(valid_els, inner_els, element):
""" Extract single level subelements from an element.
For example, given the element:
<element>
<subelement>Text</subelement>
</element>
and a list valid_els that contains "subelement",
return a dict {'subelement': 'Text'}
Delegate the parsing of multi-level subelements to another function.
For example, given the element:
<element>
<subelement>
<a>Foo</a><b>Bar</b>
</subelement>
</element>
and a dictionary {'subelement': parse_subelement},
call parse_subelement(<subelement>) and
return a dict {'subelement': <result>}
if parse_subelement returns a tuple of the form
(True, {'subelement-key': <result>})
then merge the second element of the tuple into the
result (which may have a key other than 'subelement' or
more than 1 key)
"""
result = {}
for sub in element:
t = fixtag(sub.tag, NS_MAP)[0]
if ":" in t:
t = t.split(":")[1]
if t in valid_els:
result[t] = sub.text or ""
elif t in inner_els.keys():
inner_result = inner_els[t](sub)
if isinstance(inner_result, tuple) and inner_result[0]:
result.update(inner_result[1])
else:
result[t] = inner_result
# add counts for lists when available
m = re.match(r'([a-z0-9-]+)-list', t)
if m and "count" in sub.attrib:
result["%s-count" % m.group(1)] = int(sub.attrib["count"])
else:
_log.info("in <%s>, uncaught <%s>",
fixtag(element.tag, NS_MAP)[0], t)
return result
def parse_attributes(attributes, element):
""" Extract attributes from an element.
For example, given the element:
<element type="Group" />
and a list attributes that contains "type",
return a dict {'type': 'Group'}
"""
result = {}
for attr in element.attrib:
if "{" in attr:
a = fixtag(attr, NS_MAP)[0]
else:
a = attr
if a in attributes:
result[a] = element.attrib[attr]
else:
_log.info("in <%s>, uncaught attribute %s", fixtag(element.tag, NS_MAP)[0], attr)
return result
def parse_message(message):
tree = util.bytes_to_elementtree(message)
root = tree.getroot()
result = {}
valid_elements = {"area": parse_area,
"artist": parse_artist,
"instrument": parse_instrument,
"label": parse_label,
"place": parse_place,
"event": parse_event,
"release": parse_release,
"release-group": parse_release_group,
"series": parse_series,
"recording": parse_recording,
"work": parse_work,
"url": parse_url,
"disc": parse_disc,
"cdstub": parse_cdstub,
"isrc": parse_isrc,
"annotation-list": parse_annotation_list,
"area-list": parse_area_list,
"artist-list": parse_artist_list,
"label-list": parse_label_list,
"place-list": parse_place_list,
"event-list": parse_event_list,
"instrument-list": parse_instrument_list,
"release-list": parse_release_list,
"release-group-list": parse_release_group_list,
"series-list": parse_series_list,
"recording-list": parse_recording_list,
"work-list": parse_work_list,
"url-list": parse_url_list,
"collection-list": parse_collection_list,
"collection": parse_collection,
"message": parse_response_message
}
result.update(parse_elements([], valid_elements, root))
return result
def parse_response_message(message):
return parse_elements(["text"], {}, message)
def parse_collection_list(cl):
return [parse_collection(c) for c in cl]
def parse_collection(collection):
result = {}
attribs = ["id", "type", "entity-type"]
elements = ["name", "editor"]
inner_els = {"release-list": parse_release_list,
"artist-list": parse_artist_list,
"event-list": parse_event_list,
"place-list": parse_place_list,
"recording-list": parse_recording_list,
"work-list": parse_work_list}
result.update(parse_attributes(attribs, collection))
result.update(parse_elements(elements, inner_els, collection))
return result
def parse_annotation_list(al):
return [parse_annotation(a) for a in al]
def parse_annotation(annotation):
result = {}
attribs = ["type", "ext:score"]
elements = ["entity", "name", "text"]
result.update(parse_attributes(attribs, annotation))
result.update(parse_elements(elements, {}, annotation))
return result
def parse_lifespan(lifespan):
parts = parse_elements(["begin", "end", "ended"], {}, lifespan)
return parts
def parse_area_list(al):
return [parse_area(a) for a in al]
def parse_area(area):
result = {}
attribs = ["id", "type", "ext:score"]
elements = ["name", "sort-name", "disambiguation"]
inner_els = {"life-span": parse_lifespan,
"alias-list": parse_alias_list,
"relation-list": parse_relation_list,
"annotation": parse_annotation,
"iso-3166-1-code-list": parse_element_list,
"iso-3166-2-code-list": parse_element_list,
"iso-3166-3-code-list": parse_element_list}
result.update(parse_attributes(attribs, area))
result.update(parse_elements(elements, inner_els, area))
return result
def parse_artist_list(al):
return [parse_artist(a) for a in al]
def parse_artist(artist):
result = {}
attribs = ["id", "type", "ext:score"]
elements = ["name", "sort-name", "country", "user-rating",
"disambiguation", "gender", "ipi"]
inner_els = {"area": parse_area,
"begin-area": parse_area,
"end-area": parse_area,
"life-span": parse_lifespan,
"recording-list": parse_recording_list,
"relation-list": parse_relation_list,
"release-list": parse_release_list,
"release-group-list": parse_release_group_list,
"work-list": parse_work_list,
"tag-list": parse_tag_list,
"user-tag-list": parse_tag_list,
"rating": parse_rating,
"ipi-list": parse_element_list,
"isni-list": parse_element_list,
"alias-list": parse_alias_list,
"annotation": parse_annotation}
result.update(parse_attributes(attribs, artist))
result.update(parse_elements(elements, inner_els, artist))
return result
def parse_coordinates(c):
return parse_elements(['latitude', 'longitude'], {}, c)
def parse_place_list(pl):
return [parse_place(p) for p in pl]
def parse_place(place):
result = {}
attribs = ["id", "type", "ext:score"]
elements = ["name", "address",
"ipi", "disambiguation"]
inner_els = {"area": parse_area,
"coordinates": parse_coordinates,
"life-span": parse_lifespan,
"tag-list": parse_tag_list,
"user-tag-list": parse_tag_list,
"alias-list": parse_alias_list,
"relation-list": parse_relation_list,
"annotation": parse_annotation}
result.update(parse_attributes(attribs, place))
result.update(parse_elements(elements, inner_els, place))
return result
def parse_event_list(el):
return [parse_event(e) for e in el]
def parse_event(event):
result = {}
attribs = ["id", "type", "ext:score"]
elements = ["name", "time", "setlist", "cancelled", "disambiguation", "user-rating"]
inner_els = {"life-span": parse_lifespan,
"relation-list": parse_relation_list,
"alias-list": parse_alias_list,
"tag-list": parse_tag_list,
"user-tag-list": parse_tag_list,
"rating": parse_rating}
result.update(parse_attributes(attribs, event))
result.update(parse_elements(elements, inner_els, event))
return result
def parse_instrument(instrument):
result = {}
attribs = ["id", "type", "ext:score"]
elements = ["name", "description", "disambiguation"]
inner_els = {"relation-list": parse_relation_list,
"tag-list": parse_tag_list,
"alias-list": parse_alias_list,
"annotation": parse_annotation}
result.update(parse_attributes(attribs, instrument))
result.update(parse_elements(elements, inner_els, instrument))
return result
def parse_label_list(ll):
return [parse_label(l) for l in ll]
def parse_label(label):
result = {}
attribs = ["id", "type", "ext:score"]
elements = ["name", "sort-name", "country", "label-code", "user-rating",
"ipi", "disambiguation"]
inner_els = {"area": parse_area,
"life-span": parse_lifespan,
"release-list": parse_release_list,
"tag-list": parse_tag_list,
"user-tag-list": parse_tag_list,
"rating": parse_rating,
"ipi-list": parse_element_list,
"alias-list": parse_alias_list,
"relation-list": parse_relation_list,
"annotation": parse_annotation}
result.update(parse_attributes(attribs, label))
result.update(parse_elements(elements, inner_els, label))
return result
def parse_relation_target(tgt):
attributes = parse_attributes(['id'], tgt)
if 'id' in attributes:
return (True, {'target-id': attributes['id']})
else:
return (True, {'target-id': tgt.text})
def parse_relation_list(rl):
attribs = ["target-type"]
ttype = parse_attributes(attribs, rl)
key = "%s-relation-list" % ttype["target-type"]
return (True, {key: [parse_relation(r) for r in rl]})
def parse_relation(relation):
result = {}
attribs = ["type", "type-id"]
elements = ["target", "direction", "begin", "end", "ended", "ordering-key"]
inner_els = {"area": parse_area,
"artist": parse_artist,
"instrument": parse_instrument,
"label": parse_label,
"place": parse_place,
"event": parse_event,
"recording": parse_recording,
"release": parse_release,
"release-group": parse_release_group,
"series": parse_series,
"attribute-list": parse_element_list,
"work": parse_work,
"target": parse_relation_target
}
result.update(parse_attributes(attribs, relation))
result.update(parse_elements(elements, inner_els, relation))
# We parse attribute-list again to get attributes that have both
# text and attribute values
result.update(parse_elements(['target-credit'], {"attribute-list": parse_relation_attribute_list}, relation))
return result
def parse_relation_attribute_list(attributelist):
ret = []
for attribute in attributelist:
ret.append(parse_relation_attribute_element(attribute))
return (True, {"attributes": ret})
def parse_relation_attribute_element(element):
# Parses an attribute into a dictionary containing an element
# {"attribute": <text value>} and also an additional element
# containing any xml attributes.
# e.g <attribute value="BuxWV 1">number</attribute>
# -> {"attribute": "number", "value": "BuxWV 1"}
result = {}
for attr in element.attrib:
if "{" in attr:
a = fixtag(attr, NS_MAP)[0]
else:
a = attr
result[a] = element.attrib[attr]
result["attribute"] = element.text
return result
def parse_release(release):
result = {}
attribs = ["id", "ext:score"]
elements = ["title", "status", "disambiguation", "quality", "country",
"barcode", "date", "packaging", "asin"]
inner_els = {"text-representation": parse_text_representation,
"artist-credit": parse_artist_credit,
"label-info-list": parse_label_info_list,
"medium-list": parse_medium_list,
"release-group": parse_release_group,
"tag-list": parse_tag_list,
"user-tag-list": parse_tag_list,
"relation-list": parse_relation_list,
"annotation": parse_annotation,
"cover-art-archive": parse_caa,
"release-event-list": parse_release_event_list}
result.update(parse_attributes(attribs, release))
result.update(parse_elements(elements, inner_els, release))
if "artist-credit" in result:
result["artist-credit-phrase"] = make_artist_credit(
result["artist-credit"])
return result
def parse_medium_list(ml):
"""medium-list results from search have an additional
<track-count> element containing the number of tracks
over all mediums. Optionally add this"""
medium_list = []
track_count = None
for m in ml:
tag = fixtag(m.tag, NS_MAP)[0]
if tag == "ws2:medium":
medium_list.append(parse_medium(m))
elif tag == "ws2:track-count":
track_count = int(m.text)
ret = {"medium-list": medium_list}
if track_count is not None:
ret["medium-track-count"] = track_count
return (True, ret)
def parse_release_event_list(rel):
return [parse_release_event(re) for re in rel]
def parse_release_event(event):
result = {}
elements = ["date"]
inner_els = {"area": parse_area}
result.update(parse_elements(elements, inner_els, event))
return result
def parse_medium(medium):
result = {}
elements = ["position", "format", "title"]
inner_els = {"disc-list": parse_disc_list,
"pregap": parse_track,
"track-list": parse_track_list,
"data-track-list": parse_track_list}
result.update(parse_elements(elements, inner_els, medium))
return result
def parse_disc_list(dl):
return [parse_disc(d) for d in dl]
def parse_text_representation(textr):
return parse_elements(["language", "script"], {}, textr)
def parse_release_group(rg):
result = {}
attribs = ["id", "type", "ext:score"]
elements = ["title", "user-rating", "first-release-date", "primary-type",
"disambiguation"]
inner_els = {"artist-credit": parse_artist_credit,
"release-list": parse_release_list,
"tag-list": parse_tag_list,
"user-tag-list": parse_tag_list,
"secondary-type-list": parse_element_list,
"relation-list": parse_relation_list,
"rating": parse_rating,
"annotation": parse_annotation}
result.update(parse_attributes(attribs, rg))
result.update(parse_elements(elements, inner_els, rg))
if "artist-credit" in result:
result["artist-credit-phrase"] = make_artist_credit(result["artist-credit"])
return result
def parse_recording(recording):
result = {}
attribs = ["id", "ext:score"]
elements = ["title", "length", "user-rating", "disambiguation", "video"]
inner_els = {"artist-credit": parse_artist_credit,
"release-list": parse_release_list,
"tag-list": parse_tag_list,
"user-tag-list": parse_tag_list,
"rating": parse_rating,
"isrc-list": parse_external_id_list,
"relation-list": parse_relation_list,
"annotation": parse_annotation}
result.update(parse_attributes(attribs, recording))
result.update(parse_elements(elements, inner_els, recording))
if "artist-credit" in result:
result["artist-credit-phrase"] = make_artist_credit(result["artist-credit"])
return result
def parse_series_list(sl):
return [parse_series(s) for s in sl]
def parse_series(series):
result = {}
attribs = ["id", "type", "ext:score"]
elements = ["name", "disambiguation"]
inner_els = {"alias-list": parse_alias_list,
"relation-list": parse_relation_list,
"annotation": parse_annotation}
result.update(parse_attributes(attribs, series))
result.update(parse_elements(elements, inner_els, series))
return result
def parse_external_id_list(pl):
return [parse_attributes(["id"], p)["id"] for p in pl]
def parse_element_list(el):
return [e.text for e in el]
def parse_work_list(wl):
return [parse_work(w) for w in wl]
def parse_work(work):
result = {}
attribs = ["id", "ext:score", "type"]
elements = ["title", "user-rating", "language", "iswc", "disambiguation"]
inner_els = {"tag-list": parse_tag_list,
"user-tag-list": parse_tag_list,
"rating": parse_rating,
"alias-list": parse_alias_list,
"iswc-list": parse_element_list,
"relation-list": parse_relation_list,
"annotation": parse_response_message,
"attribute-list": parse_work_attribute_list
}
result.update(parse_attributes(attribs, work))
result.update(parse_elements(elements, inner_els, work))
return result
def parse_work_attribute_list(wal):
return [parse_work_attribute(wa) for wa in wal]
def parse_work_attribute(wa):
attribs = ["type"]
typeinfo = parse_attributes(attribs, wa)
result = {}
if typeinfo:
result = {"attribute": typeinfo["type"],
"value": wa.text}
return result
def parse_url_list(ul):
return [parse_url(u) for u in ul]
def parse_url(url):
result = {}
attribs = ["id"]
elements = ["resource"]
inner_els = {"relation-list": parse_relation_list}
result.update(parse_attributes(attribs, url))
result.update(parse_elements(elements, inner_els, url))
return result
def parse_disc(disc):
result = {}
attribs = ["id"]
elements = ["sectors"]
inner_els = {"release-list": parse_release_list,
"offset-list": parse_offset_list
}
result.update(parse_attributes(attribs, disc))
result.update(parse_elements(elements, inner_els, disc))
return result
def parse_cdstub(cdstub):
result = {}
attribs = ["id"]
elements = ["title", "artist", "barcode"]
inner_els = {"track-list": parse_track_list}
result.update(parse_attributes(attribs, cdstub))
result.update(parse_elements(elements, inner_els, cdstub))
return result
def parse_offset_list(ol):
return [int(o.text) for o in ol]
def parse_instrument_list(rl):
result = []
for r in rl:
result.append(parse_instrument(r))
return result
def parse_release_list(rl):
result = []
for r in rl:
result.append(parse_release(r))
return result
def parse_release_group_list(rgl):
result = []
for rg in rgl:
result.append(parse_release_group(rg))
return result
def parse_isrc(isrc):
result = {}
attribs = ["id"]
inner_els = {"recording-list": parse_recording_list}
result.update(parse_attributes(attribs, isrc))
result.update(parse_elements([], inner_els, isrc))
return result
def parse_recording_list(recs):
result = []
for r in recs:
result.append(parse_recording(r))
return result
def parse_artist_credit(ac):
result = []
for namecredit in ac:
result.append(parse_name_credit(namecredit))
join = parse_attributes(["joinphrase"], namecredit)
if "joinphrase" in join:
result.append(join["joinphrase"])
return result
def parse_name_credit(nc):
result = {}
elements = ["name"]
inner_els = {"artist": parse_artist}
result.update(parse_elements(elements, inner_els, nc))
return result
def parse_label_info_list(lil):
result = []
for li in lil:
result.append(parse_label_info(li))
return result
def parse_label_info(li):
result = {}
elements = ["catalog-number"]
inner_els = {"label": parse_label}
result.update(parse_elements(elements, inner_els, li))
return result
def parse_track_list(tl):
result = []
for t in tl:
result.append(parse_track(t))
return result
def parse_track(track):
result = {}
attribs = ["id"]
elements = ["number", "position", "title", "length"]
inner_els = {"recording": parse_recording,
"artist-credit": parse_artist_credit}
result.update(parse_attributes(attribs, track))
result.update(parse_elements(elements, inner_els, track))
if "artist-credit" in result.get("recording", {}) and "artist-credit" not in result:
result["artist-credit"] = result["recording"]["artist-credit"]
if "artist-credit" in result:
result["artist-credit-phrase"] = make_artist_credit(result["artist-credit"])
# Make a length field that contains track length or recording length
track_or_recording = None
if "length" in result:
track_or_recording = result["length"]
elif result.get("recording", {}).get("length"):
track_or_recording = result.get("recording", {}).get("length")
if track_or_recording:
result["track_or_recording_length"] = track_or_recording
return result
def parse_tag_list(tl):
return [parse_tag(t) for t in tl]
def parse_tag(tag):
result = {}
attribs = ["count"]
elements = ["name"]
result.update(parse_attributes(attribs, tag))
result.update(parse_elements(elements, {}, tag))
return result
def parse_rating(rating):
result = {}
attribs = ["votes-count"]
result.update(parse_attributes(attribs, rating))
result["rating"] = rating.text
return result
def parse_alias_list(al):
return [parse_alias(a) for a in al]
def parse_alias(alias):
result = {}
attribs = ["locale", "sort-name", "type", "primary",
"begin-date", "end-date"]
result.update(parse_attributes(attribs, alias))
result["alias"] = alias.text
return result
def parse_caa(caa_element):
result = {}
elements = ["artwork", "count", "front", "back", "darkened"]
result.update(parse_elements(elements, {}, caa_element))
return result
###
def make_barcode_request(release2barcode):
NS = "http://musicbrainz.org/ns/mmd-2.0#"
root = ET.Element("{%s}metadata" % NS)
rel_list = ET.SubElement(root, "{%s}release-list" % NS)
for release, barcode in release2barcode.items():
rel_xml = ET.SubElement(rel_list, "{%s}release" % NS)
bar_xml = ET.SubElement(rel_xml, "{%s}barcode" % NS)
rel_xml.set("{%s}id" % NS, release)
bar_xml.text = barcode
return ET.tostring(root, "utf-8")
def make_tag_request(**kwargs):
NS = "http://musicbrainz.org/ns/mmd-2.0#"
root = ET.Element("{%s}metadata" % NS)
for entity_type in ['artist', 'label', 'place', 'recording', 'release', 'release_group', 'work']:
entity_tags = kwargs.pop(entity_type + '_tags', None)
if entity_tags is not None:
e_list = ET.SubElement(root, "{%s}%s-list" % (NS, entity_type.replace('_', '-')))
for e, tags in entity_tags.items():
e_xml = ET.SubElement(e_list, "{%s}%s" % (NS, entity_type.replace('_', '-')))
e_xml.set("{%s}id" % NS, e)
taglist = ET.SubElement(e_xml, "{%s}user-tag-list" % NS)
for tag in tags:
usertag_xml = ET.SubElement(taglist, "{%s}user-tag" % NS)
name_xml = ET.SubElement(usertag_xml, "{%s}name" % NS)
name_xml.text = tag
if kwargs.keys():
raise TypeError("make_tag_request() got an unexpected keyword argument '%s'" % kwargs.popitem()[0])
return ET.tostring(root, "utf-8")
def make_rating_request(**kwargs):
NS = "http://musicbrainz.org/ns/mmd-2.0#"
root = ET.Element("{%s}metadata" % NS)
for entity_type in ['artist', 'label', 'recording', 'release_group', 'work']:
entity_ratings = kwargs.pop(entity_type + '_ratings', None)
if entity_ratings is not None:
e_list = ET.SubElement(root, "{%s}%s-list" % (NS, entity_type.replace('_', '-')))
for e, rating in entity_ratings.items():
e_xml = ET.SubElement(e_list, "{%s}%s" % (NS, entity_type.replace('_', '-')))
e_xml.set("{%s}id" % NS, e)
rating_xml = ET.SubElement(e_xml, "{%s}user-rating" % NS)
rating_xml.text = str(rating)
if kwargs.keys():
raise TypeError("make_rating_request() got an unexpected keyword argument '%s'" % kwargs.popitem()[0])
return ET.tostring(root, "utf-8")
def make_isrc_request(recording2isrcs):
NS = "http://musicbrainz.org/ns/mmd-2.0#"
root = ET.Element("{%s}metadata" % NS)
rec_list = ET.SubElement(root, "{%s}recording-list" % NS)
for rec, isrcs in recording2isrcs.items():
if len(isrcs) > 0:
rec_xml = ET.SubElement(rec_list, "{%s}recording" % NS)
rec_xml.set("{%s}id" % NS, rec)
isrc_list_xml = ET.SubElement(rec_xml, "{%s}isrc-list" % NS)
isrc_list_xml.set("{%s}count" % NS, str(len(isrcs)))
for isrc in isrcs:
isrc_xml = ET.SubElement(isrc_list_xml, "{%s}isrc" % NS)
isrc_xml.set("{%s}id" % NS, isrc)
return ET.tostring(root, "utf-8")