plexpy/lib/mutagen/flac.py
2015-02-22 18:32:51 +02:00

839 lines
28 KiB
Python

# -*- coding: utf-8 -*-
# Copyright (C) 2005 Joe Wreschnig
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of version 2 of the GNU General Public License as
# published by the Free Software Foundation.
"""Read and write FLAC Vorbis comments and stream information.
Read more about FLAC at http://flac.sourceforge.net.
FLAC supports arbitrary metadata blocks. The two most interesting ones
are the FLAC stream information block, and the Vorbis comment block;
these are also the only ones Mutagen can currently read.
This module does not handle Ogg FLAC files.
Based off documentation available at
http://flac.sourceforge.net/format.html
"""
__all__ = ["FLAC", "Open", "delete"]
import struct
from ._vorbis import VCommentDict
import mutagen
from ._compat import cBytesIO, endswith, chr_
from mutagen._util import insert_bytes, MutagenError
from mutagen.id3 import BitPaddedInt
from functools import reduce
class error(IOError, MutagenError):
pass
class FLACNoHeaderError(error):
pass
class FLACVorbisError(ValueError, error):
pass
def to_int_be(data):
"""Convert an arbitrarily-long string to a long using big-endian
byte order."""
return reduce(lambda a, b: (a << 8) + b, bytearray(data), 0)
class StrictFileObject(object):
"""Wraps a file-like object and raises an exception if the requested
amount of data to read isn't returned."""
def __init__(self, fileobj):
self._fileobj = fileobj
for m in ["close", "tell", "seek", "write", "name"]:
if hasattr(fileobj, m):
setattr(self, m, getattr(fileobj, m))
def read(self, size=-1):
data = self._fileobj.read(size)
if size >= 0 and len(data) != size:
raise error("file said %d bytes, read %d bytes" % (
size, len(data)))
return data
def tryread(self, *args):
return self._fileobj.read(*args)
class MetadataBlock(object):
"""A generic block of FLAC metadata.
This class is extended by specific used as an ancestor for more specific
blocks, and also as a container for data blobs of unknown blocks.
Attributes:
* data -- raw binary data for this block
"""
_distrust_size = False
def __init__(self, data):
"""Parse the given data string or file-like as a metadata block.
The metadata header should not be included."""
if data is not None:
if not isinstance(data, StrictFileObject):
if isinstance(data, bytes):
data = cBytesIO(data)
elif not hasattr(data, 'read'):
raise TypeError(
"StreamInfo requires string data or a file-like")
data = StrictFileObject(data)
self.load(data)
def load(self, data):
self.data = data.read()
def write(self):
return self.data
@staticmethod
def writeblocks(blocks):
"""Render metadata block as a byte string."""
data = []
codes = [[block.code, block.write()] for block in blocks]
codes[-1][0] |= 128
for code, datum in codes:
byte = chr_(code)
if len(datum) > 2 ** 24:
raise error("block is too long to write")
length = struct.pack(">I", len(datum))[-3:]
data.append(byte + length + datum)
return b"".join(data)
@staticmethod
def group_padding(blocks):
"""Consolidate FLAC padding metadata blocks.
The overall size of the rendered blocks does not change, so
this adds several bytes of padding for each merged block.
"""
paddings = [b for b in blocks if isinstance(b, Padding)]
for p in paddings:
blocks.remove(p)
# total padding size is the sum of padding sizes plus 4 bytes
# per removed header.
size = sum(padding.length for padding in paddings)
padding = Padding()
padding.length = size + 4 * (len(paddings) - 1)
blocks.append(padding)
class StreamInfo(MetadataBlock, mutagen.StreamInfo):
"""FLAC stream information.
This contains information about the audio data in the FLAC file.
Unlike most stream information objects in Mutagen, changes to this
one will rewritten to the file when it is saved. Unless you are
actually changing the audio stream itself, don't change any
attributes of this block.
Attributes:
* min_blocksize -- minimum audio block size
* max_blocksize -- maximum audio block size
* sample_rate -- audio sample rate in Hz
* channels -- audio channels (1 for mono, 2 for stereo)
* bits_per_sample -- bits per sample
* total_samples -- total samples in file
* length -- audio length in seconds
"""
code = 0
def __eq__(self, other):
try:
return (self.min_blocksize == other.min_blocksize and
self.max_blocksize == other.max_blocksize and
self.sample_rate == other.sample_rate and
self.channels == other.channels and
self.bits_per_sample == other.bits_per_sample and
self.total_samples == other.total_samples)
except:
return False
__hash__ = MetadataBlock.__hash__
def load(self, data):
self.min_blocksize = int(to_int_be(data.read(2)))
self.max_blocksize = int(to_int_be(data.read(2)))
self.min_framesize = int(to_int_be(data.read(3)))
self.max_framesize = int(to_int_be(data.read(3)))
# first 16 bits of sample rate
sample_first = to_int_be(data.read(2))
# last 4 bits of sample rate, 3 of channels, first 1 of bits/sample
sample_channels_bps = to_int_be(data.read(1))
# last 4 of bits/sample, 36 of total samples
bps_total = to_int_be(data.read(5))
sample_tail = sample_channels_bps >> 4
self.sample_rate = int((sample_first << 4) + sample_tail)
if not self.sample_rate:
raise error("A sample rate value of 0 is invalid")
self.channels = int(((sample_channels_bps >> 1) & 7) + 1)
bps_tail = bps_total >> 36
bps_head = (sample_channels_bps & 1) << 4
self.bits_per_sample = int(bps_head + bps_tail + 1)
self.total_samples = bps_total & 0xFFFFFFFFF
self.length = self.total_samples / float(self.sample_rate)
self.md5_signature = to_int_be(data.read(16))
def write(self):
f = cBytesIO()
f.write(struct.pack(">I", self.min_blocksize)[-2:])
f.write(struct.pack(">I", self.max_blocksize)[-2:])
f.write(struct.pack(">I", self.min_framesize)[-3:])
f.write(struct.pack(">I", self.max_framesize)[-3:])
# first 16 bits of sample rate
f.write(struct.pack(">I", self.sample_rate >> 4)[-2:])
# 4 bits sample, 3 channel, 1 bps
byte = (self.sample_rate & 0xF) << 4
byte += ((self.channels - 1) & 7) << 1
byte += ((self.bits_per_sample - 1) >> 4) & 1
f.write(chr_(byte))
# 4 bits of bps, 4 of sample count
byte = ((self.bits_per_sample - 1) & 0xF) << 4
byte += (self.total_samples >> 32) & 0xF
f.write(chr_(byte))
# last 32 of sample count
f.write(struct.pack(">I", self.total_samples & 0xFFFFFFFF))
# MD5 signature
sig = self.md5_signature
f.write(struct.pack(
">4I", (sig >> 96) & 0xFFFFFFFF, (sig >> 64) & 0xFFFFFFFF,
(sig >> 32) & 0xFFFFFFFF, sig & 0xFFFFFFFF))
return f.getvalue()
def pprint(self):
return "FLAC, %.2f seconds, %d Hz" % (self.length, self.sample_rate)
class SeekPoint(tuple):
"""A single seek point in a FLAC file.
Placeholder seek points have first_sample of 0xFFFFFFFFFFFFFFFFL,
and byte_offset and num_samples undefined. Seek points must be
sorted in ascending order by first_sample number. Seek points must
be unique by first_sample number, except for placeholder
points. Placeholder points must occur last in the table and there
may be any number of them.
Attributes:
* first_sample -- sample number of first sample in the target frame
* byte_offset -- offset from first frame to target frame
* num_samples -- number of samples in target frame
"""
def __new__(cls, first_sample, byte_offset, num_samples):
return super(cls, SeekPoint).__new__(
cls, (first_sample, byte_offset, num_samples))
first_sample = property(lambda self: self[0])
byte_offset = property(lambda self: self[1])
num_samples = property(lambda self: self[2])
class SeekTable(MetadataBlock):
"""Read and write FLAC seek tables.
Attributes:
* seekpoints -- list of SeekPoint objects
"""
__SEEKPOINT_FORMAT = '>QQH'
__SEEKPOINT_SIZE = struct.calcsize(__SEEKPOINT_FORMAT)
code = 3
def __init__(self, data):
self.seekpoints = []
super(SeekTable, self).__init__(data)
def __eq__(self, other):
try:
return (self.seekpoints == other.seekpoints)
except (AttributeError, TypeError):
return False
__hash__ = MetadataBlock.__hash__
def load(self, data):
self.seekpoints = []
sp = data.tryread(self.__SEEKPOINT_SIZE)
while len(sp) == self.__SEEKPOINT_SIZE:
self.seekpoints.append(SeekPoint(
*struct.unpack(self.__SEEKPOINT_FORMAT, sp)))
sp = data.tryread(self.__SEEKPOINT_SIZE)
def write(self):
f = cBytesIO()
for seekpoint in self.seekpoints:
packed = struct.pack(
self.__SEEKPOINT_FORMAT,
seekpoint.first_sample, seekpoint.byte_offset,
seekpoint.num_samples)
f.write(packed)
return f.getvalue()
def __repr__(self):
return "<%s seekpoints=%r>" % (type(self).__name__, self.seekpoints)
class VCFLACDict(VCommentDict):
"""Read and write FLAC Vorbis comments.
FLACs don't use the framing bit at the end of the comment block.
So this extends VCommentDict to not use the framing bit.
"""
code = 4
_distrust_size = True
def load(self, data, errors='replace', framing=False):
super(VCFLACDict, self).load(data, errors=errors, framing=framing)
def write(self, framing=False):
return super(VCFLACDict, self).write(framing=framing)
class CueSheetTrackIndex(tuple):
"""Index for a track in a cuesheet.
For CD-DA, an index_number of 0 corresponds to the track
pre-gap. The first index in a track must have a number of 0 or 1,
and subsequently, index_numbers must increase by 1. Index_numbers
must be unique within a track. And index_offset must be evenly
divisible by 588 samples.
Attributes:
* index_number -- index point number
* index_offset -- offset in samples from track start
"""
def __new__(cls, index_number, index_offset):
return super(cls, CueSheetTrackIndex).__new__(
cls, (index_number, index_offset))
index_number = property(lambda self: self[0])
index_offset = property(lambda self: self[1])
class CueSheetTrack(object):
"""A track in a cuesheet.
For CD-DA, track_numbers must be 1-99, or 170 for the
lead-out. Track_numbers must be unique within a cue sheet. There
must be atleast one index in every track except the lead-out track
which must have none.
Attributes:
* track_number -- track number
* start_offset -- track offset in samples from start of FLAC stream
* isrc -- ISRC code
* type -- 0 for audio, 1 for digital data
* pre_emphasis -- true if the track is recorded with pre-emphasis
* indexes -- list of CueSheetTrackIndex objects
"""
def __init__(self, track_number, start_offset, isrc='', type_=0,
pre_emphasis=False):
self.track_number = track_number
self.start_offset = start_offset
self.isrc = isrc
self.type = type_
self.pre_emphasis = pre_emphasis
self.indexes = []
def __eq__(self, other):
try:
return (self.track_number == other.track_number and
self.start_offset == other.start_offset and
self.isrc == other.isrc and
self.type == other.type and
self.pre_emphasis == other.pre_emphasis and
self.indexes == other.indexes)
except (AttributeError, TypeError):
return False
__hash__ = object.__hash__
def __repr__(self):
return (("<%s number=%r, offset=%d, isrc=%r, type=%r, "
"pre_emphasis=%r, indexes=%r)>") %
(type(self).__name__, self.track_number, self.start_offset,
self.isrc, self.type, self.pre_emphasis, self.indexes))
class CueSheet(MetadataBlock):
"""Read and write FLAC embedded cue sheets.
Number of tracks should be from 1 to 100. There should always be
exactly one lead-out track and that track must be the last track
in the cue sheet.
Attributes:
* media_catalog_number -- media catalog number in ASCII
* lead_in_samples -- number of lead-in samples
* compact_disc -- true if the cuesheet corresponds to a compact disc
* tracks -- list of CueSheetTrack objects
* lead_out -- lead-out as CueSheetTrack or None if lead-out was not found
"""
__CUESHEET_FORMAT = '>128sQB258xB'
__CUESHEET_SIZE = struct.calcsize(__CUESHEET_FORMAT)
__CUESHEET_TRACK_FORMAT = '>QB12sB13xB'
__CUESHEET_TRACK_SIZE = struct.calcsize(__CUESHEET_TRACK_FORMAT)
__CUESHEET_TRACKINDEX_FORMAT = '>QB3x'
__CUESHEET_TRACKINDEX_SIZE = struct.calcsize(__CUESHEET_TRACKINDEX_FORMAT)
code = 5
media_catalog_number = b''
lead_in_samples = 88200
compact_disc = True
def __init__(self, data):
self.tracks = []
super(CueSheet, self).__init__(data)
def __eq__(self, other):
try:
return (self.media_catalog_number == other.media_catalog_number and
self.lead_in_samples == other.lead_in_samples and
self.compact_disc == other.compact_disc and
self.tracks == other.tracks)
except (AttributeError, TypeError):
return False
__hash__ = MetadataBlock.__hash__
def load(self, data):
header = data.read(self.__CUESHEET_SIZE)
media_catalog_number, lead_in_samples, flags, num_tracks = \
struct.unpack(self.__CUESHEET_FORMAT, header)
self.media_catalog_number = media_catalog_number.rstrip(b'\0')
self.lead_in_samples = lead_in_samples
self.compact_disc = bool(flags & 0x80)
self.tracks = []
for i in range(num_tracks):
track = data.read(self.__CUESHEET_TRACK_SIZE)
start_offset, track_number, isrc_padded, flags, num_indexes = \
struct.unpack(self.__CUESHEET_TRACK_FORMAT, track)
isrc = isrc_padded.rstrip(b'\0')
type_ = (flags & 0x80) >> 7
pre_emphasis = bool(flags & 0x40)
val = CueSheetTrack(
track_number, start_offset, isrc, type_, pre_emphasis)
for j in range(num_indexes):
index = data.read(self.__CUESHEET_TRACKINDEX_SIZE)
index_offset, index_number = struct.unpack(
self.__CUESHEET_TRACKINDEX_FORMAT, index)
val.indexes.append(
CueSheetTrackIndex(index_number, index_offset))
self.tracks.append(val)
def write(self):
f = cBytesIO()
flags = 0
if self.compact_disc:
flags |= 0x80
packed = struct.pack(
self.__CUESHEET_FORMAT, self.media_catalog_number,
self.lead_in_samples, flags, len(self.tracks))
f.write(packed)
for track in self.tracks:
track_flags = 0
track_flags |= (track.type & 1) << 7
if track.pre_emphasis:
track_flags |= 0x40
track_packed = struct.pack(
self.__CUESHEET_TRACK_FORMAT, track.start_offset,
track.track_number, track.isrc, track_flags,
len(track.indexes))
f.write(track_packed)
for index in track.indexes:
index_packed = struct.pack(
self.__CUESHEET_TRACKINDEX_FORMAT,
index.index_offset, index.index_number)
f.write(index_packed)
return f.getvalue()
def __repr__(self):
return (("<%s media_catalog_number=%r, lead_in=%r, compact_disc=%r, "
"tracks=%r>") %
(type(self).__name__, self.media_catalog_number,
self.lead_in_samples, self.compact_disc, self.tracks))
class Picture(MetadataBlock):
"""Read and write FLAC embed pictures.
Attributes:
* type -- picture type (same as types for ID3 APIC frames)
* mime -- MIME type of the picture
* desc -- picture's description
* width -- width in pixels
* height -- height in pixels
* depth -- color depth in bits-per-pixel
* colors -- number of colors for indexed palettes (like GIF),
0 for non-indexed
* data -- picture data
"""
code = 6
_distrust_size = True
def __init__(self, data=None):
self.type = 0
self.mime = u''
self.desc = u''
self.width = 0
self.height = 0
self.depth = 0
self.colors = 0
self.data = b''
super(Picture, self).__init__(data)
def __eq__(self, other):
try:
return (self.type == other.type and
self.mime == other.mime and
self.desc == other.desc and
self.width == other.width and
self.height == other.height and
self.depth == other.depth and
self.colors == other.colors and
self.data == other.data)
except (AttributeError, TypeError):
return False
__hash__ = MetadataBlock.__hash__
def load(self, data):
self.type, length = struct.unpack('>2I', data.read(8))
self.mime = data.read(length).decode('UTF-8', 'replace')
length, = struct.unpack('>I', data.read(4))
self.desc = data.read(length).decode('UTF-8', 'replace')
(self.width, self.height, self.depth,
self.colors, length) = struct.unpack('>5I', data.read(20))
self.data = data.read(length)
def write(self):
f = cBytesIO()
mime = self.mime.encode('UTF-8')
f.write(struct.pack('>2I', self.type, len(mime)))
f.write(mime)
desc = self.desc.encode('UTF-8')
f.write(struct.pack('>I', len(desc)))
f.write(desc)
f.write(struct.pack('>5I', self.width, self.height, self.depth,
self.colors, len(self.data)))
f.write(self.data)
return f.getvalue()
def __repr__(self):
return "<%s '%s' (%d bytes)>" % (type(self).__name__, self.mime,
len(self.data))
class Padding(MetadataBlock):
"""Empty padding space for metadata blocks.
To avoid rewriting the entire FLAC file when editing comments,
metadata is often padded. Padding should occur at the end, and no
more than one padding block should be in any FLAC file. Mutagen
handles this with MetadataBlock.group_padding.
"""
code = 1
def __init__(self, data=b""):
super(Padding, self).__init__(data)
def load(self, data):
self.length = len(data.read())
def write(self):
try:
return b"\x00" * self.length
# On some 64 bit platforms this won't generate a MemoryError
# or OverflowError since you might have enough RAM, but it
# still generates a ValueError. On other 64 bit platforms,
# this will still succeed for extremely large values.
# Those should never happen in the real world, and if they
# do, writeblocks will catch it.
except (OverflowError, ValueError, MemoryError):
raise error("cannot write %d bytes" % self.length)
def __eq__(self, other):
return isinstance(other, Padding) and self.length == other.length
__hash__ = MetadataBlock.__hash__
def __repr__(self):
return "<%s (%d bytes)>" % (type(self).__name__, self.length)
class FLAC(mutagen.FileType):
"""A FLAC audio file.
Attributes:
* info -- stream information (length, bitrate, sample rate)
* tags -- metadata tags, if any
* cuesheet -- CueSheet object, if any
* seektable -- SeekTable object, if any
* pictures -- list of embedded pictures
"""
_mimes = ["audio/x-flac", "application/x-flac"]
METADATA_BLOCKS = [StreamInfo, Padding, None, SeekTable, VCFLACDict,
CueSheet, Picture]
"""Known metadata block types, indexed by ID."""
@staticmethod
def score(filename, fileobj, header_data):
return (header_data.startswith(b"fLaC") +
endswith(filename.lower(), ".flac") * 3)
def __read_metadata_block(self, fileobj):
byte = ord(fileobj.read(1))
size = to_int_be(fileobj.read(3))
code = byte & 0x7F
last_block = bool(byte & 0x80)
try:
block_type = self.METADATA_BLOCKS[code] or MetadataBlock
except IndexError:
block_type = MetadataBlock
if block_type._distrust_size:
# Some jackass is writing broken Metadata block length
# for Vorbis comment blocks, and the FLAC reference
# implementaton can parse them (mostly by accident),
# so we have to too. Instead of parsing the size
# given, parse an actual Vorbis comment, leaving
# fileobj in the right position.
# http://code.google.com/p/mutagen/issues/detail?id=52
# ..same for the Picture block:
# http://code.google.com/p/mutagen/issues/detail?id=106
block = block_type(fileobj)
else:
data = fileobj.read(size)
block = block_type(data)
block.code = code
if block.code == VCFLACDict.code:
if self.tags is None:
self.tags = block
else:
raise FLACVorbisError("> 1 Vorbis comment block found")
elif block.code == CueSheet.code:
if self.cuesheet is None:
self.cuesheet = block
else:
raise error("> 1 CueSheet block found")
elif block.code == SeekTable.code:
if self.seektable is None:
self.seektable = block
else:
raise error("> 1 SeekTable block found")
self.metadata_blocks.append(block)
return not last_block
def add_tags(self):
"""Add a Vorbis comment block to the file."""
if self.tags is None:
self.tags = VCFLACDict()
self.metadata_blocks.append(self.tags)
else:
raise FLACVorbisError("a Vorbis comment already exists")
add_vorbiscomment = add_tags
def delete(self, filename=None):
"""Remove Vorbis comments from a file.
If no filename is given, the one most recently loaded is used.
"""
if filename is None:
filename = self.filename
for s in list(self.metadata_blocks):
if isinstance(s, VCFLACDict):
self.metadata_blocks.remove(s)
self.tags = None
self.save()
break
vc = property(lambda s: s.tags, doc="Alias for tags; don't use this.")
def load(self, filename):
"""Load file information from a filename."""
self.metadata_blocks = []
self.tags = None
self.cuesheet = None
self.seektable = None
self.filename = filename
fileobj = StrictFileObject(open(filename, "rb"))
try:
self.__check_header(fileobj)
while self.__read_metadata_block(fileobj):
pass
finally:
fileobj.close()
try:
self.metadata_blocks[0].length
except (AttributeError, IndexError):
raise FLACNoHeaderError("Stream info block not found")
@property
def info(self):
return self.metadata_blocks[0]
def add_picture(self, picture):
"""Add a new picture to the file."""
self.metadata_blocks.append(picture)
def clear_pictures(self):
"""Delete all pictures from the file."""
blocks = [b for b in self.metadata_blocks if b.code != Picture.code]
self.metadata_blocks = blocks
@property
def pictures(self):
"""List of embedded pictures"""
return [b for b in self.metadata_blocks if b.code == Picture.code]
def save(self, filename=None, deleteid3=False):
"""Save metadata blocks to a file.
If no filename is given, the one most recently loaded is used.
"""
if filename is None:
filename = self.filename
f = open(filename, 'rb+')
try:
# Ensure we've got padding at the end, and only at the end.
# If adding makes it too large, we'll scale it down later.
self.metadata_blocks.append(Padding(b'\x00' * 1020))
MetadataBlock.group_padding(self.metadata_blocks)
header = self.__check_header(f)
# "fLaC" and maybe ID3
available = self.__find_audio_offset(f) - header
data = MetadataBlock.writeblocks(self.metadata_blocks)
# Delete ID3v2
if deleteid3 and header > 4:
available += header - 4
header = 4
if len(data) > available:
# If we have too much data, see if we can reduce padding.
padding = self.metadata_blocks[-1]
newlength = padding.length - (len(data) - available)
if newlength > 0:
padding.length = newlength
data = MetadataBlock.writeblocks(self.metadata_blocks)
assert len(data) == available
elif len(data) < available:
# If we have too little data, increase padding.
self.metadata_blocks[-1].length += (available - len(data))
data = MetadataBlock.writeblocks(self.metadata_blocks)
assert len(data) == available
if len(data) != available:
# We couldn't reduce the padding enough.
diff = (len(data) - available)
insert_bytes(f, diff, header)
f.seek(header - 4)
f.write(b"fLaC" + data)
# Delete ID3v1
if deleteid3:
try:
f.seek(-128, 2)
except IOError:
pass
else:
if f.read(3) == b"TAG":
f.seek(-128, 2)
f.truncate()
finally:
f.close()
def __find_audio_offset(self, fileobj):
byte = 0x00
while not (byte & 0x80):
byte = ord(fileobj.read(1))
size = to_int_be(fileobj.read(3))
try:
block_type = self.METADATA_BLOCKS[byte & 0x7F]
except IndexError:
block_type = None
if block_type and block_type._distrust_size:
# See comments in read_metadata_block; the size can't
# be trusted for Vorbis comment blocks and Picture block
block_type(fileobj)
else:
fileobj.read(size)
return fileobj.tell()
def __check_header(self, fileobj):
size = 4
header = fileobj.read(4)
if header != b"fLaC":
size = None
if header[:3] == b"ID3":
size = 14 + BitPaddedInt(fileobj.read(6)[2:])
fileobj.seek(size - 4)
if fileobj.read(4) != b"fLaC":
size = None
if size is None:
raise FLACNoHeaderError(
"%r is not a valid FLAC file" % fileobj.name)
return size
Open = FLAC
def delete(filename):
"""Remove tags from a file."""
FLAC(filename).delete()