nzbToMedia/libs/pysrt/srtitem.py
2014-06-27 17:04:44 +09:30

102 lines
3.2 KiB
Python

# -*- coding: utf-8 -*-
"""
SubRip's subtitle parser
"""
from pysrt.srtexc import InvalidItem, InvalidIndex
from pysrt.srttime import SubRipTime
from pysrt.comparablemixin import ComparableMixin
from pysrt.compat import str, is_py2
import re
class SubRipItem(ComparableMixin):
"""
SubRipItem(index, start, end, text, position)
index -> int: index of item in file. 0 by default.
start, end -> SubRipTime or coercible.
text -> unicode: text content for item.
position -> unicode: raw srt/vtt "display coordinates" string
"""
ITEM_PATTERN = str('%s\n%s --> %s%s\n%s\n')
TIMESTAMP_SEPARATOR = '-->'
def __init__(self, index=0, start=None, end=None, text='', position=''):
try:
self.index = int(index)
except (TypeError, ValueError): # try to cast as int, but it's not mandatory
self.index = index
self.start = SubRipTime.coerce(start or 0)
self.end = SubRipTime.coerce(end or 0)
self.position = str(position)
self.text = str(text)
@property
def duration(self):
return self.end - self.start
@property
def text_without_tags(self):
RE_TAG = re.compile(r'<[^>]*?>')
return RE_TAG.sub('', self.text)
@property
def characters_per_second(self):
characters_count = len(self.text_without_tags.replace('\n', ''))
try:
return characters_count / (self.duration.ordinal / 1000.0)
except ZeroDivisionError:
return 0.0
def __str__(self):
position = ' %s' % self.position if self.position.strip() else ''
return self.ITEM_PATTERN % (self.index, self.start, self.end,
position, self.text)
if is_py2:
__unicode__ = __str__
def __str__(self):
raise NotImplementedError('Use unicode() instead!')
def _cmpkey(self):
return (self.start, self.end)
def shift(self, *args, **kwargs):
"""
shift(hours, minutes, seconds, milliseconds, ratio)
Add given values to start and end attributes.
All arguments are optional and have a default value of 0.
"""
self.start.shift(*args, **kwargs)
self.end.shift(*args, **kwargs)
@classmethod
def from_string(cls, source):
return cls.from_lines(source.splitlines(True))
@classmethod
def from_lines(cls, lines):
if len(lines) < 2:
raise InvalidItem()
lines = [l.rstrip() for l in lines]
index = None
if cls.TIMESTAMP_SEPARATOR not in lines[0]:
index = lines.pop(0)
start, end, position = cls.split_timestamps(lines[0])
body = '\n'.join(lines[1:])
return cls(index, start, end, body, position)
@classmethod
def split_timestamps(cls, line):
timestamps = line.split(cls.TIMESTAMP_SEPARATOR)
if len(timestamps) != 2:
raise InvalidItem()
start, end_and_position = timestamps
end_and_position = end_and_position.lstrip().split(' ', 1)
end = end_and_position[0]
position = end_and_position[1] if len(end_and_position) > 1 else ''
return (s.strip() for s in (start, end, position))