mirror of
https://github.com/clinton-hall/nzbToMedia.git
synced 2025-01-24 03:42:59 -08:00
f05b09f349
Updates rarfile to 3.1 Updates stevedore to 3.5.0 Updates appdirs to 1.4.4 Updates click to 8.1.3 Updates decorator to 5.1.1 Updates dogpile.cache to 1.1.8 Updates pbr to 5.11.0 Updates pysrt to 1.1.2 Updates pytz to 2022.6 Adds importlib-metadata version 3.1.1 Adds typing-extensions version 4.1.1 Adds zipp version 3.11.0
313 lines
10 KiB
Python
313 lines
10 KiB
Python
# -*- coding: utf-8 -*-
|
|
import os
|
|
import sys
|
|
import codecs
|
|
|
|
try:
|
|
from collections import UserList
|
|
except ImportError:
|
|
from UserList import UserList
|
|
|
|
from itertools import chain
|
|
from copy import copy
|
|
|
|
from pysrt.srtexc import Error
|
|
from pysrt.srtitem import SubRipItem
|
|
from pysrt.compat import str
|
|
|
|
BOMS = ((codecs.BOM_UTF32_LE, 'utf_32_le'),
|
|
(codecs.BOM_UTF32_BE, 'utf_32_be'),
|
|
(codecs.BOM_UTF16_LE, 'utf_16_le'),
|
|
(codecs.BOM_UTF16_BE, 'utf_16_be'),
|
|
(codecs.BOM_UTF8, 'utf_8'))
|
|
CODECS_BOMS = dict((codec, str(bom, codec)) for bom, codec in BOMS)
|
|
BIGGER_BOM = max(len(bom) for bom, encoding in BOMS)
|
|
|
|
|
|
class SubRipFile(UserList, object):
|
|
"""
|
|
SubRip file descriptor.
|
|
|
|
Provide a pure Python mapping on all metadata.
|
|
|
|
SubRipFile(items, eol, path, encoding)
|
|
|
|
items -> list of SubRipItem. Default to [].
|
|
eol -> str: end of line character. Default to linesep used in opened file
|
|
if any else to os.linesep.
|
|
path -> str: path where file will be saved. To open an existant file see
|
|
SubRipFile.open.
|
|
encoding -> str: encoding used at file save. Default to utf-8.
|
|
"""
|
|
ERROR_PASS = 0
|
|
ERROR_LOG = 1
|
|
ERROR_RAISE = 2
|
|
|
|
DEFAULT_ENCODING = 'utf_8'
|
|
|
|
def __init__(self, items=None, eol=None, path=None, encoding='utf-8'):
|
|
UserList.__init__(self, items or [])
|
|
self._eol = eol
|
|
self.path = path
|
|
self.encoding = encoding
|
|
|
|
def _get_eol(self):
|
|
return self._eol or os.linesep
|
|
|
|
def _set_eol(self, eol):
|
|
self._eol = self._eol or eol
|
|
|
|
eol = property(_get_eol, _set_eol)
|
|
|
|
def slice(self, starts_before=None, starts_after=None, ends_before=None,
|
|
ends_after=None):
|
|
"""
|
|
slice([starts_before][, starts_after][, ends_before][, ends_after]) \
|
|
-> SubRipFile clone
|
|
|
|
All arguments are optional, and should be coercible to SubRipTime
|
|
object.
|
|
|
|
It reduce the set of subtitles to those that match match given time
|
|
constraints.
|
|
|
|
The returned set is a clone, but still contains references to original
|
|
subtitles. So if you shift this returned set, subs contained in the
|
|
original SubRipFile instance will be altered too.
|
|
|
|
Example:
|
|
>>> subs.slice(ends_after={'seconds': 20}).shift(seconds=2)
|
|
"""
|
|
clone = copy(self)
|
|
|
|
if starts_before:
|
|
clone.data = (i for i in clone.data if i.start < starts_before)
|
|
if starts_after:
|
|
clone.data = (i for i in clone.data if i.start > starts_after)
|
|
if ends_before:
|
|
clone.data = (i for i in clone.data if i.end < ends_before)
|
|
if ends_after:
|
|
clone.data = (i for i in clone.data if i.end > ends_after)
|
|
|
|
clone.data = list(clone.data)
|
|
return clone
|
|
|
|
def at(self, timestamp=None, **kwargs):
|
|
"""
|
|
at(timestamp) -> SubRipFile clone
|
|
|
|
timestamp argument should be coercible to SubRipFile object.
|
|
|
|
A specialization of slice. Return all subtiles visible at the
|
|
timestamp mark.
|
|
|
|
Example:
|
|
>>> subs.at((0, 0, 20, 0)).shift(seconds=2)
|
|
>>> subs.at(seconds=20).shift(seconds=2)
|
|
"""
|
|
time = timestamp or kwargs
|
|
return self.slice(starts_before=time, ends_after=time)
|
|
|
|
def shift(self, *args, **kwargs):
|
|
"""shift(hours, minutes, seconds, milliseconds, ratio)
|
|
|
|
Shift `start` and `end` attributes of each items of file either by
|
|
applying a ratio or by adding an offset.
|
|
|
|
`ratio` should be either an int or a float.
|
|
Example to convert subtitles from 23.9 fps to 25 fps:
|
|
>>> subs.shift(ratio=25/23.9)
|
|
|
|
All "time" arguments are optional and have a default value of 0.
|
|
Example to delay all subs from 2 seconds and half
|
|
>>> subs.shift(seconds=2, milliseconds=500)
|
|
"""
|
|
for item in self:
|
|
item.shift(*args, **kwargs)
|
|
|
|
def clean_indexes(self):
|
|
"""
|
|
clean_indexes()
|
|
|
|
Sort subs and reset their index attribute. Should be called after
|
|
destructive operations like split or such.
|
|
"""
|
|
self.sort()
|
|
for index, item in enumerate(self):
|
|
item.index = index + 1
|
|
|
|
@property
|
|
def text(self):
|
|
return '\n'.join(i.text for i in self)
|
|
|
|
@classmethod
|
|
def open(cls, path='', encoding=None, error_handling=ERROR_PASS):
|
|
"""
|
|
open([path, [encoding]])
|
|
|
|
If you do not provide any encoding, it can be detected if the file
|
|
contain a bit order mark, unless it is set to utf-8 as default.
|
|
"""
|
|
source_file, encoding = cls._open_unicode_file(path, claimed_encoding=encoding)
|
|
new_file = cls(path=path, encoding=encoding)
|
|
new_file.read(source_file, error_handling=error_handling)
|
|
source_file.close()
|
|
return new_file
|
|
|
|
@classmethod
|
|
def from_string(cls, source, **kwargs):
|
|
"""
|
|
from_string(source, **kwargs) -> SubRipFile
|
|
|
|
`source` -> a unicode instance or at least a str instance encoded with
|
|
`sys.getdefaultencoding()`
|
|
"""
|
|
error_handling = kwargs.pop('error_handling', None)
|
|
new_file = cls(**kwargs)
|
|
new_file.read(source.splitlines(True), error_handling=error_handling)
|
|
return new_file
|
|
|
|
def read(self, source_file, error_handling=ERROR_PASS):
|
|
"""
|
|
read(source_file, [error_handling])
|
|
|
|
This method parse subtitles contained in `source_file` and append them
|
|
to the current instance.
|
|
|
|
`source_file` -> Any iterable that yield unicode strings, like a file
|
|
opened with `codecs.open()` or an array of unicode.
|
|
"""
|
|
self.eol = self._guess_eol(source_file)
|
|
self.extend(self.stream(source_file, error_handling=error_handling))
|
|
return self
|
|
|
|
@classmethod
|
|
def stream(cls, source_file, error_handling=ERROR_PASS):
|
|
"""
|
|
stream(source_file, [error_handling])
|
|
|
|
This method yield SubRipItem instances a soon as they have been parsed
|
|
without storing them. It is a kind of SAX parser for .srt files.
|
|
|
|
`source_file` -> Any iterable that yield unicode strings, like a file
|
|
opened with `codecs.open()` or an array of unicode.
|
|
|
|
Example:
|
|
>>> import pysrt
|
|
>>> import codecs
|
|
>>> file = codecs.open('movie.srt', encoding='utf-8')
|
|
>>> for sub in pysrt.stream(file):
|
|
... sub.text += "\nHello !"
|
|
... print unicode(sub)
|
|
"""
|
|
string_buffer = []
|
|
for index, line in enumerate(chain(source_file, '\n')):
|
|
if line.strip():
|
|
string_buffer.append(line)
|
|
else:
|
|
source = string_buffer
|
|
string_buffer = []
|
|
if source and all(source):
|
|
try:
|
|
yield SubRipItem.from_lines(source)
|
|
except Error as error:
|
|
error.args += (''.join(source), )
|
|
cls._handle_error(error, error_handling, index)
|
|
|
|
def save(self, path=None, encoding=None, eol=None):
|
|
"""
|
|
save([path][, encoding][, eol])
|
|
|
|
Use initial path if no other provided.
|
|
Use initial encoding if no other provided.
|
|
Use initial eol if no other provided.
|
|
"""
|
|
path = path or self.path
|
|
encoding = encoding or self.encoding
|
|
|
|
save_file = codecs.open(path, 'w+', encoding=encoding)
|
|
self.write_into(save_file, eol=eol)
|
|
save_file.close()
|
|
|
|
def write_into(self, output_file, eol=None):
|
|
"""
|
|
write_into(output_file [, eol])
|
|
|
|
Serialize current state into `output_file`.
|
|
|
|
`output_file` -> Any instance that respond to `write()`, typically a
|
|
file object
|
|
"""
|
|
output_eol = eol or self.eol
|
|
|
|
for item in self:
|
|
string_repr = str(item)
|
|
if output_eol != '\n':
|
|
string_repr = string_repr.replace('\n', output_eol)
|
|
output_file.write(string_repr)
|
|
# Only add trailing eol if it's not already present.
|
|
# It was kept in the SubRipItem's text before but it really
|
|
# belongs here. Existing applications might give us subtitles
|
|
# which already contain a trailing eol though.
|
|
if not string_repr.endswith(2 * output_eol):
|
|
output_file.write(output_eol)
|
|
|
|
@classmethod
|
|
def _guess_eol(cls, string_iterable):
|
|
first_line = cls._get_first_line(string_iterable)
|
|
for eol in ('\r\n', '\r', '\n'):
|
|
if first_line.endswith(eol):
|
|
return eol
|
|
return os.linesep
|
|
|
|
@classmethod
|
|
def _get_first_line(cls, string_iterable):
|
|
if hasattr(string_iterable, 'tell'):
|
|
previous_position = string_iterable.tell()
|
|
|
|
try:
|
|
first_line = next(iter(string_iterable))
|
|
except StopIteration:
|
|
return ''
|
|
if hasattr(string_iterable, 'seek'):
|
|
string_iterable.seek(previous_position)
|
|
|
|
return first_line
|
|
|
|
@classmethod
|
|
def _detect_encoding(cls, path):
|
|
file_descriptor = open(path, 'rb')
|
|
first_chars = file_descriptor.read(BIGGER_BOM)
|
|
file_descriptor.close()
|
|
|
|
for bom, encoding in BOMS:
|
|
if first_chars.startswith(bom):
|
|
return encoding
|
|
|
|
# TODO: maybe a chardet integration
|
|
return cls.DEFAULT_ENCODING
|
|
|
|
@classmethod
|
|
def _open_unicode_file(cls, path, claimed_encoding=None):
|
|
encoding = claimed_encoding or cls._detect_encoding(path)
|
|
source_file = codecs.open(path, 'r', encoding=encoding)
|
|
|
|
# get rid of BOM if any
|
|
possible_bom = CODECS_BOMS.get(encoding, None)
|
|
if possible_bom:
|
|
file_bom = source_file.read(len(possible_bom))
|
|
if not file_bom == possible_bom:
|
|
source_file.seek(0) # if not rewind
|
|
return source_file, encoding
|
|
|
|
@classmethod
|
|
def _handle_error(cls, error, error_handling, index):
|
|
if error_handling == cls.ERROR_RAISE:
|
|
error.args = (index, ) + error.args
|
|
raise error
|
|
if error_handling == cls.ERROR_LOG:
|
|
name = type(error).__name__
|
|
sys.stderr.write('PySRT-%s(line %s): \n' % (name, index))
|
|
sys.stderr.write(error.args[0].encode('ascii', 'replace'))
|
|
sys.stderr.write('\n')
|