mirror of
https://github.com/Tautulli/Tautulli.git
synced 2025-01-06 11:09:57 -08:00
c1b8be0227
* Bump arrow from 1.2.3 to 1.3.0 Bumps [arrow](https://github.com/arrow-py/arrow) from 1.2.3 to 1.3.0. - [Release notes](https://github.com/arrow-py/arrow/releases) - [Changelog](https://github.com/arrow-py/arrow/blob/master/CHANGELOG.rst) - [Commits](https://github.com/arrow-py/arrow/compare/1.2.3...1.3.0) --- updated-dependencies: - dependency-name: arrow dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] <support@github.com> * Update arrow==1.3.0 --------- Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: JonnyWong16 <9099342+JonnyWong16@users.noreply.github.com> [skip ci]
772 lines
25 KiB
Python
772 lines
25 KiB
Python
"""Provides the :class:`Arrow <arrow.parser.DateTimeParser>` class, a better way to parse datetime strings."""
|
|
|
|
import re
|
|
import sys
|
|
from datetime import datetime, timedelta
|
|
from datetime import tzinfo as dt_tzinfo
|
|
from functools import lru_cache
|
|
from typing import (
|
|
Any,
|
|
ClassVar,
|
|
Dict,
|
|
Iterable,
|
|
List,
|
|
Match,
|
|
Optional,
|
|
Pattern,
|
|
SupportsFloat,
|
|
SupportsInt,
|
|
Tuple,
|
|
Union,
|
|
cast,
|
|
overload,
|
|
)
|
|
|
|
from dateutil import tz
|
|
|
|
from arrow import locales
|
|
from arrow.constants import DEFAULT_LOCALE
|
|
from arrow.util import next_weekday, normalize_timestamp
|
|
|
|
if sys.version_info < (3, 8): # pragma: no cover
|
|
from typing_extensions import Literal, TypedDict
|
|
else:
|
|
from typing import Literal, TypedDict # pragma: no cover
|
|
|
|
|
|
class ParserError(ValueError):
    """Base error raised by this module when a string cannot be parsed.

    It derives from :class:`ValueError`, so handlers written for
    ``ValueError`` also receive it.
    """
|
|
|
|
|
|
class ParserMatchError(ParserError):
    """Raised when an input string does not match one particular format.

    Keeping this separate from :class:`ParserError` allows ParserErrors to be
    propagated from _build_datetime() when day_of_year errors occur. Before
    this, the ParserErrors were caught by the try/except in
    _parse_multiformat() and the appropriate error message was not
    transmitted to the user.
    """
|
|
|
|
|
|
# One component (year, week or day) of an ISO 8601 week date as captured
# from the input; it is converted with int() before use.
_WEEKDATE_ELEMENT = Union[str, bytes, SupportsInt, bytearray]

# Every formatting token the parser knows how to handle.
_FORMAT_TYPE = Literal[
    # year / month (numeric)
    "YYYY", "YY", "MM", "M",
    # day of year / day of month
    "DDDD", "DDD", "DD", "D",
    # hour (24h and 12h) / minute / second
    "HH", "H", "hh", "h", "mm", "m", "ss", "s",
    # timestamps
    "X", "x",
    # timezone
    "ZZZ", "ZZ", "Z",
    # sub-second
    "S",
    # ISO 8601 week date
    "W",
    # month names / ordinal day
    "MMMM", "MMM", "Do",
    # day of week
    "dddd", "ddd", "d",
    # meridian
    "a", "A",
]
|
|
|
|
|
|
class _Parts(TypedDict, total=False):
    """Values extracted from a datetime string, keyed by component.

    ``total=False``: a key is only present when the matching token appeared
    in the format string.
    """

    # calendar date
    year: int
    month: int
    day_of_year: int
    day: int
    # time of day
    hour: int
    minute: int
    second: int
    microsecond: int
    # timestamps ("X" and "x" tokens)
    timestamp: float
    expanded_timestamp: int
    # timezone and meridian
    tzinfo: dt_tzinfo
    am_pm: Literal["am", "pm"]
    # weekday index and ISO week date as a (year, week, day) triple whose
    # day element may be None
    day_of_week: int
    weekdate: Tuple[_WEEKDATE_ELEMENT, _WEEKDATE_ELEMENT, Optional[_WEEKDATE_ELEMENT]]
|
|
|
|
|
|
class DateTimeParser:
    """Parse datetime strings described by token-based format strings.

    The class-level patterns below are the locale-independent building
    blocks: each formatting token maps to the regular expression that its
    value must match.
    """

    # Locates formatting tokens inside a format string.
    _FORMAT_RE: ClassVar[Pattern[str]] = re.compile(
        r"(YYY?Y?|MM?M?M?|Do|DD?D?D?|d?d?d?d|HH?|hh?|mm?|ss?|S+|ZZ?Z?|a|A|x|X|W)"
    )
    # Locates bracketed sections (without nested brackets) in a format string.
    _ESCAPE_RE: ClassVar[Pattern[str]] = re.compile(r"\[[^\[\]]*\]")

    # Generic digit-run patterns shared by several tokens.
    _ONE_OR_TWO_DIGIT_RE: ClassVar[Pattern[str]] = re.compile(r"\d{1,2}")
    _ONE_OR_TWO_OR_THREE_DIGIT_RE: ClassVar[Pattern[str]] = re.compile(r"\d{1,3}")
    _ONE_OR_MORE_DIGIT_RE: ClassVar[Pattern[str]] = re.compile(r"\d+")
    _TWO_DIGIT_RE: ClassVar[Pattern[str]] = re.compile(r"\d{2}")
    _THREE_DIGIT_RE: ClassVar[Pattern[str]] = re.compile(r"\d{3}")
    _FOUR_DIGIT_RE: ClassVar[Pattern[str]] = re.compile(r"\d{4}")

    # Timezone offsets without ("Z") and with ("ZZ") a colon, or a literal Z.
    _TZ_Z_RE: ClassVar[Pattern[str]] = re.compile(r"([\+\-])(\d{2})(?:(\d{2}))?|Z")
    _TZ_ZZ_RE: ClassVar[Pattern[str]] = re.compile(r"([\+\-])(\d{2})(?:\:(\d{2}))?|Z")
    _TZ_NAME_RE: ClassVar[Pattern[str]] = re.compile(r"\w[\w+\-/]+")

    # NOTE: timestamps cannot be parsed from natural language strings (by removing the ^...$) because it will
    # break cases like "15 Jul 2000" and a format list (see issue #447)
    # The fractional part is optional as a whole, so single-digit timestamps
    # such as "0" match too; a trailing or leading "." is still rejected.
    _TIMESTAMP_RE: ClassVar[Pattern[str]] = re.compile(r"^\-?\d+(?:\.\d+)?$")
    _TIMESTAMP_EXPANDED_RE: ClassVar[Pattern[str]] = re.compile(r"^\-?\d+$")

    # Time component used by parse_iso(): HH[[:]mm[[:]ss]] plus an optional
    # "." or "," separated sub-second part.
    _TIME_RE: ClassVar[Pattern[str]] = re.compile(
        r"^(\d{2})(?:\:?(\d{2}))?(?:\:?(\d{2}))?(?:([\.\,])(\d+))?$"
    )
    # Week date: four-digit year, "W", two-digit week, optional single-digit day.
    _WEEK_DATE_RE: ClassVar[Pattern[str]] = re.compile(
        r"(?P<year>\d{4})[\-]?W(?P<week>\d{2})[\-]?(?P<day>\d)?"
    )

    # Token -> pattern for every token that does not depend on the locale.
    _BASE_INPUT_RE_MAP: ClassVar[Dict[_FORMAT_TYPE, Pattern[str]]] = {
        "YYYY": _FOUR_DIGIT_RE,
        "YY": _TWO_DIGIT_RE,
        "MM": _TWO_DIGIT_RE,
        "M": _ONE_OR_TWO_DIGIT_RE,
        "DDDD": _THREE_DIGIT_RE,
        "DDD": _ONE_OR_TWO_OR_THREE_DIGIT_RE,
        "DD": _TWO_DIGIT_RE,
        "D": _ONE_OR_TWO_DIGIT_RE,
        "HH": _TWO_DIGIT_RE,
        "H": _ONE_OR_TWO_DIGIT_RE,
        "hh": _TWO_DIGIT_RE,
        "h": _ONE_OR_TWO_DIGIT_RE,
        "mm": _TWO_DIGIT_RE,
        "m": _ONE_OR_TWO_DIGIT_RE,
        "ss": _TWO_DIGIT_RE,
        "s": _ONE_OR_TWO_DIGIT_RE,
        "X": _TIMESTAMP_RE,
        "x": _TIMESTAMP_EXPANDED_RE,
        "ZZZ": _TZ_NAME_RE,
        "ZZ": _TZ_ZZ_RE,
        "Z": _TZ_Z_RE,
        "S": _ONE_OR_MORE_DIGIT_RE,
        "W": _WEEK_DATE_RE,
    }

    SEPARATORS: ClassVar[List[str]] = ["-", "/", "."]

    # Instance attributes, assigned in __init__.
    locale: locales.Locale
    _input_re_map: Dict[_FORMAT_TYPE, Pattern[str]]
|
|
|
|
def __init__(self, locale: str = DEFAULT_LOCALE, cache_size: int = 0) -> None:
    """Set up the locale and the token -> pattern table for this parser.

    :param locale: locale name handed to ``locales.get_locale``.
    :param cache_size: when greater than 0, ``_generate_pattern_re`` is
        wrapped in an ``lru_cache`` of that size on this instance.
    """
    self.locale = locales.get_locale(locale)
    self._input_re_map = self._BASE_INPUT_RE_MAP.copy()

    choice_re = self._generate_choice_re
    locale_patterns: Dict[_FORMAT_TYPE, Pattern[str]] = {
        # index 0 of the locale name lists is skipped
        "MMMM": choice_re(self.locale.month_names[1:], re.IGNORECASE),
        "MMM": choice_re(self.locale.month_abbreviations[1:], re.IGNORECASE),
        "Do": re.compile(self.locale.ordinal_day_re),
        "dddd": choice_re(self.locale.day_names[1:], re.IGNORECASE),
        "ddd": choice_re(self.locale.day_abbreviations[1:], re.IGNORECASE),
        "d": re.compile(r"[1-7]"),
        "a": choice_re((self.locale.meridians["am"], self.locale.meridians["pm"])),
        # note: 'A' token accepts both 'am/pm' and 'AM/PM' formats to
        # ensure backwards compatibility of this token
        "A": choice_re(self.locale.meridians.values()),
    }
    self._input_re_map.update(locale_patterns)

    if cache_size <= 0:
        return

    # Shadow the method on this instance with a memoised version.
    self._generate_pattern_re = lru_cache(maxsize=cache_size)(  # type: ignore
        self._generate_pattern_re
    )
|
|
|
|
# TODO: since we support more than ISO 8601, we should rename this function
|
|
# IDEA: break into multiple functions
|
|
def parse_iso(
    self, datetime_string: str, normalize_whitespace: bool = False
) -> datetime:
    """Parse an ISO 8601-like string by trying a list of known formats.

    The date part is matched against a fixed list of date formats. When a
    time part is present (separated by a single space or a ``T``), a
    matching time format and, if needed, a timezone token are appended to
    every candidate before delegating to ``_parse_multiformat``.

    :param datetime_string: the string to parse.
    :param normalize_whitespace: strip the string and collapse runs of
        whitespace into single spaces before parsing.
    :raises ParserError: if the string has unexpected spacing or its time
        component does not match ``_TIME_RE``.
    """
    if normalize_whitespace:
        datetime_string = re.sub(r"\s+", " ", datetime_string.strip())

    space_count = datetime_string.count(" ")
    has_space_divider = space_count > 0
    has_t_divider = "T" in datetime_string

    # At most one space is tolerated, and none at all next to a "T" divider.
    if space_count > 1 or (has_t_divider and has_space_divider):
        raise ParserError(
            f"Expected an ISO 8601-like string, but was given {datetime_string!r}. "
            "Try passing in a format string to resolve this."
        )

    # date formats (ISO 8601 and others) to test against
    # NOTE: YYYYMM is omitted to avoid confusion with YYMMDD (no longer part of ISO 8601, but is still often used)
    formats = [
        "YYYY-MM-DD",
        "YYYY-M-DD",
        "YYYY-M-D",
        "YYYY/MM/DD",
        "YYYY/M/DD",
        "YYYY/M/D",
        "YYYY.MM.DD",
        "YYYY.M.DD",
        "YYYY.M.D",
        "YYYYMMDD",
        "YYYY-DDDD",
        "YYYYDDDD",
        "YYYY-MM",
        "YYYY/MM",
        "YYYY.MM",
        "YYYY",
        "W",
    ]

    # Date only: nothing to add to the candidate formats.
    if not (has_space_divider or has_t_divider):
        return self._parse_multiformat(datetime_string, formats)

    divider = " " if has_space_divider else "T"
    _, time_part = datetime_string.split(divider, 1)

    # Everything after the first "+", "-" or "Z"/"z" is the timezone.
    pieces = re.split(r"[\+\-Z]", time_part, maxsplit=1, flags=re.IGNORECASE)
    clock = pieces[0]

    time_components: Optional[Match[str]] = self._TIME_RE.match(clock)
    if time_components is None:
        raise ParserError(
            "Invalid time component provided. "
            "Please specify a format or provide a valid time component in the basic or extended ISO 8601 time format."
        )

    _hours, minutes, seconds, subseconds_sep, subseconds = time_components.groups()

    # basic format has no ":" between the fields, extended format does
    time_sep = ":" if ":" in clock else ""

    if subseconds is not None:
        time_format = f"HH{time_sep}mm{time_sep}ss{subseconds_sep}S"
    elif seconds is not None:
        time_format = f"HH{time_sep}mm{time_sep}ss"
    elif minutes is not None:
        time_format = f"HH{time_sep}mm"
    else:
        time_format = "HH"

    formats = [f"{date_format}{divider}{time_format}" for date_format in formats]

    if len(pieces) == 2:
        # Add "Z" or "ZZ" to the format strings to indicate to
        # _parse_token() that a timezone needs to be parsed; 'ZZ' is used
        # when the tz offset is present in non-basic format
        tz_format = "ZZ" if ":" in pieces[1] else "Z"
        formats = [f"{candidate}{tz_format}" for candidate in formats]

    return self._parse_multiformat(datetime_string, formats)
|
|
|
|
def parse(
    self,
    datetime_string: str,
    fmt: Union[List[str], str],
    normalize_whitespace: bool = False,
) -> datetime:
    """Parse ``datetime_string`` according to ``fmt``.

    :param datetime_string: the string to parse.
    :param fmt: a format string, or a list of format strings that are tried
        in order via ``_parse_multiformat``.
    :param normalize_whitespace: collapse runs of whitespace in the input
        into single spaces before matching.
    :raises ParserMatchError: if the pattern for ``fmt`` cannot be compiled
        (the original ``re.error`` is attached as ``__cause__``), if the
        input does not match, or if a token has no captured value.
    """
    if normalize_whitespace:
        datetime_string = re.sub(r"\s+", " ", datetime_string)

    if isinstance(fmt, list):
        return self._parse_multiformat(datetime_string, fmt)

    try:
        fmt_tokens: List[_FORMAT_TYPE]
        fmt_pattern_re: Pattern[str]
        fmt_tokens, fmt_pattern_re = self._generate_pattern_re(fmt)
    except re.error as e:
        # Chain explicitly so the regex failure stays visible as the cause.
        raise ParserMatchError(
            f"Failed to generate regular expression pattern: {e}."
        ) from e

    match = fmt_pattern_re.search(datetime_string)

    if match is None:
        raise ParserMatchError(
            f"Failed to match {fmt!r} when parsing {datetime_string!r}."
        )

    parts: _Parts = {}
    for token in fmt_tokens:
        value: Union[Tuple[str, str, str], str]
        if token == "Do":
            # the ordinal-day pattern exposes its number as group "value"
            value = match.group("value")
        elif token == "W":
            # the week-date pattern uses its own year/week/day groups
            value = (match.group("year"), match.group("week"), match.group("day"))
        else:
            value = match.group(token)

        if value is None:
            raise ParserMatchError(
                f"Unable to find a match group for the specified token {token!r}."
            )

        self._parse_token(token, value, parts)  # type: ignore[arg-type]

    return self._build_datetime(parts)
|
|
|
|
def _generate_pattern_re(self, fmt: str) -> Tuple[List[_FORMAT_TYPE], Pattern[str]]:
    """Translate a format string into its tokens and a compiled pattern.

    ``fmt`` is a string of tokens like 'YYYY-MM-DD'. Each token is replaced
    by a named group holding its pattern, e.g.
    'YYYY-MM-DD' -> '(?P<YYYY>\\d{4})-(?P<MM>\\d{2})-(?P<DD>\\d{2})'.
    The content of bracketed sections is inserted as-is, without the
    brackets and without token substitution.

    :returns: the tokens in order of appearance and the compiled,
        case-insensitive pattern wrapped in custom word boundaries.
    :raises ParserError: if a token has no entry in ``_input_re_map``.
    """
    # Escape all special RegEx chars
    escaped_fmt = re.escape(fmt)

    # Replace the bracketed expressions by a "#" placeholder; they are
    # reinserted later.
    escaped_fmt = re.sub(self._ESCAPE_RE, "#", escaped_fmt)

    # Any number of S is the same as one.
    # TODO: allow users to specify the number of digits to parse
    escaped_fmt = re.sub(r"S+", "S", escaped_fmt)

    escaped_data = re.findall(self._ESCAPE_RE, fmt)

    # Walk the tokens left to right, copying the literal text between them
    # and substituting a named group for each token.
    tokens: List[_FORMAT_TYPE] = []
    pieces: List[str] = []
    cursor = 0
    for m in self._FORMAT_RE.finditer(escaped_fmt):
        token: _FORMAT_TYPE = cast(_FORMAT_TYPE, m.group(0))
        try:
            input_re = self._input_re_map[token]
        except KeyError:
            raise ParserError(f"Unrecognized token {token!r}.")
        tokens.append(token)
        pieces.append(escaped_fmt[cursor : m.start()])
        pieces.append(f"(?P<{token}>{input_re.pattern})")
        cursor = m.end()
    pieces.append(escaped_fmt[cursor:])
    fmt_pattern = "".join(pieces)

    # Put the bracketed sections back in place of their placeholders.
    # Due to the way Python splits, 'split_fmt' will always be longer
    rebuilt: List[str] = []
    for index, chunk in enumerate(fmt_pattern.split(r"\#")):
        rebuilt.append(chunk)
        if index < len(escaped_data):
            rebuilt.append(escaped_data[index][1:-1])
    final_fmt_pattern = "".join(rebuilt)

    # Wrap final_fmt_pattern in a custom word boundary to strictly
    # match the formatting pattern and filter out date and time formats
    # that include junk such as: blah1998-09-12 blah, blah 1998-09-12blah,
    # blah1998-09-12blah. The custom word boundary matches every character
    # that is not a whitespace character to allow for searching for a date
    # and time string in a natural language sentence. Therefore, searching
    # for a string of the form YYYY-MM-DD in "blah 1998-09-12 blah" will
    # work properly.
    # Certain punctuation before or after the target pattern such as
    # "1998-09-12," is permitted. For the full list of valid punctuation,
    # see the documentation.

    starting_word_boundary = (
        # Don't have two consecutive non-whitespace characters. This ensures
        # that we allow cases like .11.25.2019 but not 1.11.25.2019 (for
        # pattern MM.DD.YYYY)
        r"(?<!\S\S)"
        # This is the list of punctuation that is ok before the pattern
        # (i.e. "It can't not be these characters before the pattern")
        r"(?<![^\,\.\;\:\?\!\"\'\`\[\]\{\}\(\)<>\s])"
        # The \b is to block cases like 1201912 but allow 201912 for pattern
        # YYYYMM. The ^ was necessary to allow a negative number through
        # i.e. before epoch numbers
        r"(\b|^)"
    )
    ending_word_boundary = (
        # Positive lookahead stating that these punctuation marks can appear
        # after the pattern at most 1 time
        r"(?=[\,\.\;\:\?\!\"\'\`\[\]\{\}\(\)\<\>]?"
        # Don't allow any non-whitespace character after the punctuation
        r"(?!\S))"
    )
    bounded_fmt_pattern = (
        starting_word_boundary + final_fmt_pattern + ending_word_boundary
    )

    return tokens, re.compile(bounded_fmt_pattern, flags=re.IGNORECASE)
|
|
|
|
@overload
def _parse_token(
    self,
    token: Literal[
        "YYYY",
        "YY",
        "MM",
        "M",
        "DDDD",
        "DDD",
        "DD",
        "D",
        "Do",
        "HH",
        "hh",
        "h",
        "H",
        "mm",
        "m",
        "ss",
        "s",
        "x",
    ],
    value: Union[str, bytes, SupportsInt, bytearray],
    parts: _Parts,
) -> None:
    ...  # pragma: no cover

@overload
def _parse_token(
    self,
    token: Literal["X"],
    value: Union[str, bytes, SupportsFloat, bytearray],
    parts: _Parts,
) -> None:
    ...  # pragma: no cover

@overload
def _parse_token(
    self,
    token: Literal["MMMM", "MMM", "dddd", "ddd", "S"],
    value: Union[str, bytes, bytearray],
    parts: _Parts,
) -> None:
    ...  # pragma: no cover

@overload
def _parse_token(
    self,
    token: Literal["a", "A", "ZZZ", "ZZ", "Z"],
    value: Union[str, bytes],
    parts: _Parts,
) -> None:
    ...  # pragma: no cover

@overload
def _parse_token(
    self,
    token: Literal["W"],
    value: Tuple[_WEEKDATE_ELEMENT, _WEEKDATE_ELEMENT, Optional[_WEEKDATE_ELEMENT]],
    parts: _Parts,
) -> None:
    ...  # pragma: no cover

def _parse_token(
    self,
    token: Any,
    value: Any,
    parts: _Parts,
) -> None:
    """Convert the text captured for ``token`` and store it in ``parts``.

    :param token: the formatting token the value was captured for.
    :param value: the captured text (a ``(year, week, day)`` tuple for "W").
    :param parts: dictionary updated in place with the converted value.
    :raises ParserMatchError: if an "am" meridian is combined with an
        already-parsed hour outside 0..12.
    """
    if token == "YYYY":
        parts["year"] = int(value)

    elif token == "YY":
        value = int(value)
        # two-digit years above 68 belong to the 1900s, the rest to the 2000s
        parts["year"] = 1900 + value if value > 68 else 2000 + value

    elif token in ["MMMM", "MMM"]:
        # FIXME: month_number() is nullable
        parts["month"] = self.locale.month_number(value.lower())  # type: ignore[typeddict-item]

    elif token in ["MM", "M"]:
        parts["month"] = int(value)

    elif token in ["DDDD", "DDD"]:
        parts["day_of_year"] = int(value)

    elif token in ["DD", "D"]:
        parts["day"] = int(value)

    elif token == "Do":
        parts["day"] = int(value)

    elif token == "dddd":
        # locale day names are 1-indexed
        day_of_week = [x.lower() for x in self.locale.day_names].index(
            value.lower()
        )
        parts["day_of_week"] = day_of_week - 1

    elif token == "ddd":
        # locale day abbreviations are 1-indexed
        day_of_week = [x.lower() for x in self.locale.day_abbreviations].index(
            value.lower()
        )
        parts["day_of_week"] = day_of_week - 1

    elif token.upper() in ["HH", "H"]:
        # covers both the 24-hour (HH, H) and 12-hour (hh, h) tokens
        parts["hour"] = int(value)

    elif token in ["mm", "m"]:
        parts["minute"] = int(value)

    elif token in ["ss", "s"]:
        parts["second"] = int(value)

    elif token == "S":
        # We have the *most significant* digits of an arbitrary-precision integer.
        # We want the six most significant digits as an integer, rounded.
        # IDEA: add nanosecond support somehow? Need datetime support for it first.
        value = value.ljust(7, "0")

        # floating-point (IEEE-754) defaults to half-to-even rounding
        seventh_digit = int(value[6])
        if seventh_digit == 5:
            rounding = int(value[5]) % 2
        elif seventh_digit > 5:
            rounding = 1
        else:
            rounding = 0

        parts["microsecond"] = int(value[:6]) + rounding

    elif token == "X":
        parts["timestamp"] = float(value)

    elif token == "x":
        parts["expanded_timestamp"] = int(value)

    elif token in ["ZZZ", "ZZ", "Z"]:
        parts["tzinfo"] = TzinfoParser.parse(value)

    elif token in ["a", "A"]:
        # _generate_pattern_re() compiles with re.IGNORECASE, so the captured
        # meridian may be in any letter case (e.g. "Pm"). Compare
        # case-insensitively so such a match is not silently dropped.
        meridians = self.locale.meridians
        meridian = value.lower()
        if meridian in (meridians["am"].lower(), meridians["AM"].lower()):
            parts["am_pm"] = "am"
            if "hour" in parts and not 0 <= parts["hour"] <= 12:
                raise ParserMatchError(
                    f"Hour token value must be between 0 and 12 inclusive for token {token!r}."
                )
        elif meridian in (meridians["pm"].lower(), meridians["PM"].lower()):
            parts["am_pm"] = "pm"

    elif token == "W":
        parts["weekdate"] = value
|
|
|
|
@staticmethod
def _build_datetime(parts: _Parts) -> datetime:
    """Assemble a ``datetime`` from the parsed ``parts``.

    A "timestamp" or "expanded_timestamp" entry takes precedence and yields
    a UTC datetime straight away. Otherwise week dates, day-of-year values
    and weekday-only input are first resolved to year/month/day, then the
    meridian, "24:00" midnight and rounded-up microseconds are applied.
    Missing components default to year 1, month 1, day 1 and zero time.

    :param parts: parsed components; updated in place when a week date,
        day of year or day of week is resolved.
    :raises ParserError: for an invalid ISO week date or day of year, for
        a day of year without a year or together with a month, and for hour
        24 combined with non-zero minutes, seconds or microseconds.
    """
    weekdate = parts.get("weekdate")

    if weekdate is not None:
        year, week = int(weekdate[0]), int(weekdate[1])

        # day not given, default to 1
        _day = int(weekdate[2]) if weekdate[2] is not None else 1

        date_string = f"{year}-{week}-{_day}"

        # tokens for ISO 8601 weekdates
        try:
            dt = datetime.strptime(date_string, "%G-%V-%u")
        except ValueError as e:
            # Report invalid weeks/days the same way as invalid days of year
            # instead of leaking strptime's bare ValueError.
            raise ParserError(
                f"The provided ISO week date (year {year}, week {week}, day {_day}) is invalid."
            ) from e

        parts["year"] = dt.year
        parts["month"] = dt.month
        parts["day"] = dt.day

    timestamp = parts.get("timestamp")

    if timestamp is not None:
        return datetime.fromtimestamp(timestamp, tz=tz.tzutc())

    expanded_timestamp = parts.get("expanded_timestamp")

    if expanded_timestamp is not None:
        return datetime.fromtimestamp(
            normalize_timestamp(expanded_timestamp),
            tz=tz.tzutc(),
        )

    day_of_year = parts.get("day_of_year")

    if day_of_year is not None:
        _year = parts.get("year")
        month = parts.get("month")
        if _year is None:
            raise ParserError(
                "Year component is required with the DDD and DDDD tokens."
            )

        if month is not None:
            raise ParserError(
                "Month component is not allowed with the DDD and DDDD tokens."
            )

        date_string = f"{_year}-{day_of_year}"
        try:
            dt = datetime.strptime(date_string, "%Y-%j")
        except ValueError as e:
            raise ParserError(
                f"The provided day of year {day_of_year!r} is invalid."
            ) from e

        parts["year"] = dt.year
        parts["month"] = dt.month
        parts["day"] = dt.day

    day_of_week: Optional[int] = parts.get("day_of_week")
    day = parts.get("day")

    # If day is passed, ignore day of week
    if day_of_week is not None and day is None:
        year = parts.get("year", 1970)
        month = parts.get("month", 1)
        day = 1

        # dddd => first day of week after epoch
        # dddd YYYY => first day of week in specified year
        # dddd MM YYYY => first day of week in specified year and month
        # dddd MM => first day after epoch in specified month
        next_weekday_dt = next_weekday(datetime(year, month, day), day_of_week)
        parts["year"] = next_weekday_dt.year
        parts["month"] = next_weekday_dt.month
        parts["day"] = next_weekday_dt.day

    am_pm = parts.get("am_pm")
    hour = parts.get("hour", 0)

    # Convert a 12-hour clock value to the 24-hour clock.
    if am_pm == "pm" and hour < 12:
        hour += 12
    elif am_pm == "am" and hour == 12:
        hour = 0

    # Support for midnight at the end of day
    if hour == 24:
        if parts.get("minute", 0) != 0:
            raise ParserError("Midnight at the end of day must not contain minutes")
        if parts.get("second", 0) != 0:
            raise ParserError("Midnight at the end of day must not contain seconds")
        if parts.get("microsecond", 0) != 0:
            raise ParserError(
                "Midnight at the end of day must not contain microseconds"
            )
        hour = 0
        day_increment = 1
    else:
        day_increment = 0

    # account for rounding up to 1000000
    microsecond = parts.get("microsecond", 0)
    if microsecond == 1000000:
        microsecond = 0
        second_increment = 1
    else:
        second_increment = 0

    # Carry-overs are added afterwards so they roll over days/months correctly.
    increment = timedelta(days=day_increment, seconds=second_increment)

    return (
        datetime(
            year=parts.get("year", 1),
            month=parts.get("month", 1),
            day=parts.get("day", 1),
            hour=hour,
            minute=parts.get("minute", 0),
            second=parts.get("second", 0),
            microsecond=microsecond,
            tzinfo=parts.get("tzinfo"),
        )
        + increment
    )
|
|
|
|
def _parse_multiformat(self, string: str, formats: Iterable[str]) -> datetime:
    """Return the result of the first format in ``formats`` that parses ``string``.

    Formats that raise ``ParserMatchError`` are skipped; any other error
    propagates to the caller.

    :raises ParserError: if none of the formats produced a result.
    """
    parsed: Optional[datetime] = None

    for candidate in formats:
        try:
            parsed = self.parse(string, candidate)
        except ParserMatchError:
            continue
        # first successful format wins
        break

    if parsed is not None:
        return parsed

    supported_formats = ", ".join(formats)
    raise ParserError(
        f"Could not match input {string!r} to any of the following formats: {supported_formats}."
    )
|
|
|
|
# generates a capture group of choices separated by an OR operator
|
|
@staticmethod
def _generate_choice_re(
    choices: Iterable[str], flags: Union[int, re.RegexFlag] = 0
) -> Pattern[str]:
    """Compile a single capture group matching any one of ``choices``.

    The choices are joined with ``|`` exactly as given (no escaping).
    """
    alternation = "|".join(choices)
    return re.compile("(" + alternation + ")", flags=flags)
|
|
|
|
|
|
class TzinfoParser:
    """Turn a timezone expression into a ``tzinfo`` object."""

    # Optional "(UTC" prefix, optional sign, two-digit hours and optional
    # two-digit minutes with or without a colon.
    _TZINFO_RE: ClassVar[Pattern[str]] = re.compile(
        r"^(?:\(UTC)*([\+\-])?(\d{2})(?:\:?(\d{2}))?"
    )

    @classmethod
    def parse(cls, tzinfo_string: str) -> dt_tzinfo:
        """Parse ``tzinfo_string`` into a ``tzinfo``.

        Handles, in this order: the literal "local", the UTC spellings
        "utc", "UTC" and "Z", numeric offsets matching ``_TZINFO_RE``, and
        finally anything ``tz.gettz`` can resolve.

        :raises ParserError: if ``tz.gettz`` returns ``None`` for the string.
        """
        if tzinfo_string == "local":
            return tz.tzlocal()

        if tzinfo_string in ("utc", "UTC", "Z"):
            return tz.tzutc()

        iso_match = cls._TZINFO_RE.match(tzinfo_string)
        if iso_match:
            sign: Optional[str]
            hours: str
            minutes: Union[str, int, None]
            sign, hours, minutes = iso_match.groups()

            # offset in seconds; minutes default to 0 when absent
            offset = int(hours) * 3600 + int(minutes or 0) * 60
            if sign == "-":
                offset = -offset
            return tz.tzoffset(None, offset)

        named: Optional[dt_tzinfo] = tz.gettz(tzinfo_string)
        if named is None:
            raise ParserError(f"Could not parse timezone expression {tzinfo_string!r}.")
        return named
|