mirror of
https://github.com/clinton-hall/nzbToMedia.git
synced 2025-01-09 04:23:16 -08:00
4348 lines
130 KiB
Python
4348 lines
130 KiB
Python
import warnings
|
|
|
|
from collections import Counter, defaultdict, deque, abc
|
|
from collections.abc import Sequence
|
|
from functools import partial, reduce, wraps
|
|
from heapq import heapify, heapreplace, heappop
|
|
from itertools import (
|
|
chain,
|
|
compress,
|
|
count,
|
|
cycle,
|
|
dropwhile,
|
|
groupby,
|
|
islice,
|
|
repeat,
|
|
starmap,
|
|
takewhile,
|
|
tee,
|
|
zip_longest,
|
|
)
|
|
from math import exp, factorial, floor, log
|
|
from queue import Empty, Queue
|
|
from random import random, randrange, uniform
|
|
from operator import itemgetter, mul, sub, gt, lt, ge, le
|
|
from sys import hexversion, maxsize
|
|
from time import monotonic
|
|
|
|
from .recipes import (
|
|
_marker,
|
|
_zip_equal,
|
|
UnequalIterablesError,
|
|
consume,
|
|
flatten,
|
|
pairwise,
|
|
powerset,
|
|
take,
|
|
unique_everseen,
|
|
all_equal,
|
|
)
|
|
|
|
# Names exported by ``from <this module> import *``; kept in alphabetical
# order (class names first).
__all__ = [
    'AbortThread',
    'SequenceView',
    'UnequalIterablesError',
    'adjacent',
    'all_unique',
    'always_iterable',
    'always_reversible',
    'bucket',
    'callback_iter',
    'chunked',
    'chunked_even',
    'circular_shifts',
    'collapse',
    'combination_index',
    'consecutive_groups',
    'constrained_batches',
    'consumer',
    'count_cycle',
    'countable',
    'difference',
    'distinct_combinations',
    'distinct_permutations',
    'distribute',
    'divide',
    'duplicates_everseen',
    'duplicates_justseen',
    'exactly_n',
    'filter_except',
    'first',
    'groupby_transform',
    'ichunked',
    'iequals',
    'ilen',
    'interleave',
    'interleave_evenly',
    'interleave_longest',
    'intersperse',
    'is_sorted',
    'islice_extended',
    'iterate',
    'last',
    'locate',
    'longest_common_prefix',
    'lstrip',
    'make_decorator',
    'map_except',
    'map_if',
    'map_reduce',
    'mark_ends',
    'minmax',
    'nth_or_last',
    'nth_permutation',
    'nth_product',
    'numeric_range',
    'one',
    'only',
    'padded',
    'partitions',
    'peekable',
    'permutation_index',
    'product_index',
    'raise_',
    'repeat_each',
    'repeat_last',
    'replace',
    'rlocate',
    'rstrip',
    'run_length',
    'sample',
    'seekable',
    'set_partitions',
    'side_effect',
    'sliced',
    'sort_together',
    'split_after',
    'split_at',
    'split_before',
    'split_into',
    'split_when',
    'spy',
    'stagger',
    'strip',
    'strictly_n',
    'substrings',
    'substrings_indexes',
    'time_limited',
    'unique_in_window',
    'unique_to_each',
    'unzip',
    'value_chain',
    'windowed',
    'windowed_complete',
    'with_iter',
    'zip_broadcast',
    'zip_equal',
    'zip_offset',
]
|
|
|
|
|
|
def chunked(iterable, n, strict=False):
    """Split *iterable* into consecutive lists holding *n* items each:

        >>> list(chunked([1, 2, 3, 4, 5, 6], 3))
        [[1, 2, 3], [4, 5, 6]]

    When the number of items is not a multiple of *n*, the final list is
    shorter than the others by default:

        >>> list(chunked([1, 2, 3, 4, 5, 6, 7, 8], 3))
        [[1, 2, 3], [4, 5, 6], [7, 8]]

    See the :func:`grouper` recipe if a fill-in value is wanted instead.

    With *strict* set to ``True``, a short final list is never yielded;
    ``ValueError`` is raised in its place. *n* must not be ``None`` in
    strict mode.

    """
    source = iter(iterable)
    # Two-argument iter(): keep building chunks until one comes back equal
    # to the sentinel (an empty list), i.e. until the source is exhausted.
    chunks = iter(lambda: list(islice(source, n)), [])
    if not strict:
        return chunks

    if n is None:
        raise ValueError('n must not be None when using strict mode.')

    def checked():
        for chunk in chunks:
            if len(chunk) != n:
                raise ValueError('iterable is not divisible by n.')
            yield chunk

    return iter(checked())
|
|
|
|
|
|
def first(iterable, default=_marker):
    """Return the leading item of *iterable*; fall back to *default* when
    *iterable* has no items.

        >>> first([0, 1, 2, 3])
        0
        >>> first([], 'some default')
        'some default'

    ``ValueError`` is raised when *iterable* is empty and no *default* was
    supplied.

    :func:`first` is handy for grabbing an arbitrary value from a generator
    whose items are expensive to produce. It is slightly shorter than
    ``next(iter(iterable), default)``.

    """
    try:
        item = next(iter(iterable))
    except StopIteration as exc:
        if default is not _marker:
            return default
        raise ValueError(
            'first() was called on an empty iterable, and no '
            'default value was provided.'
        ) from exc
    return item
|
|
|
|
|
|
def last(iterable, default=_marker):
    """Return the final item of *iterable*; fall back to *default* when
    *iterable* has no items.

        >>> last([0, 1, 2, 3])
        3
        >>> last([], 'some default')
        'some default'

    ``ValueError`` is raised when no item could be retrieved and no
    *default* was supplied.
    """
    try:
        # Sequences can be indexed from the end directly.
        if isinstance(iterable, Sequence):
            return iterable[-1]

        # Work around https://bugs.python.org/issue38525: the reversed()
        # shortcut is skipped when hexversion is exactly 0x030800F0.
        reversible = hasattr(iterable, '__reversed__')
        if reversible and (hexversion != 0x030800F0):
            return next(reversed(iterable))

        # Generic fallback: run through everything, keeping only one item.
        return deque(iterable, maxlen=1)[-1]
    except (IndexError, TypeError, StopIteration):
        if default is not _marker:
            return default
        raise ValueError(
            'last() was called on an empty iterable, and no default was '
            'provided.'
        )
|
|
|
|
|
|
def nth_or_last(iterable, n, default=_marker):
    """Return the item at position *n* of *iterable*, or its last item when
    it has fewer than ``n + 1`` items. *default* is returned when *iterable*
    is empty.

        >>> nth_or_last([0, 1, 2, 3], 2)
        2
        >>> nth_or_last([0, 1], 2)
        1
        >>> nth_or_last([], 0, 'some default')
        'some default'

    ``ValueError`` is raised when *iterable* is empty and no *default* was
    supplied.
    """
    # Only the first n + 1 items matter; the last of those is the answer.
    leading_items = islice(iterable, n + 1)
    return last(leading_items, default=default)
|
|
|
|
|
|
class peekable:
    """Iterator wrapper that supports looking ahead and pushing items back
    onto the front.

    :meth:`peek` reports the value :func:`next` is about to return without
    consuming it:

        >>> p = peekable(['a', 'b'])
        >>> p.peek()
        'a'
        >>> next(p)
        'a'

    Give :meth:`peek` a default to receive that value, rather than a
    ``StopIteration``, once nothing is left:

        >>> p = peekable([])
        >>> p.peek('hi')
        'hi'

    :meth:`prepend` places items ahead of whatever remains:

        >>> p = peekable([1, 2, 3])
        >>> p.prepend(10, 11, 12)
        >>> next(p)
        10
        >>> p.peek()
        11
        >>> list(p)
        [11, 12, 1, 2, 3]

    Indexing is supported. Index 0 is the item :func:`next` would return,
    index 1 the one after it, and so on. Items up to the requested index
    are cached.

        >>> p = peekable(['a', 'b', 'c', 'd'])
        >>> p[0]
        'a'
        >>> p[1]
        'b'
        >>> next(p)
        'a'

    Negative indexes work too, but they cache everything left in the source
    iterator, which may take significant storage.

    The truth value of a peekable tells whether any items remain:

        >>> p = peekable(['a', 'b'])
        >>> if p:  # peekable has items
        ...     list(p)
        ['a', 'b']
        >>> if not p:  # peekable is exhausted
        ...     list(p)
        []

    """

    def __init__(self, iterable):
        self._it = iter(iterable)
        # Items already pulled from the source (or prepended) but not yet
        # handed out; the leftmost entry is the next one to be returned.
        self._cache = deque()

    def __iter__(self):
        return self

    def __bool__(self):
        try:
            self.peek()
        except StopIteration:
            return False
        else:
            return True

    def peek(self, default=_marker):
        """Return the item that the next call to ``next()`` will produce.

        When nothing is left, return ``default``; if no ``default`` was
        given, let ``StopIteration`` propagate.

        """
        cache = self._cache
        if cache:
            return cache[0]

        try:
            upcoming = next(self._it)
        except StopIteration:
            if default is _marker:
                raise
            return default

        cache.append(upcoming)
        return upcoming

    def prepend(self, *items):
        """Queue *items* so they are the next values produced by ``next()``
        or ``self.peek()``, in the order they were passed::

            >>> p = peekable([1, 2, 3])
            >>> p.prepend(10, 11, 12)
            >>> next(p)
            10
            >>> list(p)
            [11, 12, 1, 2, 3]

        Prepending can "resurrect" a peekable that has already raised
        ``StopIteration``.

            >>> p = peekable([])
            >>> next(p)
            Traceback (most recent call last):
              ...
            StopIteration
            >>> p.prepend(1)
            >>> next(p)
            1
            >>> next(p)
            Traceback (most recent call last):
              ...
            StopIteration

        """
        # Pushing onto the left one at a time, last item first, leaves the
        # first item at the very front of the cache.
        for item in reversed(items):
            self._cache.appendleft(item)

    def __next__(self):
        cache = self._cache
        return cache.popleft() if cache else next(self._it)

    def _get_slice(self, index):
        # Fill in the slice's missing pieces; the defaults depend on the
        # direction of travel.
        step = 1 if (index.step is None) else index.step
        if step > 0:
            default_start, default_stop = 0, maxsize
        elif step < 0:
            default_start, default_stop = -1, (-maxsize - 1)
        else:
            raise ValueError('slice step cannot be zero')
        start = default_start if (index.start is None) else index.start
        stop = default_stop if (index.stop is None) else index.stop

        if (start < 0) or (stop < 0):
            # A negative bound counts from the right, so everything left in
            # the source has to be cached first.
            self._cache.extend(self._it)
        else:
            # Otherwise cache only as far as the rightmost index needed.
            needed = min(max(start, stop) + 1, maxsize)
            missing = needed - len(self._cache)
            if missing >= 0:
                self._cache.extend(islice(self._it, missing))

        return list(self._cache)[index]

    def __getitem__(self, index):
        if isinstance(index, slice):
            return self._get_slice(index)

        cached = len(self._cache)
        if index < 0:
            # Counting from the end requires the whole remainder.
            self._cache.extend(self._it)
        elif index >= cached:
            self._cache.extend(islice(self._it, index + 1 - cached))

        return self._cache[index]
|
|
|
|
|
|
def consumer(func):
    """Decorator for PEP-342-style "reverse iterators": the decorated
    generator function returns a generator that has already been advanced
    to its first ``yield``, so no manual ``next()`` call is needed.

        >>> @consumer
        ... def tally():
        ...     i = 0
        ...     while True:
        ...         print('Thing number %s is %s.' % (i, (yield)))
        ...         i += 1
        ...
        >>> t = tally()
        >>> t.send('red')
        Thing number 0 is red.
        >>> t.send('fish')
        Thing number 1 is fish.

    An undecorated generator would need ``next(t)`` to be called before
    ``t.send()`` could be used.

    """

    @wraps(func)
    def primed(*args, **kwargs):
        generator = func(*args, **kwargs)
        # Run up to the first yield so the generator can accept send().
        next(generator)
        return generator

    return primed
|
|
|
|
|
|
def ilen(iterable):
    """Count the items in *iterable* and return the total.

        >>> ilen(x for x in range(1000000) if x % 3 == 0)
        333334

    The iterable is consumed in the process, so handle with care.

    """
    # This approach was chosen because benchmarks showed it to be likely the
    # fastest known implementation at the time of writing.
    # See GitHub tracker: #236, #230.
    tally = count()
    # zip() asks *iterable* for an item before touching the counter, so the
    # counter advances exactly once per item; the zero-length deque simply
    # discards the pairs.
    deque(zip(iterable, tally), maxlen=0)
    return next(tally)
|
|
|
|
|
|
def iterate(func, start):
    """Return ``start``, ``func(start)``, ``func(func(start))``, ...

        >>> from itertools import islice
        >>> list(islice(iterate(lambda x: 2*x, 1), 10))
        [1, 2, 4, 8, 16, 32, 64, 128, 256, 512]

    The sequence is endless unless *func* raises ``StopIteration``, in
    which case iteration simply ends after the last value produced.

    """
    while True:
        yield start
        try:
            start = func(start)
        except StopIteration:
            # A StopIteration escaping a generator body is converted to
            # RuntimeError (PEP 479); finish cleanly instead.
            return
|
|
|
|
|
|
def with_iter(context_manager):
    """Iterate over what *context_manager* provides while keeping it inside
    a ``with`` statement, so the manager is exited once iteration finishes.

    For instance, the file below is closed when the iterator is exhausted::

        upper_lines = (line.upper() for line in with_iter(open('foo')))

    Any context manager whose ``with`` target is iterable can be used with
    ``with_iter``.

    """
    with context_manager as managed_iterable:
        yield from managed_iterable
|
|
|
|
|
|
def one(iterable, too_short=None, too_long=None):
    """Return the sole item of *iterable*. An exception is raised when
    *iterable* is empty or holds more than one item.

    :func:`one` is a convenient way to assert that an iterable has exactly
    one item, for example when fetching the result of a database query that
    is expected to match a single row.

    An empty *iterable* raises ``ValueError``; pass *too_short* to raise
    something else:

        >>> it = []
        >>> one(it)  # doctest: +IGNORE_EXCEPTION_DETAIL
        Traceback (most recent call last):
        ...
        ValueError: too few items in iterable (expected 1)
        >>> too_short = IndexError('too few items')
        >>> one(it, too_short=too_short)  # doctest: +IGNORE_EXCEPTION_DETAIL
        Traceback (most recent call last):
        ...
        IndexError: too few items

    Likewise, more than one item raises ``ValueError``; pass *too_long* to
    raise something else:

        >>> it = ['too', 'many']
        >>> one(it)  # doctest: +IGNORE_EXCEPTION_DETAIL
        Traceback (most recent call last):
        ...
        ValueError: Expected exactly one item in iterable, but got 'too',
        'many', and perhaps more.
        >>> too_long = RuntimeError
        >>> one(it, too_long=too_long)  # doctest: +IGNORE_EXCEPTION_DETAIL
        Traceback (most recent call last):
        ...
        RuntimeError

    Be aware that :func:`one` tries to advance *iterable* twice in order to
    confirm there is a single item. See :func:`spy` or :func:`peekable` for
    less destructive ways to inspect an iterable.

    """
    iterator = iter(iterable)

    try:
        only_item = next(iterator)
    except StopIteration as exc:
        error = too_short or ValueError(
            'too few items in iterable (expected 1)'
        )
        raise error from exc

    # A second successful next() means there was more than one item.
    try:
        extra_item = next(iterator)
    except StopIteration:
        return only_item

    msg = (
        'Expected exactly one item in iterable, but got {!r}, {!r}, '
        'and perhaps more.'.format(only_item, extra_item)
    )
    raise too_long or ValueError(msg)
|
|
|
|
|
|
def raise_(exception, *args):
    """Call *exception* with *args* and raise the result.

    Being a function, this can be used where a ``raise`` statement cannot,
    such as inside a ``lambda``.
    """
    error = exception(*args)
    raise error
|
|
|
|
|
|
def strictly_n(iterable, n, too_short=None, too_long=None):
    """Yield the items of *iterable* while checking that there are exactly
    *n* of them. If there are fewer than *n* items, the function
    *too_short* is called with the number of items found. If there are more
    than *n* items, the function *too_long* is called with ``n + 1``.

        >>> iterable = ['a', 'b', 'c', 'd']
        >>> n = 4
        >>> list(strictly_n(iterable, n))
        ['a', 'b', 'c', 'd']

    Unless overridden, *too_short* and *too_long* raise ``ValueError``.

        >>> list(strictly_n('ab', 3))  # doctest: +IGNORE_EXCEPTION_DETAIL
        Traceback (most recent call last):
        ...
        ValueError: Too few items in iterable (got 2)

        >>> list(strictly_n('abc', 2))  # doctest: +IGNORE_EXCEPTION_DETAIL
        Traceback (most recent call last):
        ...
        ValueError: Too many items in iterable (got at least 3)

    Custom callbacks may do something else entirely:

        >>> def too_short(item_count):
        ...     raise RuntimeError
        >>> it = strictly_n('abcd', 6, too_short=too_short)
        >>> list(it)  # doctest: +IGNORE_EXCEPTION_DETAIL
        Traceback (most recent call last):
        ...
        RuntimeError

        >>> def too_long(item_count):
        ...     print('The boss is going to hear about this')
        >>> it = strictly_n('abcdef', 4, too_long=too_long)
        >>> list(it)
        The boss is going to hear about this
        ['a', 'b', 'c', 'd']

    """
    if too_short is None:

        def too_short(item_count):
            raise ValueError(
                'Too few items in iterable (got {})'.format(item_count)
            )

    if too_long is None:

        def too_long(item_count):
            raise ValueError(
                'Too many items in iterable (got at least {})'.format(
                    item_count
                )
            )

    iterator = iter(iterable)
    for seen in range(n):
        try:
            item = next(iterator)
        except StopIteration:
            # The source ran dry after *seen* items.
            too_short(seen)
            return
        yield item

    # Any further item at all means the iterable was too long; one probe
    # is enough.
    for _ in iterator:
        too_long(n + 1)
        break
|
|
|
|
|
|
def distinct_permutations(iterable, r=None):
    """Yield each distinct permutation of the elements in *iterable* once.

        >>> sorted(distinct_permutations([1, 0, 1]))
        [(0, 1, 1), (1, 0, 1), (1, 1, 0)]

    The output matches ``set(permutations(iterable))``, but duplicates are
    never generated in the first place, which is far more efficient for
    larger inputs.

    Duplicates would arise from repeated elements in the input. The number
    of permutations yielded is `n! / (x_1! * x_2! * ... * x_n!)`, where `n`
    is the total number of input items and each `x_i` is the count of one
    distinct item in the input.

    Pass *r* to get only the permutations of length *r*.

        >>> sorted(distinct_permutations([1, 0, 1], r=2))
        [(0, 1), (1, 0), (1, 1)]
        >>> sorted(distinct_permutations(range(3), r=2))
        [(0, 1), (0, 2), (1, 0), (1, 2), (2, 0), (2, 1)]

    """
    pool = sorted(iterable)
    size = len(pool)
    if r is None:
        r = size

    # Algorithm: https://w.wiki/Qai
    def _full(perm):
        while True:
            # Emit the current arrangement
            yield tuple(perm)

            # Find the largest index i such that perm[i] < perm[i + 1]
            for i in range(size - 2, -1, -1):
                if perm[i] < perm[i + 1]:
                    break
            # No such index: the arrangement is fully descending, so it is
            # the last one
            else:
                return

            # Find the largest index j greater than i such that
            # perm[i] < perm[j]
            for j in range(size - 1, i, -1):
                if perm[i] < perm[j]:
                    break

            # Swap perm[i] with perm[j], then reverse everything after
            # position i to form the next arrangement
            perm[i], perm[j] = perm[j], perm[i]
            perm[i + 1 :] = perm[: i - size : -1]  # perm[i + 1:][::-1]

    # Algorithm: modified from the above
    def _partial(perm, r):
        # Split into the first r items (the part that is yielded) and the
        # remaining items
        head, tail = perm[:r], perm[r:]
        right_head_indexes = range(r - 1, -1, -1)
        left_tail_indexes = range(len(tail))

        while True:
            # Emit the current arrangement
            yield tuple(head)

            # Starting from the right, find the first index of the head
            # whose value is smaller than the maximum value of the tail -
            # call it i.
            pivot = tail[-1]
            for i in right_head_indexes:
                if head[i] < pivot:
                    break
                pivot = head[i]
            else:
                return

            # Starting from the left, find the first value of the tail
            # greater than head[i] and swap.
            for j in left_tail_indexes:
                if tail[j] > head[i]:
                    head[i], tail[j] = tail[j], head[i]
                    break
            # If there is none, start from the right and find the first
            # index of the head with a value greater than head[i] and swap.
            else:
                for j in right_head_indexes:
                    if head[j] > head[i]:
                        head[i], head[j] = head[j], head[i]
                        break

            # Reverse head[i + 1:] and swap it with tail[:r - (i + 1)]
            tail += head[: i - r : -1]  # head[i + 1:][::-1]
            i += 1
            head[i:], tail[:] = tail[: r - i], tail[r - i :]

    if not (0 < r <= size):
        # r == 0 has exactly one (empty) permutation; any other r outside
        # 1..size has none.
        return iter(() if r else ((),))

    return _full(pool) if (r == size) else _partial(pool, r)
|
|
|
|
|
|
def intersperse(e, iterable, n=1):
    """Intersperse filler element *e* among the items in *iterable*, leaving
    *n* items between each filler element.

        >>> list(intersperse('!', [1, 2, 3, 4, 5]))
        [1, '!', 2, '!', 3, '!', 4, '!', 5]

        >>> list(intersperse(None, [1, 2, 3, 4, 5], n=2))
        [1, 2, None, 3, 4, None, 5]

    ``ValueError`` is raised immediately when *n* is zero or negative.

    """
    # Reject every non-positive n up front, as the message promises. A
    # ``None`` value is still handed on to chunked() untouched, as before.
    if n is not None and n <= 0:
        raise ValueError('n must be > 0')

    if n == 1:
        # interleave(repeat(e), iterable) -> e, x_0, e, x_1, e, x_2...
        # islice(..., 1, None) -> x_0, e, x_1, e, x_2...
        return islice(interleave(repeat(e), iterable), 1, None)

    # interleave(filler, chunks) -> [e], [x_0, x_1], [e], [x_2, x_3]...
    # islice(..., 1, None) -> [x_0, x_1], [e], [x_2, x_3]...
    # flatten(...) -> x_0, x_1, e, x_2, x_3...
    filler = repeat([e])
    chunks = chunked(iterable, n)
    return flatten(islice(interleave(filler, chunks), 1, None))
|
|
|
|
|
|
def unique_to_each(*iterables):
    """For each input iterable, return the elements that appear in none of
    the other input iterables.

    As an illustration, imagine a set of packages, each with its own set of
    dependencies::

        {'pkg_1': {'A', 'B'}, 'pkg_2': {'B', 'C'}, 'pkg_3': {'B', 'D'}}

    Which dependencies become unnecessary when one package is removed?

    Removing ``pkg_1`` makes ``A`` unnecessary, since neither ``pkg_2`` nor
    ``pkg_3`` uses it. In the same way ``C`` is needed only by ``pkg_2`` and
    ``D`` only by ``pkg_3``::

        >>> unique_to_each({'A', 'B'}, {'B', 'C'}, {'B', 'D'})
        [['A'], ['C'], ['D']]

    An element repeated within a single input, and absent from the others,
    is repeated in the output as well. Input order is preserved::

        >>> unique_to_each("mississippi", "missouri")
        [['p', 'p'], ['o', 'u', 'r']]

    The elements of every iterable are assumed to be hashable.

    """
    materialized = [list(iterable) for iterable in iterables]

    # Count, for every element, how many of the inputs contain it; going
    # through set() makes repeats within one input count only once.
    occurrences = Counter()
    for items in materialized:
        occurrences.update(set(items))

    singletons = {
        element for element, seen_in in occurrences.items() if seen_in == 1
    }
    return [
        [element for element in items if element in singletons]
        for items in materialized
    ]
|
|
|
|
|
|
def windowed(seq, n, fillvalue=None, step=1):
    """Yield tuples of width *n* that slide across the given iterable.

        >>> all_windows = windowed([1, 2, 3, 4, 5], 3)
        >>> list(all_windows)
        [(1, 2, 3), (2, 3, 4), (3, 4, 5)]

    If the iterable is shorter than the window, the missing positions are
    filled with *fillvalue*:

        >>> list(windowed([1, 2, 3], 4))
        [(1, 2, 3, None)]

    Successive windows start *step* items apart:

        >>> list(windowed([1, 2, 3, 4, 5, 6], 3, fillvalue='!', step=2))
        [(1, 2, 3), (3, 4, 5), (5, 6, '!')]

    To have the window slide into the iterable's items, use :func:`chain`
    to add filler items on the left:

        >>> iterable = [1, 2, 3, 4]
        >>> n = 3
        >>> padding = [None] * (n - 1)
        >>> list(windowed(chain(padding, iterable), 3))
        [(None, None, 1), (None, 1, 2), (1, 2, 3), (2, 3, 4)]
    """
    if n < 0:
        raise ValueError('n must be >= 0')
    if n == 0:
        yield tuple()
        return
    if step < 1:
        raise ValueError('step must be >= 1')

    window = deque(maxlen=n)
    # Number of items still to be read before the next window is emitted.
    countdown = n
    for item in seq:
        window.append(item)
        countdown -= 1
        if countdown == 0:
            countdown = step
            yield tuple(window)

    filled = len(window)
    if filled == 0:
        return

    if filled < n:
        # The input never filled a single window; pad it on the right.
        yield tuple(chain(window, repeat(fillvalue, n - filled)))
    elif 0 < countdown < min(step, n):
        # Some items were read since the last window was emitted; push
        # fill values through so they appear in one final window.
        window.extend((fillvalue,) * countdown)
        yield tuple(window)
|
|
|
|
|
|
def substrings(iterable):
    """Yield every contiguous run of items ("substring") of *iterable*,
    shortest first.

        >>> [''.join(s) for s in substrings('more')]
        ['m', 'o', 'r', 'e', 'mo', 'or', 're', 'mor', 'ore', 'more']

    The input does not have to be a string.

        >>> list(substrings([0, 1, 2]))
        [(0,), (1,), (2,), (0, 1), (1, 2), (0, 1, 2)]

    """
    # Length-1 substrings are emitted while the input is still being read.
    collected = []
    for element in iter(iterable):
        collected.append(element)
        yield (element,)

    # Everything longer is sliced out of the now-complete tuple.
    items = tuple(collected)
    total = len(items)
    for width in range(2, total + 1):
        for begin in range(total - width + 1):
            yield items[begin : begin + width]
|
|
|
|
|
|
def substrings_indexes(seq, reverse=False):
    """Yield every substring of *seq* together with its position.

    Each yielded item is a tuple ``(substr, i, j)`` for which
    ``substr == seq[i:j]``.

    *seq* must support ``len()`` and slicing, as ``str`` objects do.

        >>> for item in substrings_indexes('more'):
        ...     print(item)
        ('m', 0, 1)
        ('o', 1, 2)
        ('r', 2, 3)
        ('e', 3, 4)
        ('mo', 0, 2)
        ('or', 1, 3)
        ('re', 2, 4)
        ('mor', 0, 3)
        ('ore', 1, 4)
        ('more', 0, 4)

    With *reverse* set to ``True`` the longest substrings come first;
    substrings of equal length still appear from left to right.

    """
    total = len(seq)
    if reverse:
        lengths = reversed(range(1, total + 1))
    else:
        lengths = range(1, total + 1)

    return (
        (seq[begin : begin + width], begin, begin + width)
        for width in lengths
        for begin in range(len(seq) - width + 1)
    )
|
|
|
|
|
|
class bucket:
    """Wrap *iterable* and expose its items as child iterables, grouped
    by the result of a *key* function.

        >>> iterable = ['a1', 'b1', 'c1', 'a2', 'b2', 'c2', 'b3']
        >>> s = bucket(iterable, key=lambda x: x[0])  # Bucket by 1st character
        >>> sorted(list(s))  # Get the keys
        ['a', 'b', 'c']
        >>> a_iterable = s['a']
        >>> next(a_iterable)
        'a1'
        >>> next(a_iterable)
        'a2'
        >>> list(s['b'])
        ['b1', 'b2', 'b3']

    The wrapped iterable is advanced on demand, and items belonging to
    other buckets are cached until their child iterable asks for them. This
    may require significant storage.

    By default, selecting a bucket that no item belongs to exhausts the
    iterable and caches every value. If a *validator* function is given,
    selected bucket keys are checked against it first.

        >>> from itertools import count
        >>> it = count(1, 2)  # Infinite sequence of odd numbers
        >>> key = lambda x: x % 10  # Bucket by last digit
        >>> validator = lambda x: x in {1, 3, 5, 7, 9}  # Odd digits only
        >>> s = bucket(it, key=key, validator=validator)
        >>> 2 in s
        False
        >>> list(s[2])
        []

    """

    def __init__(self, iterable, key, validator=None):
        self._it = iter(iterable)
        self._key = key
        # Maps each bucket key to the items read but not yet handed out.
        self._cache = defaultdict(deque)
        # Without a validator every key is acceptable.
        self._validator = validator or (lambda x: True)

    def __contains__(self, value):
        if not self._validator(value):
            return False

        try:
            probe = next(self[value])
        except StopIteration:
            return False

        # Return the probed item to the front of its bucket so that the
        # membership test does not consume it.
        self._cache[value].appendleft(probe)
        return True

    def _get_values(self, value):
        """
        Helper that yields the items of the parent iterator whose key is
        *value*. Items with other keys are stored in the local cache as
        they are encountered.
        """
        while True:
            pending = self._cache[value]
            # Serve previously cached matches first, evicting each one as
            # it is emitted.
            if pending:
                yield pending.popleft()
                continue

            # Nothing cached: advance the parent iterator looking for a
            # match, caching everything else along the way.
            for item in self._it:
                item_key = self._key(item)
                if item_key == value:
                    yield item
                    break
                if self._validator(item_key):
                    self._cache[item_key].append(item)
            else:
                # The parent iterator is exhausted.
                return

    def __iter__(self):
        # Drain the parent iterator into the cache, then report the keys.
        for item in self._it:
            item_key = self._key(item)
            if self._validator(item_key):
                self._cache[item_key].append(item)

        yield from self._cache.keys()

    def __getitem__(self, value):
        if self._validator(value):
            return self._get_values(value)

        return iter(())
|
|
|
|
|
|
def spy(iterable, n=1):
    """Return a 2-tuple: a list holding the first *n* items of *iterable*,
    and an iterator that yields the same items as *iterable* would have.
    This lets you "look ahead" at upcoming items without losing them.

    By default the list holds a single item:

        >>> iterable = 'abcdefg'
        >>> head, iterable = spy(iterable)
        >>> head
        ['a']
        >>> list(iterable)
        ['a', 'b', 'c', 'd', 'e', 'f', 'g']

    Unpacking can be used to get the items themselves instead of a list:

        >>> (head,), iterable = spy('abcdefg')
        >>> head
        'a'
        >>> (first, second), iterable = spy('abcdefg', 2)
        >>> first
        'a'
        >>> second
        'b'

    Asking for more items than the iterable holds is allowed:

        >>> iterable = [1, 2, 3, 4, 5]
        >>> head, iterable = spy(iterable, 10)
        >>> head
        [1, 2, 3, 4, 5]
        >>> list(iterable)
        [1, 2, 3, 4, 5]

    """
    iterator = iter(iterable)
    peeked = list(islice(iterator, n))

    # The caller receives its own copy of the list, so changing it cannot
    # affect what the returned iterator replays.
    return list(peeked), chain(peeked, iterator)
|
|
|
|
|
|
def interleave(*iterables):
    """Return an iterable that takes one item from each input in turn and
    stops as soon as the shortest input runs out.

        >>> list(interleave([1, 2, 3], [4, 5], [6, 7, 8]))
        [1, 4, 6, 2, 5, 7]

    See :func:`interleave_longest` for a variant that keeps going after the
    shortest iterable is exhausted.

    """
    # Each zip() tuple is one full round across the inputs; chaining the
    # rounds flattens them into a single stream.
    rounds = zip(*iterables)
    return chain.from_iterable(rounds)
|
|
|
|
|
|
def interleave_longest(*iterables):
    """Return an iterable that takes one item from each input in turn,
    skipping over inputs that have already run out.

        >>> list(interleave_longest([1, 2, 3], [4, 5], [6, 7, 8]))
        [1, 4, 6, 2, 5, 7, 3, 8]

    The output is the same as that of :func:`roundrobin`, but this function
    may perform better for some inputs (in particular when the number of
    iterables is large).

    """
    # Exhausted inputs are padded with the private sentinel so each round
    # stays aligned; the padding is dropped again on the way out.
    rounds = zip_longest(*iterables, fillvalue=_marker)
    padded_stream = chain.from_iterable(rounds)
    return (item for item in padded_stream if item is not _marker)
|
|
|
|
|
|
def interleave_evenly(iterables, lengths=None):
    """
    Interleave multiple iterables so that their elements are evenly distributed
    throughout the output sequence.

    >>> iterables = [1, 2, 3, 4, 5], ['a', 'b']
    >>> list(interleave_evenly(iterables))
    [1, 2, 'a', 3, 4, 'b', 5]

    >>> iterables = [[1, 2, 3], [4, 5], [6, 7, 8]]
    >>> list(interleave_evenly(iterables))
    [1, 6, 4, 2, 7, 3, 8, 5]

    This function requires iterables of known length. Iterables without
    ``__len__()`` can be used by manually specifying lengths with *lengths*:

    >>> from itertools import combinations, repeat
    >>> iterables = [combinations(range(4), 2), ['a', 'b', 'c']]
    >>> lengths = [4 * (4 - 1) // 2, 3]
    >>> list(interleave_evenly(iterables, lengths=lengths))
    [(0, 1), (0, 2), 'a', (0, 3), (1, 2), 'b', (1, 3), (2, 3), 'c']

    An empty collection of iterables produces no items.

    ``ValueError`` is raised (on first iteration) when the lengths cannot
    be determined automatically, or when the number of *lengths* differs
    from the number of *iterables*.

    Based on Bresenham's algorithm.
    """
    if lengths is None:
        try:
            lengths = [len(it) for it in iterables]
        except TypeError as exc:
            raise ValueError(
                'Iterable lengths could not be determined automatically. '
                'Specify them with the lengths keyword.'
            ) from exc
    elif len(iterables) != len(lengths):
        raise ValueError('Mismatching number of iterables and lengths.')

    dims = len(lengths)
    if dims == 0:
        # Nothing to interleave; without this guard the primary-iterable
        # lookup below would fail with IndexError.
        return

    # sort iterables by length, descending
    lengths_permute = sorted(
        range(dims), key=lambda i: lengths[i], reverse=True
    )
    lengths_desc = [lengths[i] for i in lengths_permute]
    iters_desc = [iter(iterables[i]) for i in lengths_permute]

    # the longest iterable is the primary one (Bresenham: the longest
    # distance along an axis)
    delta_primary, deltas_secondary = lengths_desc[0], lengths_desc[1:]
    iter_primary, iters_secondary = iters_desc[0], iters_desc[1:]
    errors = [delta_primary // dims] * len(deltas_secondary)

    to_yield = sum(lengths)
    while to_yield:
        yield next(iter_primary)
        to_yield -= 1
        # update errors for each secondary iterable
        errors = [e - delta for e, delta in zip(errors, deltas_secondary)]

        # those iterables for which the error is negative are yielded
        # ("diagonal step" in Bresenham)
        for i, e in enumerate(errors):
            if e < 0:
                yield next(iters_secondary[i])
                to_yield -= 1
                errors[i] += delta_primary
|
|
|
|
|
|
def collapse(iterable, base_type=None, levels=None):
    """Flatten an iterable with multiple levels of nesting (e.g., a list of
    lists of tuples) into non-iterable types.

        >>> iterable = [(1, 2), ([3, 4], [[5], [6]])]
        >>> list(collapse(iterable))
        [1, 2, 3, 4, 5, 6]

    Binary and text strings are not considered iterable and
    will not be collapsed.

    To avoid collapsing other types, specify *base_type*:

        >>> iterable = ['ab', ('cd', 'ef'), ['gh', 'ij']]
        >>> list(collapse(iterable, base_type=tuple))
        ['ab', ('cd', 'ef'), 'gh', 'ij']

    Specify *levels* to stop flattening after a certain level:

    >>> iterable = [('a', ['b']), ('c', ['d'])]
    >>> list(collapse(iterable))  # Fully flattened
    ['a', 'b', 'c', 'd']
    >>> list(collapse(iterable, levels=1))  # Only one level flattened
    ['a', ['b'], 'c', ['d']]

    The traversal keeps its own stack of iterators instead of recursing, so
    the nesting depth of *iterable* is not bounded by the interpreter's
    recursion limit.

    """
    # Each stack entry is (level, iterator); *level* is the nesting level of
    # the nodes that the iterator produces. The input itself is the single
    # node at level 0.
    stack = [(0, iter((iterable,)))]

    while stack:
        level, nodes = stack[-1]
        for node in nodes:
            if (
                ((levels is not None) and (level > levels))
                or isinstance(node, (str, bytes))
                or ((base_type is not None) and isinstance(node, base_type))
            ):
                yield node
                continue

            try:
                tree = iter(node)
            except TypeError:
                # Not iterable: this is a leaf
                yield node
            else:
                # Descend into the child now; the partially consumed parent
                # iterator stays on the stack and is resumed afterwards.
                stack.append((level + 1, tree))
                break
        else:
            # The iterator on top of the stack is exhausted
            stack.pop()
|
|
|
|
|
|
def side_effect(func, iterable, chunk_size=None, before=None, after=None):
    """Call *func* on every item of *iterable* (or on every group of
    *chunk_size* items) and then yield the items unchanged.

    *func* receives a single argument and its return value is ignored.

    *before* and *after*, when given, are zero-argument callables. *before*
    runs before the first item is requested; *after* runs when iteration
    finishes, including when it finishes because of an error.

    `side_effect` is handy for logging, progress reporting, or any other
    work that is not functionally "pure."

    Emitting a status message:

        >>> from more_itertools import consume
        >>> func = lambda item: print('Received {}'.format(item))
        >>> consume(side_effect(func, range(2)))
        Received 0
        Received 1

    Operating on chunks of items:

        >>> pair_sums = []
        >>> func = lambda chunk: pair_sums.append(sum(chunk))
        >>> list(side_effect(func, [0, 1, 2, 3, 4, 5], 2))
        [0, 1, 2, 3, 4, 5]
        >>> list(pair_sums)
        [1, 5, 9]

    Writing to a file-like object:

        >>> from io import StringIO
        >>> from more_itertools import consume
        >>> f = StringIO()
        >>> func = lambda x: print(x, file=f)
        >>> before = lambda: print(u'HEADER', file=f)
        >>> after = f.close
        >>> it = [u'a', u'b', u'c']
        >>> consume(side_effect(func, it, before=before, after=after))
        >>> f.closed
        True

    """
    try:
        if before is not None:
            before()

        if chunk_size is not None:
            # Chunked mode: func sees each group, callers still see items
            for group in chunked(iterable, chunk_size):
                func(group)
                yield from group
            return

        for element in iterable:
            func(element)
            yield element
    finally:
        # Runs on normal exhaustion, on error, and on generator close
        if after is not None:
            after()
|
|
|
|
|
|
def sliced(seq, n, strict=False):
    """Yield consecutive slices of length *n* taken from the sequence *seq*.

    >>> list(sliced((1, 2, 3, 4, 5, 6), 3))
    [(1, 2, 3), (4, 5, 6)]

    By default, when the length of *seq* is not a multiple of *n*, the
    final slice is shorter than *n*:

    >>> list(sliced((1, 2, 3, 4, 5, 6, 7, 8), 3))
    [(1, 2, 3), (4, 5, 6), (7, 8)]

    With *strict* set to ``True``, ``ValueError`` is raised instead of
    yielding that short final slice.

    Only objects that support slicing can be used here. For other
    iterables, see :func:`chunked`.

    """
    # Slice at offsets 0, n, 2n, ... and stop at the first empty slice
    pieces = (seq[offset : offset + n] for offset in count(0, n))
    nonempty = takewhile(len, pieces)

    if not strict:
        return nonempty

    def checked():
        for piece in nonempty:
            if len(piece) != n:
                raise ValueError("seq is not divisible by n.")
            yield piece

    return iter(checked())
|
|
|
|
|
|
def split_at(iterable, pred, maxsplit=-1, keep_separator=False):
    """Yield lists of items from *iterable*, cutting at every item for which
    the callable *pred* returns ``True``.

        >>> list(split_at('abcdcba', lambda x: x == 'b'))
        [['a'], ['c', 'd', 'c'], ['a']]

        >>> list(split_at(range(10), lambda n: n % 2 == 1))
        [[0], [2], [4], [6], [8], []]

    No more than *maxsplit* splits are performed. When *maxsplit* is omitted
    or -1, the number of splits is unlimited:

        >>> list(split_at(range(10), lambda n: n % 2 == 1, maxsplit=2))
        [[0], [2], [4, 5, 6, 7, 8, 9]]

    The delimiting items are dropped by default. To include them, each as
    its own one-item list, set *keep_separator* to ``True``.

        >>> list(split_at('abcdcba', lambda x: x == 'b', keep_separator=True))
        [['a'], ['b'], ['c', 'd', 'c'], ['b'], ['a']]

    """
    if maxsplit == 0:
        yield list(iterable)
        return

    source = iter(iterable)
    current = []
    splits_left = maxsplit
    for element in source:
        if not pred(element):
            current.append(element)
            continue

        # *element* is a separator: close the current piece
        yield current
        if keep_separator:
            yield [element]
        if splits_left == 1:
            # Last permitted split: the remainder is one final piece
            yield list(source)
            return
        current = []
        splits_left -= 1

    yield current
|
|
|
|
|
|
def split_before(iterable, pred, maxsplit=-1):
    """Yield lists of items from *iterable*, starting a new list right before
    each item for which the callable *pred* returns ``True``:

        >>> list(split_before('OneTwo', lambda s: s.isupper()))
        [['O', 'n', 'e'], ['T', 'w', 'o']]

        >>> list(split_before(range(10), lambda n: n % 3 == 0))
        [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9]]

    No more than *maxsplit* splits are performed. When *maxsplit* is omitted
    or -1, the number of splits is unlimited:

        >>> list(split_before(range(10), lambda n: n % 3 == 0, maxsplit=2))
        [[0, 1, 2], [3, 4, 5], [6, 7, 8, 9]]
    """
    if maxsplit == 0:
        yield list(iterable)
        return

    source = iter(iterable)
    current = []
    splits_left = maxsplit
    for element in source:
        # pred is evaluated for every item, even when nothing is buffered
        starts_new_piece = pred(element)
        if starts_new_piece and current:
            yield current
            if splits_left == 1:
                # Last permitted split: everything else is one piece
                yield [element, *source]
                return
            current = []
            splits_left -= 1
        current.append(element)

    if current:
        yield current
|
|
|
|
|
|
def split_after(iterable, pred, maxsplit=-1):
    """Yield lists of items from *iterable*, where each list ends with an
    item where callable *pred* returns ``True``:

        >>> list(split_after('one1two2', lambda s: s.isdigit()))
        [['o', 'n', 'e', '1'], ['t', 'w', 'o', '2']]

        >>> list(split_after(range(10), lambda n: n % 3 == 0))
        [[0], [1, 2, 3], [4, 5, 6], [7, 8, 9]]

    At most *maxsplit* splits are done. If *maxsplit* is not specified or -1,
    then there is no limit on the number of splits:

        >>> list(split_after(range(10), lambda n: n % 3 == 0, maxsplit=2))
        [[0], [1, 2, 3], [4, 5, 6, 7, 8, 9]]

    No empty list is emitted when the final permitted split falls on the
    last item of *iterable*, matching the behavior without *maxsplit*:

        >>> list(split_after([1, 2, 3], lambda n: n == 3, maxsplit=1))
        [[1, 2, 3]]

    """
    if maxsplit == 0:
        yield list(iterable)
        return

    buf = []
    it = iter(iterable)
    for item in it:
        buf.append(item)
        # buf is never empty here, so only the predicate needs checking
        if pred(item):
            yield buf
            if maxsplit == 1:
                # Last permitted split: the remainder forms the final piece,
                # but only if there is a remainder
                buf = list(it)
                if buf:
                    yield buf
                return
            buf = []
            maxsplit -= 1
    if buf:
        yield buf
|
|
|
|
|
|
def split_when(iterable, pred, maxsplit=-1):
    """Break *iterable* into lists according to *pred*.

    *pred* is called with each pair of neighboring items and should return
    ``True`` when a split belongs between the two.

    For example, to find runs of increasing numbers, split the iterable when
    element ``i`` is larger than element ``i + 1``:

        >>> list(split_when([1, 2, 3, 3, 2, 5, 2, 4, 2], lambda x, y: x > y))
        [[1, 2, 3, 3], [2, 5], [2, 4], [2]]

    No more than *maxsplit* splits are performed. When *maxsplit* is omitted
    or -1, the number of splits is unlimited:

        >>> list(split_when([1, 2, 3, 3, 2, 5, 2, 4, 2],
        ...                 lambda x, y: x > y, maxsplit=2))
        [[1, 2, 3, 3], [2, 5], [2, 4, 2]]

    """
    if maxsplit == 0:
        yield list(iterable)
        return

    source = iter(iterable)
    nothing = object()
    previous = next(source, nothing)
    if previous is nothing:
        # Empty input yields no pieces at all
        return

    current = [previous]
    splits_left = maxsplit
    for following in source:
        if pred(previous, following):
            yield current
            if splits_left == 1:
                # Last permitted split: everything else is one piece
                yield [following, *source]
                return
            current = []
            splits_left -= 1

        current.append(following)
        previous = following

    yield current
|
|
|
|
|
|
def split_into(iterable, sizes):
    """For each integer 'n' in *sizes*, yield a list holding the next 'n'
    items of *iterable*.

        >>> list(split_into([1,2,3,4,5,6], [1,2,3]))
        [[1], [2, 3], [4, 5, 6]]

    When *sizes* adds up to less than the length of *iterable*, the leftover
    items are not returned.

        >>> list(split_into([1,2,3,4,5,6], [2,3]))
        [[1, 2], [3, 4, 5]]

    When *sizes* adds up to more than the length of *iterable*, the list
    that runs past the end is shorter than requested and any later lists
    are empty:

        >>> list(split_into([1,2,3,4], [1,2,3,4]))
        [[1], [2, 3], [4], []]

    A ``None`` in *sizes* means "everything that is left", in the same way
    a ``None`` stop works for :func:`itertools.islice`. No further sizes are
    processed after it:

        >>> list(split_into([1,2,3,4,5,6,7,8,9,0], [2,3,None]))
        [[1, 2], [3, 4, 5], [6, 7, 8, 9, 0]]

    :func:`split_into` is useful for grouping a series of items where the
    groups have non-uniform sizes. An example is a table row in which
    several columns together describe one feature (e.g. a point given as
    x,y,z) but the layout is not the same for all columns.
    """
    # A single shared iterator lets each islice call continue where the
    # previous one stopped, even when the input is a generator
    source = iter(iterable)

    for size in sizes:
        if size is not None:
            yield list(islice(source, size))
            continue

        yield list(source)
        return
|
|
|
|
|
|
def padded(iterable, fillvalue=None, n=None, next_multiple=False):
    """Yield the items of *iterable* and then pad with *fillvalue* so that
    at least *n* items are emitted in total.

        >>> list(padded([1, 2, 3], '?', 5))
        [1, 2, 3, '?', '?']

    With *next_multiple* set to ``True``, padding continues only until the
    number of emitted items is a multiple of *n*::

        >>> list(padded([1, 2, 3, 4], n=3, next_multiple=True))
        [1, 2, 3, 4, None, None]

    When *n* is ``None``, *fillvalue* is emitted without end.

    ``ValueError`` is raised during iteration if *n* is less than 1.

    """
    source = iter(iterable)

    if n is None:
        yield from chain(source, repeat(fillvalue))
        return

    if n < 1:
        raise ValueError('n must be at least 1')

    emitted = 0
    for element in source:
        yield element
        emitted += 1

    if next_multiple:
        shortfall = (n - emitted) % n
    else:
        # May be negative when the input is longer than n: no padding then
        shortfall = n - emitted

    for _ in range(shortfall):
        yield fillvalue
|
|
|
|
|
|
def repeat_each(iterable, n=2):
    """Yield every element of *iterable* *n* times in a row.

    >>> list(repeat_each('ABC', 3))
    ['A', 'A', 'A', 'B', 'B', 'B', 'C', 'C', 'C']
    """
    runs = (repeat(element, n) for element in iterable)
    return chain.from_iterable(runs)
|
|
|
|
|
|
def repeat_last(iterable, default=None):
    """Yield the items of *iterable*; once it runs out, yield its final
    item again and again without end.

        >>> list(islice(repeat_last(range(3)), 5))
        [0, 1, 2, 2, 2]

    For an empty iterable, *default* is yielded forever instead::

        >>> list(islice(repeat_last(range(0), 42), 5))
        [42, 42, 42, 42, 42]

    """
    # The loop variable doubles as "last item seen"; it keeps the sentinel
    # value only when the loop body never runs
    last_seen = _marker
    for last_seen in iterable:
        yield last_seen

    if last_seen is _marker:
        last_seen = default
    yield from repeat(last_seen)
|
|
|
|
|
|
def distribute(n, iterable):
    """Deal the items of *iterable* out, one at a time in turn, to *n*
    smaller iterables.

        >>> group_1, group_2 = distribute(2, [1, 2, 3, 4, 5, 6])
        >>> list(group_1)
        [1, 3, 5]
        >>> list(group_2)
        [2, 4, 6]

    When the length of *iterable* is not a multiple of *n*, the returned
    iterables differ in length:

        >>> children = distribute(3, [1, 2, 3, 4, 5, 6, 7])
        >>> [list(c) for c in children]
        [[1, 4, 7], [2, 5], [3, 6]]

    When *iterable* has fewer than *n* items, the trailing returned
    iterables are empty:

        >>> children = distribute(5, [1, 2, 3])
        >>> [list(c) for c in children]
        [[1], [2], [3], [], []]

    This function uses :func:`itertools.tee` and may require significant
    storage. If the smaller iterables should keep the items in their
    original order, see :func:`divide`.

    ``ValueError`` is raised if *n* is less than 1.

    """
    if n < 1:
        raise ValueError('n must be at least 1')

    # Child number k takes items k, k + n, k + 2n, ... from its own copy
    return [
        islice(copy, offset, None, n)
        for offset, copy in zip(range(n), tee(iterable, n))
    ]
|
|
|
|
|
|
def stagger(iterable, offsets=(-1, 0, 1), longest=False, fillvalue=None):
    """Yield tuples of items taken from *iterable* at shifted positions.

    The `i`-th member of every tuple is shifted by the `i`-th value in
    *offsets*.

        >>> list(stagger([0, 1, 2, 3]))
        [(None, 0, 1), (0, 1, 2), (1, 2, 3)]
        >>> list(stagger(range(8), offsets=(0, 2, 4)))
        [(0, 2, 4), (1, 3, 5), (2, 4, 6), (3, 5, 7)]

    By default, output stops when the final member of a tuple is the last
    item of the iterable. To keep going until the first member of a tuple
    is the last item, set *longest* to ``True``::

        >>> list(stagger([0, 1, 2, 3], longest=True))
        [(None, 0, 1), (0, 1, 2), (1, 2, 3), (2, 3, None), (3, None, None)]

    Positions outside the sequence are filled with ``None`` unless another
    *fillvalue* is given.

    """
    # One independent copy of the input per offset, zipped with the shifts
    options = {'offsets': offsets, 'longest': longest, 'fillvalue': fillvalue}
    return zip_offset(*tee(iterable, len(offsets)), **options)
|
|
|
|
|
|
def zip_equal(*iterables):
    """``zip`` the input *iterables* together, raising
    ``UnequalIterablesError`` if their lengths differ.

        >>> it_1 = range(3)
        >>> it_2 = iter('abc')
        >>> list(zip_equal(it_1, it_2))
        [(0, 'a'), (1, 'b'), (2, 'c')]

        >>> it_1 = range(3)
        >>> it_2 = iter('abcd')
        >>> list(zip_equal(it_1, it_2)) # doctest: +IGNORE_EXCEPTION_DETAIL
        Traceback (most recent call last):
        ...
        more_itertools.more.UnequalIterablesError: Iterables have different
        lengths

    On interpreters whose ``sys.hexversion`` is at least ``0x30A00A6``, a
    ``DeprecationWarning`` is issued that points to the builtin ``zip``
    with ``strict=True``.

    """
    if hexversion >= 0x30A00A6:
        message = (
            'zip_equal will be removed in a future version of '
            'more-itertools. Use the builtin zip function with '
            'strict=True instead.'
        )
        warnings.warn(message, DeprecationWarning)

    return _zip_equal(*iterables)
|
|
|
|
|
|
def zip_offset(*iterables, offsets, longest=False, fillvalue=None):
    """``zip`` the input *iterables* together after shifting the `i`-th
    iterable by the `i`-th value in *offsets*.

        >>> list(zip_offset('0123', 'abcdef', offsets=(0, 1)))
        [('0', 'b'), ('1', 'c'), ('2', 'd'), ('3', 'e')]

    This can serve as a lightweight alternative to SciPy or pandas for
    analyzing data sets in which some series lead or lag others.

    By default, output stops when the shortest iterable is exhausted. To
    keep going until the longest one is exhausted, set *longest* to
    ``True``.

        >>> list(zip_offset('0123', 'abcdef', offsets=(0, 1), longest=True))
        [('0', 'b'), ('1', 'c'), ('2', 'd'), ('3', 'e'), (None, 'f')]

    Positions outside a sequence are filled with ``None`` unless another
    *fillvalue* is given.

    ``ValueError`` is raised if the number of iterables differs from the
    number of offsets.

    """
    if len(iterables) != len(offsets):
        raise ValueError("Number of iterables and offsets didn't match")

    def shifted(source, amount):
        if amount < 0:
            # Negative offset: delay the series with leading fill values
            return chain(repeat(fillvalue, -amount), source)
        if amount > 0:
            # Positive offset: drop the first *amount* items
            return islice(source, amount, None)
        return source

    staggered = [shifted(it, n) for it, n in zip(iterables, offsets)]

    if longest:
        return zip_longest(*staggered, fillvalue=fillvalue)

    return zip(*staggered)
|
|
|
|
|
|
def sort_together(iterables, key_list=(0,), key=None, reverse=False):
    """Return the input iterables sorted in lockstep, using the iterables
    named by *key_list* as the sort priority. Every iterable is trimmed to
    the length of the shortest one.

    This works like sorting in a spreadsheet: if each iterable is a column
    of data, the key list says which columns drive the sort.

    By default, everything is sorted by the ``0``-th iterable::

        >>> iterables = [(4, 3, 2, 1), ('a', 'b', 'c', 'd')]
        >>> sort_together(iterables)
        [(1, 2, 3, 4), ('d', 'c', 'b', 'a')]

    Set a different key list to sort by another iterable. Listing several
    keys determines how ties are broken::

        >>> iterables = [(3, 1, 2), (0, 1, 0), ('c', 'b', 'a')]
        >>> sort_together(iterables, key_list=(1, 2))
        [(2, 3, 1), (0, 0, 1), ('a', 'c', 'b')]

    To sort by a function of the elements, pass a *key* function. It is
    called with the elements of the iterables selected by the key list::

        >>> names = ('a', 'b', 'c')
        >>> lengths = (1, 2, 3)
        >>> widths = (5, 2, 1)
        >>> def area(length, width):
        ...     return length * width
        >>> sort_together([names, lengths, widths], key_list=(1, 2), key=area)
        [('c', 'b', 'a'), (3, 2, 1), (1, 2, 5)]

    Set *reverse* to ``True`` to sort in descending order.

        >>> sort_together([(1, 2, 3), ('c', 'b', 'a')], reverse=True)
        [(3, 2, 1), ('a', 'b', 'c')]

    """
    if key is None:
        # No key function: sort directly on the selected columns
        sort_key = itemgetter(*key_list)
    else:
        offsets = list(key_list)
        if len(offsets) == 1:
            # One column: hand its value to key() as the sole argument
            only_offset = offsets[0]

            def sort_key(row):
                return key(row[only_offset])

        else:
            # Several columns: hand their values to key() as *args
            pick_columns = itemgetter(*offsets)

            def sort_key(row):
                return key(*pick_columns(row))

    # Transpose to rows, sort the rows, transpose back to columns
    rows = sorted(zip(*iterables), key=sort_key, reverse=reverse)
    return list(zip(*rows))
|
|
|
|
|
|
def unzip(iterable):
    """The inverse of :func:`zip`: split the elements of the zipped
    *iterable* back into separate iterables.

    The ``i``-th returned iterable produces the ``i``-th member of each
    element of the zipped iterable. The length of the first element decides
    how many iterables are returned.

        >>> iterable = [('a', 1), ('b', 2), ('c', 3), ('d', 4)]
        >>> letters, numbers = unzip(iterable)
        >>> list(letters)
        ['a', 'b', 'c', 'd']
        >>> list(numbers)
        [1, 2, 3, 4]

    This resembles ``zip(*iterable)`` but does not read *iterable* into
    memory up front. Note, however, that this function uses
    :func:`itertools.tee` and thus may require significant storage.

    """
    head, iterable = spy(iter(iterable))
    if not head:
        # empty iterable, e.g. zip([], [], [])
        return ()

    # head holds just the first element; its length fixes the column count
    width = len(head[0])
    copies = tee(iterable, width)

    def column(position):
        def pick(element):
            try:
                return element[position]
            except IndexError:
                # For "improperly zipped" input such as
                # iter([(1, 2, 3), (4, 5), (6,)]), an element may be too
                # short for this column. Raising StopIteration from the
                # function given to map() ends that column's iterator at
                # the first such mismatch instead of surfacing an error.
                raise StopIteration

        return pick

    return tuple(
        map(column(position), copy) for position, copy in enumerate(copies)
    )
|
|
|
|
|
|
def divide(n, iterable):
    """Divide the elements from *iterable* into *n* parts, maintaining
    order.

        >>> group_1, group_2 = divide(2, [1, 2, 3, 4, 5, 6])
        >>> list(group_1)
        [1, 2, 3]
        >>> list(group_2)
        [4, 5, 6]

    If the length of *iterable* is not evenly divisible by *n*, then the
    length of the returned iterables will not be identical:

        >>> children = divide(3, [1, 2, 3, 4, 5, 6, 7])
        >>> [list(c) for c in children]
        [[1, 2, 3], [4, 5], [6, 7]]

    If the length of the iterable is smaller than n, then the last returned
    iterables will be empty:

        >>> children = divide(5, [1, 2, 3])
        >>> [list(c) for c in children]
        [[1], [2], [3], [], []]

    This function will exhaust the iterable before returning and may require
    significant storage. If order is not important, see :func:`distribute`,
    which does not first pull the iterable into memory.

    ``ValueError`` is raised if *n* is less than 1.

    """
    if n < 1:
        raise ValueError('n must be at least 1')

    # Probe for slice support. Objects that cannot be sliced are copied
    # into a tuple. Mappings signal this with KeyError rather than
    # TypeError on interpreters where slice objects are hashable.
    try:
        iterable[:0]
    except (TypeError, KeyError):
        seq = tuple(iterable)
    else:
        seq = iterable

    # The first r parts get q + 1 items each, the remaining parts get q
    q, r = divmod(len(seq), n)

    ret = []
    stop = 0
    for i in range(1, n + 1):
        start = stop
        stop += q + 1 if i <= r else q
        ret.append(iter(seq[start:stop]))

    return ret
|
|
|
|
|
|
def always_iterable(obj, base_type=(str, bytes)):
    """Return an iterator over the items of *obj* when it is iterable::

        >>> obj = (1, 2, 3)
        >>> list(always_iterable(obj))
        [1, 2, 3]

    When *obj* is not iterable, return a one-item iterator that holds
    *obj* itself::

        >>> obj = 1
        >>> list(always_iterable(obj))
        [1]

    When *obj* is ``None``, return an empty iterator:

        >>> obj = None
        >>> list(always_iterable(None))
        []

    Binary and text strings are not treated as iterable by default::

        >>> obj = 'foo'
        >>> list(always_iterable(obj))
        ['foo']

    When *base_type* is set, any object for which
    ``isinstance(obj, base_type)`` is ``True`` is treated as a single item.

        >>> obj = {'a': 1}
        >>> list(always_iterable(obj))  # Iterate over the dict's keys
        ['a']
        >>> list(always_iterable(obj, base_type=dict))  # Treat dicts as a unit
        [{'a': 1}]

    Pass ``None`` as *base_type* to switch off the special handling, so
    that everything Python considers iterable is iterated:

        >>> obj = 'foo'
        >>> list(always_iterable(obj, base_type=None))
        ['f', 'o', 'o']
    """
    if obj is None:
        return iter(())

    is_atomic = (base_type is not None) and isinstance(obj, base_type)
    if not is_atomic:
        try:
            return iter(obj)
        except TypeError:
            # Not iterable: fall through and wrap it
            pass

    return iter((obj,))
|
|
|
|
|
|
def adjacent(predicate, iterable, distance=1):
    """Return an iterable of `(bool, item)` tuples, where `item` comes from
    *iterable* and `bool` tells whether that item satisfies *predicate* or
    lies next to an item that does.

    For example, to find whether items are adjacent to a ``3``::

        >>> list(adjacent(lambda x: x == 3, range(6)))
        [(False, 0), (False, 1), (True, 2), (True, 3), (True, 4), (False, 5)]

    Set *distance* to change how far "adjacent" reaches. For example, to
    find whether items are within two places of a ``3``:

        >>> list(adjacent(lambda x: x == 3, range(6), distance=2))
        [(False, 0), (True, 1), (True, 2), (True, 3), (True, 4), (True, 5)]

    This is useful for adding context to the results of a search. For
    example, a code comparison tool might want to show changed lines
    together with the lines around them.

    The predicate function is called only once per item of the iterable.

    See also :func:`groupby_transform`, which can be combined with this
    function to group runs of items that share the same `bool` value.

    ``ValueError`` is raised if *distance* is negative.

    """
    # Allow distance=0 mainly for testing that it reproduces results with map()
    if distance < 0:
        raise ValueError('distance must be at least 0')

    for_predicate, for_output = tee(iterable)

    # Pad both ends with False so that every item sits at the center of a
    # full-width window of predicate results
    pad = [False] * distance
    flags = chain(pad, map(predicate, for_predicate), pad)
    window_size = 2 * distance + 1
    near_match = map(any, windowed(flags, window_size))

    return zip(near_match, for_output)
|
|
|
|
|
|
def groupby_transform(iterable, keyfunc=None, valuefunc=None, reducefunc=None):
    """A variant of :func:`itertools.groupby` that can also transform the
    grouped data.

    * *keyfunc* computes the key for each item in *iterable*
    * *valuefunc* transforms the individual items of *iterable* after
      grouping
    * *reducefunc* transforms each whole group of items

    >>> iterable = 'aAAbBBcCC'
    >>> keyfunc = lambda k: k.upper()
    >>> valuefunc = lambda v: v.lower()
    >>> reducefunc = lambda g: ''.join(g)
    >>> list(groupby_transform(iterable, keyfunc, valuefunc, reducefunc))
    [('A', 'aaa'), ('B', 'bbb'), ('C', 'ccc')]

    Any argument that is left out behaves like an identity function.

    :func:`groupby_transform` is useful for grouping the elements of one
    iterable by the values of another. To do so, :func:`zip` the two
    together, then pass a *keyfunc* that picks the first member and a
    *valuefunc* that picks the second::

        >>> from operator import itemgetter
        >>> keys = [0, 0, 1, 1, 1, 2, 2, 2, 3]
        >>> values = 'abcdefghi'
        >>> iterable = zip(keys, values)
        >>> grouper = groupby_transform(iterable, itemgetter(0), itemgetter(1))
        >>> [(k, ''.join(g)) for k, g in grouper]
        [(0, 'ab'), (1, 'cde'), (2, 'fgh'), (3, 'i')]

    Note that the order of items in the iterable matters. Only neighboring
    items are grouped, so to avoid duplicate groups, sort the iterable by
    the key function first.

    """

    def remap(pairs, transform):
        # Lazily replace the group in each (key, group) pair
        for group_key, members in pairs:
            yield group_key, transform(members)

    grouped = groupby(iterable, keyfunc)
    if valuefunc:
        grouped = remap(grouped, lambda members: map(valuefunc, members))
    if reducefunc:
        grouped = remap(grouped, reducefunc)

    return grouped
|
|
|
|
|
|
class numeric_range(abc.Sequence, abc.Hashable):
|
|
"""An extension of the built-in ``range()`` function whose arguments can
|
|
be any orderable numeric type.
|
|
|
|
With only *stop* specified, *start* defaults to ``0`` and *step*
|
|
defaults to ``1``. The output items will match the type of *stop*:
|
|
|
|
>>> list(numeric_range(3.5))
|
|
[0.0, 1.0, 2.0, 3.0]
|
|
|
|
With only *start* and *stop* specified, *step* defaults to ``1``. The
|
|
output items will match the type of *start*:
|
|
|
|
>>> from decimal import Decimal
|
|
>>> start = Decimal('2.1')
|
|
>>> stop = Decimal('5.1')
|
|
>>> list(numeric_range(start, stop))
|
|
[Decimal('2.1'), Decimal('3.1'), Decimal('4.1')]
|
|
|
|
With *start*, *stop*, and *step* specified the output items will match
|
|
the type of ``start + step``:
|
|
|
|
>>> from fractions import Fraction
|
|
>>> start = Fraction(1, 2) # Start at 1/2
|
|
>>> stop = Fraction(5, 2) # End at 5/2
|
|
>>> step = Fraction(1, 2) # Count by 1/2
|
|
>>> list(numeric_range(start, stop, step))
|
|
[Fraction(1, 2), Fraction(1, 1), Fraction(3, 2), Fraction(2, 1)]
|
|
|
|
If *step* is zero, ``ValueError`` is raised. Negative steps are supported:
|
|
|
|
>>> list(numeric_range(3, -1, -1.0))
|
|
[3.0, 2.0, 1.0, 0.0]
|
|
|
|
Be aware of the limitations of floating point numbers; the representation
|
|
of the yielded numbers may be surprising.
|
|
|
|
``datetime.datetime`` objects can be used for *start* and *stop*, if *step*
|
|
is a ``datetime.timedelta`` object:
|
|
|
|
>>> import datetime
|
|
>>> start = datetime.datetime(2019, 1, 1)
|
|
>>> stop = datetime.datetime(2019, 1, 3)
|
|
>>> step = datetime.timedelta(days=1)
|
|
>>> items = iter(numeric_range(start, stop, step))
|
|
>>> next(items)
|
|
datetime.datetime(2019, 1, 1, 0, 0)
|
|
>>> next(items)
|
|
datetime.datetime(2019, 1, 2, 0, 0)
|
|
|
|
"""
|
|
|
|
_EMPTY_HASH = hash(range(0, 0))
|
|
|
|
def __init__(self, *args):
|
|
argc = len(args)
|
|
if argc == 1:
|
|
(self._stop,) = args
|
|
self._start = type(self._stop)(0)
|
|
self._step = type(self._stop - self._start)(1)
|
|
elif argc == 2:
|
|
self._start, self._stop = args
|
|
self._step = type(self._stop - self._start)(1)
|
|
elif argc == 3:
|
|
self._start, self._stop, self._step = args
|
|
elif argc == 0:
|
|
raise TypeError(
|
|
'numeric_range expected at least '
|
|
'1 argument, got {}'.format(argc)
|
|
)
|
|
else:
|
|
raise TypeError(
|
|
'numeric_range expected at most '
|
|
'3 arguments, got {}'.format(argc)
|
|
)
|
|
|
|
self._zero = type(self._step)(0)
|
|
if self._step == self._zero:
|
|
raise ValueError('numeric_range() arg 3 must not be zero')
|
|
self._growing = self._step > self._zero
|
|
self._init_len()
|
|
|
|
def __bool__(self):
|
|
if self._growing:
|
|
return self._start < self._stop
|
|
else:
|
|
return self._start > self._stop
|
|
|
|
def __contains__(self, elem):
|
|
if self._growing:
|
|
if self._start <= elem < self._stop:
|
|
return (elem - self._start) % self._step == self._zero
|
|
else:
|
|
if self._start >= elem > self._stop:
|
|
return (self._start - elem) % (-self._step) == self._zero
|
|
|
|
return False
|
|
|
|
def __eq__(self, other):
|
|
if isinstance(other, numeric_range):
|
|
empty_self = not bool(self)
|
|
empty_other = not bool(other)
|
|
if empty_self or empty_other:
|
|
return empty_self and empty_other # True if both empty
|
|
else:
|
|
return (
|
|
self._start == other._start
|
|
and self._step == other._step
|
|
and self._get_by_index(-1) == other._get_by_index(-1)
|
|
)
|
|
else:
|
|
return False
|
|
|
|
def __getitem__(self, key):
|
|
if isinstance(key, int):
|
|
return self._get_by_index(key)
|
|
elif isinstance(key, slice):
|
|
step = self._step if key.step is None else key.step * self._step
|
|
|
|
if key.start is None or key.start <= -self._len:
|
|
start = self._start
|
|
elif key.start >= self._len:
|
|
start = self._stop
|
|
else: # -self._len < key.start < self._len
|
|
start = self._get_by_index(key.start)
|
|
|
|
if key.stop is None or key.stop >= self._len:
|
|
stop = self._stop
|
|
elif key.stop <= -self._len:
|
|
stop = self._start
|
|
else: # -self._len < key.stop < self._len
|
|
stop = self._get_by_index(key.stop)
|
|
|
|
return numeric_range(start, stop, step)
|
|
else:
|
|
raise TypeError(
|
|
'numeric range indices must be '
|
|
'integers or slices, not {}'.format(type(key).__name__)
|
|
)
|
|
|
|
def __hash__(self):
|
|
if self:
|
|
return hash((self._start, self._get_by_index(-1), self._step))
|
|
else:
|
|
return self._EMPTY_HASH
|
|
|
|
def __iter__(self):
|
|
values = (self._start + (n * self._step) for n in count())
|
|
if self._growing:
|
|
return takewhile(partial(gt, self._stop), values)
|
|
else:
|
|
return takewhile(partial(lt, self._stop), values)
|
|
|
|
def __len__(self):
|
|
return self._len
|
|
|
|
def _init_len(self):
|
|
if self._growing:
|
|
start = self._start
|
|
stop = self._stop
|
|
step = self._step
|
|
else:
|
|
start = self._stop
|
|
stop = self._start
|
|
step = -self._step
|
|
distance = stop - start
|
|
if distance <= self._zero:
|
|
self._len = 0
|
|
else: # distance > 0 and step > 0: regular euclidean division
|
|
q, r = divmod(distance, step)
|
|
self._len = int(q) + int(r != self._zero)
|
|
|
|
def __reduce__(self):
|
|
return numeric_range, (self._start, self._stop, self._step)
|
|
|
|
def __repr__(self):
|
|
if self._step == 1:
|
|
return "numeric_range({}, {})".format(
|
|
repr(self._start), repr(self._stop)
|
|
)
|
|
else:
|
|
return "numeric_range({}, {}, {})".format(
|
|
repr(self._start), repr(self._stop), repr(self._step)
|
|
)
|
|
|
|
def __reversed__(self):
|
|
return iter(
|
|
numeric_range(
|
|
self._get_by_index(-1), self._start - self._step, -self._step
|
|
)
|
|
)
|
|
|
|
def count(self, value):
    """Return 1 if *value* is contained in the range, otherwise 0."""
    return 1 if value in self else 0
|
|
|
|
def index(self, value):
    """Return the position of *value* within the range.

    Raises ``ValueError`` when *value* lies outside the bounds or does not
    fall exactly on a step.
    """
    division = None
    if self._growing:
        if self._start <= value < self._stop:
            division = divmod(value - self._start, self._step)
    elif self._start >= value > self._stop:
        # Mirror the shrinking case so distance and step are both positive.
        division = divmod(self._start - value, -self._step)

    if division is not None:
        quotient, remainder = division
        if remainder == self._zero:
            return int(quotient)

    raise ValueError("{} is not in numeric range".format(value))
|
|
|
|
def _get_by_index(self, i):
    """Return the item at integer position *i* (negative counts from the end).

    Raises ``IndexError`` when the normalized position falls outside
    ``[0, len)``.
    """
    position = i + self._len if i < 0 else i
    if position < 0 or position >= self._len:
        raise IndexError("numeric range object index out of range")
    return self._start + position * self._step
|
|
|
|
|
|
def count_cycle(iterable, n=None):
    """Cycle through the items from *iterable* up to *n* times, yielding
    the number of completed cycles along with each item. If *n* is omitted the
    process repeats indefinitely.

    >>> list(count_cycle('AB', 3))
    [(0, 'A'), (0, 'B'), (1, 'A'), (1, 'B'), (2, 'A'), (2, 'B')]

    An empty *iterable* produces an empty iterator, whatever *n* is.
    """
    items = tuple(iterable)
    if not items:
        return iter(())

    if n is None:
        rounds = count()
    else:
        rounds = range(n)
    return ((completed, item) for completed in rounds for item in items)
|
|
|
|
|
|
def mark_ends(iterable):
    """Yield 3-tuples of the form ``(is_first, is_last, item)``.

    >>> list(mark_ends('ABC'))
    [(True, False, 'A'), (False, False, 'B'), (False, True, 'C')]

    Use this when looping over an iterable to take special action on its first
    and/or last items:

    >>> iterable = ['Header', 100, 200, 'Footer']
    >>> total = 0
    >>> for is_first, is_last, item in mark_ends(iterable):
    ...     if is_first:
    ...         continue  # Skip the header
    ...     if is_last:
    ...         continue  # Skip the footer
    ...     total += item
    >>> print(total)
    300
    """
    source = iter(iterable)
    exhausted = object()

    current = next(source, exhausted)
    if current is exhausted:
        return

    # Always read one item ahead: an item is known to be the last one only
    # once the source has nothing left to offer after it.
    is_first = True
    for upcoming in source:
        yield is_first, False, current
        is_first = False
        current = upcoming

    yield is_first, True, current
|
|
|
|
|
|
def locate(iterable, pred=bool, window_size=None):
    """Yield the index of each item in *iterable* for which *pred* returns
    ``True``.

    *pred* defaults to :func:`bool`, which will select truthy items:

        >>> list(locate([0, 1, 1, 0, 1, 0, 0]))
        [1, 2, 4]

    Set *pred* to a custom function to, e.g., find the indexes for a particular
    item.

        >>> list(locate(['a', 'b', 'c', 'b'], lambda x: x == 'b'))
        [1, 3]

    If *window_size* is given, then the *pred* function will be called with
    that many items. This enables searching for sub-sequences:

        >>> iterable = [0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3]
        >>> pred = lambda *args: args == (1, 2, 3)
        >>> list(locate(iterable, pred=pred, window_size=3))
        [1, 5, 9]

    Use with :func:`seekable` to find indexes and then retrieve the associated
    items:

        >>> from itertools import count
        >>> from more_itertools import seekable
        >>> source = (3 * n + 1 if (n % 2) else n // 2 for n in count())
        >>> it = seekable(source)
        >>> pred = lambda x: x > 100
        >>> indexes = locate(it, pred=pred)
        >>> i = next(indexes)
        >>> it.seek(i)
        >>> next(it)
        106

    A *window_size* smaller than 1 raises ``ValueError``.
    """
    if window_size is not None:
        if window_size < 1:
            raise ValueError('window size must be at least 1')
        windows = windowed(iterable, window_size, fillvalue=_marker)
        verdicts = starmap(pred, windows)
    else:
        verdicts = map(pred, iterable)

    # Pair a running index with every verdict and keep the truthy positions.
    return compress(count(), verdicts)
|
|
|
|
|
|
def longest_common_prefix(iterables):
    """Yield elements of the longest common prefix amongst given *iterables*.

    >>> ''.join(longest_common_prefix(['abcd', 'abc', 'abf']))
    'ab'

    """
    # Walk the iterables in lockstep; each "column" holds one item from every
    # iterable. Stop at the first column whose items are not all equal.
    columns = zip(*iterables)
    agreeing = takewhile(all_equal, columns)
    return (column[0] for column in agreeing)
|
|
|
|
|
|
def lstrip(iterable, pred):
    """Yield the items from *iterable*, but strip any from the beginning
    for which *pred* returns ``True``.

    For example, to remove a set of items from the start of an iterable:

        >>> iterable = (None, False, None, 1, 2, None, 3, False, None)
        >>> pred = lambda x: x in {None, False, ''}
        >>> list(lstrip(iterable, pred))
        [1, 2, None, 3, False, None]

    This function is analogous to :func:`str.lstrip`, and is essentially
    a wrapper for :func:`itertools.dropwhile`.

    """
    without_leading = dropwhile(pred, iterable)
    return without_leading
|
|
|
|
|
|
def rstrip(iterable, pred):
    """Yield the items from *iterable*, but strip any from the end
    for which *pred* returns ``True``.

    For example, to remove a set of items from the end of an iterable:

        >>> iterable = (None, False, None, 1, 2, None, 3, False, None)
        >>> pred = lambda x: x in {None, False, ''}
        >>> list(rstrip(iterable, pred))
        [None, False, None, 1, 2, None, 3]

    This function is analogous to :func:`str.rstrip`.

    """
    # Items matching *pred* are held back: they may turn out to be trailing.
    # They are released only once a later non-matching item shows up.
    held_back = []
    for item in iterable:
        if not pred(item):
            yield from held_back
            held_back.clear()
            yield item
        else:
            held_back.append(item)
|
|
|
|
|
|
def strip(iterable, pred):
    """Yield the items from *iterable*, but strip any from the
    beginning and end for which *pred* returns ``True``.

    For example, to remove a set of items from both ends of an iterable:

        >>> iterable = (None, False, None, 1, 2, None, 3, False, None)
        >>> pred = lambda x: x in {None, False, ''}
        >>> list(strip(iterable, pred))
        [1, 2, None, 3]

    This function is analogous to :func:`str.strip`.

    """
    without_leading = lstrip(iterable, pred)
    return rstrip(without_leading, pred)
|
|
|
|
|
|
class islice_extended:
    """An extension of :func:`itertools.islice` that supports negative values
    for *stop*, *start*, and *step*.

        >>> iterable = iter('abcdefgh')
        >>> list(islice_extended(iterable, -4, -1))
        ['e', 'f', 'g']

    Slices with negative values require some caching of *iterable*, but this
    function takes care to minimize the amount of memory required.

    For example, you can use a negative step with an infinite iterator:

        >>> from itertools import count
        >>> list(islice_extended(count(), 110, 99, -2))
        [110, 108, 106, 104, 102, 100]

    You can also use slice notation directly:

        >>> iterable = map(str, count())
        >>> it = islice_extended(iterable)[10:20:2]
        >>> list(it)
        ['10', '12', '14', '16', '18']

    """

    def __init__(self, iterable, *args):
        source = iter(iterable)
        # Without slice arguments the object is a plain pass-through wrapper
        # that can be sliced later through ``__getitem__``.
        self._iterable = (
            _islice_helper(source, slice(*args)) if args else source
        )

    def __iter__(self):
        return self

    def __next__(self):
        return next(self._iterable)

    def __getitem__(self, key):
        if not isinstance(key, slice):
            raise TypeError(
                'islice_extended.__getitem__ argument must be a slice'
            )
        return islice_extended(_islice_helper(self._iterable, key))
|
|
|
|
|
|
def _islice_helper(it, s):
    """Yield the items of iterator *it* selected by the slice object *s*.

    Unlike :func:`itertools.islice`, negative ``start``, ``stop`` and ``step``
    values are handled. Negative positions are only known relative to the
    end of *it*, so those cases buffer part of the input in a ``deque`` or
    ``list``; the all-non-negative, positive-step case delegates straight to
    ``islice``.

    Raises ``ValueError`` if ``s.step`` is 0. Because this is a generator
    function, that error surfaces when iteration starts, not at call time.
    """
    start = s.start
    stop = s.stop
    if s.step == 0:
        raise ValueError('step argument must be a non-zero integer or None.')
    step = s.step or 1

    if step > 0:
        start = 0 if (start is None) else start

        if start < 0:
            # Consume all but the last -start items
            # (enumerate starts at 1, so the index stored with the final
            # cached entry equals the total number of items consumed)
            cache = deque(enumerate(it, 1), maxlen=-start)
            len_iter = cache[-1][0] if cache else 0

            # Adjust start to be positive
            i = max(len_iter + start, 0)

            # Adjust stop to be positive
            if stop is None:
                j = len_iter
            elif stop >= 0:
                j = min(stop, len_iter)
            else:
                j = max(len_iter + stop, 0)

            # Slice the cache
            n = j - i
            if n <= 0:
                return

            # The cache begins at position i, so take its first n entries.
            for index, item in islice(cache, 0, n, step):
                yield item
        elif (stop is not None) and (stop < 0):
            # Advance to the start position
            # (an empty islice(it, start, start) still consumes `start` items)
            next(islice(it, start, start), None)

            # When stop is negative, we have to carry -stop items while
            # iterating
            cache = deque(islice(it, -stop), maxlen=-stop)

            # A cached item is released only after a further item has been
            # read, which shows it is not among the final -stop items.
            for index, item in enumerate(it):
                cached_item = cache.popleft()
                if index % step == 0:
                    yield cached_item
                cache.append(item)
        else:
            # When both start and stop are positive we have the normal case
            yield from islice(it, start, stop, step)
    else:
        start = -1 if (start is None) else start

        if (stop is not None) and (stop < 0):
            # Consume all but the last items
            # (only the final -stop - 1 items can lie after a negative stop)
            n = -stop - 1
            cache = deque(enumerate(it, 1), maxlen=n)
            len_iter = cache[-1][0] if cache else 0

            # If start and stop are both negative they are comparable and
            # we can just slice. Otherwise we can adjust start to be negative
            # and then slice.
            if start < 0:
                i, j = start, stop
            else:
                i, j = min(start - len_iter, -1), None

            for index, item in list(cache)[i:j:step]:
                yield item
        else:
            # Advance to the stop position
            # (skips items 0..stop, which a negative-step slice excludes)
            if stop is not None:
                m = stop + 1
                next(islice(it, m, m), None)

            # stop is positive, so if start is negative they are not comparable
            # and we need the rest of the items.
            if start < 0:
                i = start
                n = None
            # stop is None and start is positive, so we just need items up to
            # the start index.
            elif stop is None:
                i = None
                n = start + 1
            # Both stop and start are positive, so they are comparable.
            else:
                i = None
                n = start - stop
                if n <= 0:
                    return

            # Materialize the needed items, then walk them backwards.
            cache = list(islice(it, n))

            yield from cache[i::step]
|
|
|
|
|
|
def always_reversible(iterable):
    """An extension of :func:`reversed` that supports all iterables, not
    just those which implement the ``Reversible`` or ``Sequence`` protocols.

        >>> print(*always_reversible(x for x in range(3)))
        2 1 0

    If the iterable is already reversible, this function returns the
    result of :func:`reversed()`. If the iterable is not reversible,
    this function will cache the remaining items in the iterable and
    yield them in reverse order, which may require significant storage.
    """
    try:
        backwards = reversed(iterable)
    except TypeError:
        # Not reversible as-is: materialize the remaining items first.
        backwards = reversed(list(iterable))
    return backwards
|
|
|
|
|
|
def consecutive_groups(iterable, ordering=lambda x: x):
    """Yield groups of consecutive items using :func:`itertools.groupby`.
    The *ordering* function determines whether two items are adjacent by
    returning their position.

    By default, the ordering function is the identity function. This is
    suitable for finding runs of numbers:

        >>> iterable = [1, 10, 11, 12, 20, 30, 31, 32, 33, 40]
        >>> for group in consecutive_groups(iterable):
        ...     print(list(group))
        [1]
        [10, 11, 12]
        [20]
        [30, 31, 32, 33]
        [40]

    For finding runs of adjacent letters, try using the :meth:`index` method
    of a string of letters:

        >>> from string import ascii_lowercase
        >>> iterable = 'abcdfgilmnop'
        >>> ordering = ascii_lowercase.index
        >>> for group in consecutive_groups(iterable, ordering):
        ...     print(list(group))
        ['a', 'b', 'c', 'd']
        ['f', 'g']
        ['i']
        ['l', 'm', 'n', 'o', 'p']

    Each group of consecutive items is an iterator that shares its source with
    *iterable*. When an output group is advanced, the previous group is
    no longer available unless its elements are copied (e.g., into a ``list``).

        >>> iterable = [1, 2, 11, 12, 21, 22]
        >>> saved_groups = []
        >>> for group in consecutive_groups(iterable):
        ...     saved_groups.append(list(group))  # Copy group elements
        >>> saved_groups
        [[1, 2], [11, 12], [21, 22]]

    """

    def drift(indexed_item):
        # Within a run of consecutive items, (index - position) is constant.
        index, item = indexed_item
        return index - ordering(item)

    for _, run in groupby(enumerate(iterable), key=drift):
        yield map(itemgetter(1), run)
|
|
|
|
|
|
def difference(iterable, func=sub, *, initial=None):
    """This function is the inverse of :func:`itertools.accumulate`. By default
    it will compute the first difference of *iterable* using
    :func:`operator.sub`:

        >>> from itertools import accumulate
        >>> iterable = accumulate([0, 1, 2, 3, 4])  # produces 0, 1, 3, 6, 10
        >>> list(difference(iterable))
        [0, 1, 2, 3, 4]

    *func* defaults to :func:`operator.sub`, but other functions can be
    specified. They will be applied as follows::

        A, B, C, D, ... --> A, func(B, A), func(C, B), func(D, C), ...

    For example, to do progressive division:

        >>> iterable = [1, 2, 6, 24, 120]
        >>> func = lambda x, y: x // y
        >>> list(difference(iterable, func))
        [1, 2, 3, 4, 5]

    If the *initial* keyword is set, the first element will be skipped when
    computing successive differences.

        >>> it = [10, 11, 13, 16]  # from accumulate([1, 2, 3], initial=10)
        >>> list(difference(it, initial=10))
        [1, 2, 3]

    """
    previous_items, current_items = tee(iterable)

    missing = object()
    head = next(current_items, missing)
    if head is missing:
        return iter([])

    # Only whether *initial* is None matters here: when it is set, the
    # leading element is dropped from the output.
    leading = [head] if initial is None else []

    # current_items now runs one step ahead of previous_items.
    return chain(leading, map(func, current_items, previous_items))
|
|
|
|
|
|
class SequenceView(Sequence):
    """Return a read-only view of the sequence object *target*.

    :class:`SequenceView` objects are analogous to Python's built-in
    "dictionary view" types. They provide a dynamic view of a sequence's items,
    meaning that when the sequence updates, so does the view.

        >>> seq = ['0', '1', '2']
        >>> view = SequenceView(seq)
        >>> view
        SequenceView(['0', '1', '2'])
        >>> seq.append('3')
        >>> view
        SequenceView(['0', '1', '2', '3'])

    Sequence views support indexing, slicing, and length queries. They act
    like the underlying sequence, except they don't allow assignment:

        >>> view[1]
        '1'
        >>> view[1:-1]
        ['1', '2']
        >>> len(view)
        4

    Sequence views are useful as an alternative to copying, as they don't
    require (much) extra storage.

    A ``TypeError`` is raised if *target* is not a ``Sequence``.
    """

    def __init__(self, target):
        if isinstance(target, Sequence):
            self._target = target
        else:
            raise TypeError

    def __getitem__(self, index):
        # Delegates to the target, so slices come back as the target's type.
        return self._target[index]

    def __len__(self):
        return len(self._target)

    def __repr__(self):
        return '{}({})'.format(type(self).__name__, repr(self._target))
|
|
|
|
|
|
class seekable:
    """Wrap an iterator to allow for seeking backward and forward. This
    progressively caches the items in the source iterable so they can be
    re-visited.

    Call :meth:`seek` with an index to seek to that position in the source
    iterable.

    To "reset" an iterator, seek to ``0``:

        >>> from itertools import count
        >>> it = seekable((str(n) for n in count()))
        >>> next(it), next(it), next(it)
        ('0', '1', '2')
        >>> it.seek(0)
        >>> next(it), next(it), next(it)
        ('0', '1', '2')
        >>> next(it)
        '3'

    You can also seek forward:

        >>> it = seekable((str(n) for n in range(20)))
        >>> it.seek(10)
        >>> next(it)
        '10'
        >>> it.seek(20)  # Seeking past the end of the source isn't a problem
        >>> list(it)
        []
        >>> it.seek(0)  # Resetting works even after hitting the end
        >>> next(it), next(it), next(it)
        ('0', '1', '2')

    Call :meth:`peek` to look ahead one item without advancing the iterator:

        >>> it = seekable('1234')
        >>> it.peek()
        '1'
        >>> list(it)
        ['1', '2', '3', '4']
        >>> it.peek(default='empty')
        'empty'

    Before the iterator is at its end, calling :func:`bool` on it will return
    ``True``. After it will return ``False``:

        >>> it = seekable('5678')
        >>> bool(it)
        True
        >>> list(it)
        ['5', '6', '7', '8']
        >>> bool(it)
        False

    You may view the contents of the cache with the :meth:`elements` method.
    That returns a :class:`SequenceView`, a view that updates automatically:

        >>> it = seekable((str(n) for n in range(10)))
        >>> next(it), next(it), next(it)
        ('0', '1', '2')
        >>> elements = it.elements()
        >>> elements
        SequenceView(['0', '1', '2'])
        >>> next(it)
        '3'
        >>> elements
        SequenceView(['0', '1', '2', '3'])

    By default, the cache grows as the source iterable progresses, so beware of
    wrapping very large or infinite iterables. Supply *maxlen* to limit the
    size of the cache (this of course limits how far back you can seek).

        >>> from itertools import count
        >>> it = seekable((str(n) for n in count()), maxlen=2)
        >>> next(it), next(it), next(it), next(it)
        ('0', '1', '2', '3')
        >>> list(it.elements())
        ['2', '3']
        >>> it.seek(0)
        >>> next(it), next(it), next(it), next(it)
        ('2', '3', '4', '5')
        >>> next(it)
        '6'

    """

    def __init__(self, iterable, maxlen=None):
        self._source = iter(iterable)
        # An unbounded list by default; a bounded deque when *maxlen* is given.
        self._cache = [] if maxlen is None else deque([], maxlen)
        # None means "reading live from the source"; an integer is a position
        # inside the cache to replay from.
        self._index = None

    def __iter__(self):
        return self

    def __next__(self):
        position = self._index
        if position is not None:
            try:
                replayed = self._cache[position]
            except IndexError:
                # Ran off the end of the cache: switch back to the source.
                self._index = None
            else:
                self._index = position + 1
                return replayed

        fresh = next(self._source)
        self._cache.append(fresh)
        return fresh

    def __bool__(self):
        nothing_left = object()
        return self.peek(nothing_left) is not nothing_left

    def peek(self, default=_marker):
        try:
            upcoming = next(self)
        except StopIteration:
            if default is _marker:
                raise
            return default

        # Step back one position so the peeked item is served again by the
        # next call to ``__next__``.
        rewind_from = len(self._cache) if self._index is None else self._index
        self._index = rewind_from - 1
        return upcoming

    def elements(self):
        return SequenceView(self._cache)

    def seek(self, index):
        self._index = index
        # Seeking beyond what is cached pulls the missing items from the
        # source (stopping early if the source runs out).
        shortfall = index - len(self._cache)
        if shortfall > 0:
            consume(self, shortfall)
|
|
|
|
|
|
class run_length:
    """
    :func:`run_length.encode` compresses an iterable with run-length encoding.
    It yields groups of repeated items with the count of how many times they
    were repeated:

        >>> uncompressed = 'abbcccdddd'
        >>> list(run_length.encode(uncompressed))
        [('a', 1), ('b', 2), ('c', 3), ('d', 4)]

    :func:`run_length.decode` decompresses an iterable that was previously
    compressed with run-length encoding. It yields the items of the
    decompressed iterable:

        >>> compressed = [('a', 1), ('b', 2), ('c', 3), ('d', 4)]
        >>> list(run_length.decode(compressed))
        ['a', 'b', 'b', 'c', 'c', 'c', 'd', 'd', 'd', 'd']

    """

    @staticmethod
    def encode(iterable):
        return ((item, ilen(run)) for item, run in groupby(iterable))

    @staticmethod
    def decode(iterable):
        expanded_runs = (repeat(item, times) for item, times in iterable)
        return chain.from_iterable(expanded_runs)
|
|
|
|
|
|
def exactly_n(iterable, n, predicate=bool):
    """Return ``True`` if exactly ``n`` items in the iterable are ``True``
    according to the *predicate* function.

        >>> exactly_n([True, True, False], 2)
        True
        >>> exactly_n([True, True, False], 1)
        False
        >>> exactly_n([0, 1, 2, 3, 4, 5], 3, lambda x: x < 3)
        True

    The iterable will be advanced until ``n + 1`` truthy items are encountered,
    so avoid calling it on infinite iterables.

    """
    matching = filter(predicate, iterable)
    # Ask for one match more than wanted: getting it means there are too many.
    found = take(n + 1, matching)
    return len(found) == n
|
|
|
|
|
|
def circular_shifts(iterable):
    """Return a list of circular shifts of *iterable*.

    >>> circular_shifts(range(4))
    [(0, 1, 2, 3), (1, 2, 3, 0), (2, 3, 0, 1), (3, 0, 1, 2)]
    """
    items = list(iterable)
    size = len(items)
    # Slide a window of the full length over the endlessly repeated items;
    # the first `size` windows are the shifts.
    rotations = windowed(cycle(items), size)
    return take(size, rotations)
|
|
|
|
|
|
def make_decorator(wrapping_func, result_index=0):
    """Return a decorator version of *wrapping_func*, which is a function that
    modifies an iterable. *result_index* is the position in that function's
    signature where the iterable goes.

    This lets you use itertools on the "production end," i.e. at function
    definition. This can augment what the function returns without changing the
    function's code.

    For example, to produce a decorator version of :func:`chunked`:

        >>> from more_itertools import chunked
        >>> chunker = make_decorator(chunked, result_index=0)
        >>> @chunker(3)
        ... def iter_range(n):
        ...     return iter(range(n))
        ...
        >>> list(iter_range(9))
        [[0, 1, 2], [3, 4, 5], [6, 7, 8]]

    To only allow truthy items to be returned:

        >>> truth_serum = make_decorator(filter, result_index=1)
        >>> @truth_serum(bool)
        ... def boolean_test():
        ...     return [0, 1, '', ' ', False, True]
        ...
        >>> list(boolean_test())
        [1, ' ', True]

    The :func:`peekable` and :func:`seekable` wrappers make for practical
    decorators:

        >>> from more_itertools import peekable
        >>> peekable_function = make_decorator(peekable)
        >>> @peekable_function()
        ... def str_range(*args):
        ...     return (str(x) for x in range(*args))
        ...
        >>> it = str_range(1, 20, 2)
        >>> next(it), next(it), next(it)
        ('1', '3', '5')
        >>> it.peek()
        '7'
        >>> next(it)
        '7'

    The decorated function keeps its ``__name__``, ``__doc__`` and other
    metadata (via :func:`functools.wraps`).

    """
    # See https://sites.google.com/site/bbayles/index/decorator_factory for
    # notes on how this works.
    def decorator(*wrapping_args, **wrapping_kwargs):
        def outer_wrapper(f):
            # Copy f's metadata so introspection and debugging show the
            # decorated function rather than "inner_wrapper".
            @wraps(f)
            def inner_wrapper(*args, **kwargs):
                result = f(*args, **kwargs)
                # Splice f's result into the positional arguments at the
                # slot where wrapping_func expects its iterable.
                wrapping_args_ = list(wrapping_args)
                wrapping_args_.insert(result_index, result)
                return wrapping_func(*wrapping_args_, **wrapping_kwargs)

            return inner_wrapper

        return outer_wrapper

    return decorator
|
|
|
|
|
|
def map_reduce(iterable, keyfunc, valuefunc=None, reducefunc=None):
    """Return a dictionary that maps the items in *iterable* to categories
    defined by *keyfunc*, transforms them with *valuefunc*, and
    then summarizes them by category with *reducefunc*.

    *valuefunc* defaults to the identity function if it is unspecified.
    If *reducefunc* is unspecified, no summarization takes place:

        >>> keyfunc = lambda x: x.upper()
        >>> result = map_reduce('abbccc', keyfunc)
        >>> sorted(result.items())
        [('A', ['a']), ('B', ['b', 'b']), ('C', ['c', 'c', 'c'])]

    Specifying *valuefunc* transforms the categorized items:

        >>> keyfunc = lambda x: x.upper()
        >>> valuefunc = lambda x: 1
        >>> result = map_reduce('abbccc', keyfunc, valuefunc)
        >>> sorted(result.items())
        [('A', [1]), ('B', [1, 1]), ('C', [1, 1, 1])]

    Specifying *reducefunc* summarizes the categorized items:

        >>> keyfunc = lambda x: x.upper()
        >>> valuefunc = lambda x: 1
        >>> reducefunc = sum
        >>> result = map_reduce('abbccc', keyfunc, valuefunc, reducefunc)
        >>> sorted(result.items())
        [('A', 1), ('B', 2), ('C', 3)]

    You may want to filter the input iterable before applying the map/reduce
    procedure:

        >>> all_items = range(30)
        >>> items = [x for x in all_items if 10 <= x <= 20]  # Filter
        >>> keyfunc = lambda x: x % 2  # Evens map to 0; odds to 1
        >>> categories = map_reduce(items, keyfunc=keyfunc)
        >>> sorted(categories.items())
        [(0, [10, 12, 14, 16, 18, 20]), (1, [11, 13, 15, 17, 19])]
        >>> summaries = map_reduce(items, keyfunc=keyfunc, reducefunc=sum)
        >>> sorted(summaries.items())
        [(0, 90), (1, 75)]

    Note that all items in the iterable are gathered into a list before the
    summarization step, which may require significant storage.

    The returned object is a :obj:`collections.defaultdict` with the
    ``default_factory`` set to ``None``, such that it behaves like a normal
    dictionary.

    """
    if valuefunc is None:

        def valuefunc(x):
            return x

    categories = defaultdict(list)
    for item in iterable:
        category = keyfunc(item)
        transformed = valuefunc(item)
        categories[category].append(transformed)

    if reducefunc is not None:
        # Only existing keys are reassigned, so the dict's size is stable
        # while it is being iterated.
        for category in categories:
            categories[category] = reducefunc(categories[category])

    # Disable auto-creation so missing keys raise KeyError from here on.
    categories.default_factory = None
    return categories
|
|
|
|
|
|
def rlocate(iterable, pred=bool, window_size=None):
    """Yield the index of each item in *iterable* for which *pred* returns
    ``True``, starting from the right and moving left.

    *pred* defaults to :func:`bool`, which will select truthy items:

        >>> list(rlocate([0, 1, 1, 0, 1, 0, 0]))  # Truthy at 1, 2, and 4
        [4, 2, 1]

    Set *pred* to a custom function to, e.g., find the indexes for a particular
    item:

        >>> iterable = iter('abcb')
        >>> pred = lambda x: x == 'b'
        >>> list(rlocate(iterable, pred))
        [3, 1]

    If *window_size* is given, then the *pred* function will be called with
    that many items. This enables searching for sub-sequences:

        >>> iterable = [0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3]
        >>> pred = lambda *args: args == (1, 2, 3)
        >>> list(rlocate(iterable, pred=pred, window_size=3))
        [9, 5, 1]

    Beware, this function won't return anything for infinite iterables.
    If *iterable* is reversible, ``rlocate`` will reverse it and search from
    the right. Otherwise, it will search from the left and return the results
    in reverse order.

    See :func:`locate` for other example applications.

    """
    if window_size is None:
        try:
            total = len(iterable)
            from_the_right = locate(reversed(iterable), pred)
        except TypeError:
            # No len() or not reversible: fall through to the generic path.
            pass
        else:
            # Convert positions counted from the right into ordinary indexes.
            return (total - offset - 1 for offset in from_the_right)

    return reversed(list(locate(iterable, pred, window_size)))
|
|
|
|
|
|
def replace(iterable, pred, substitutes, count=None, window_size=1):
    """Yield the items from *iterable*, replacing the items for which *pred*
    returns ``True`` with the items from the iterable *substitutes*.

        >>> iterable = [1, 1, 0, 1, 1, 0, 1, 1]
        >>> pred = lambda x: x == 0
        >>> substitutes = (2, 3)
        >>> list(replace(iterable, pred, substitutes))
        [1, 1, 2, 3, 1, 1, 2, 3, 1, 1]

    If *count* is given, the number of replacements will be limited:

        >>> iterable = [1, 1, 0, 1, 1, 0, 1, 1, 0]
        >>> pred = lambda x: x == 0
        >>> substitutes = [None]
        >>> list(replace(iterable, pred, substitutes, count=2))
        [1, 1, None, 1, 1, None, 1, 1, 0]

    Use *window_size* to control the number of items passed as arguments to
    *pred*. This allows for locating and replacing subsequences.

        >>> iterable = [0, 1, 2, 5, 0, 1, 2, 5]
        >>> window_size = 3
        >>> pred = lambda *args: args == (0, 1, 2)  # 3 items passed to pred
        >>> substitutes = [3, 4] # Splice in these items
        >>> list(replace(iterable, pred, substitutes, window_size=window_size))
        [3, 4, 5, 3, 4, 5]

    """
    if window_size < 1:
        raise ValueError('window_size must be at least 1')

    # Save the substitutes iterable, since it's used more than once
    replacement = tuple(substitutes)

    # Add padding such that the number of windows matches the length of the
    # iterable
    padding = [_marker] * (window_size - 1)
    windows = windowed(chain(iterable, padding), window_size)

    replaced = 0
    for window in windows:
        # If the current window matches our predicate (and we haven't hit
        # our maximum number of replacements), splice in the substitutes
        # and then consume the following windows that overlap with this one.
        # For example, if the iterable is (0, 1, 2, 3, 4...)
        # and the window size is 2, we have (0, 1), (1, 2), (2, 3)...
        # If the predicate matches on (0, 1), we need to zap (0, 1) and (1, 2)
        if pred(*window) and ((count is None) or (replaced < count)):
            replaced += 1
            yield from replacement
            consume(windows, window_size - 1)
        # If there was no match (or we've reached the replacement limit),
        # yield the first item from the window.
        elif window and (window[0] is not _marker):
            yield window[0]
|
|
|
|
|
|
def partitions(iterable):
    """Yield all possible order-preserving partitions of *iterable*.

    >>> iterable = 'abc'
    >>> for part in partitions(iterable):
    ...     print([''.join(p) for p in part])
    ['abc']
    ['a', 'bc']
    ['ab', 'c']
    ['a', 'b', 'c']

    This is unrelated to :func:`partition`.

    """
    items = list(iterable)
    size = len(items)
    # Every subset of the interior positions 1..size-1 is one way to cut.
    for cuts in powerset(range(1, size)):
        starts = (0,) + cuts
        stops = cuts + (size,)
        yield [items[lo:hi] for lo, hi in zip(starts, stops)]
|
|
|
|
|
|
def set_partitions(iterable, k=None):
    """
    Yield the set partitions of *iterable* into *k* parts. Set partitions are
    not order-preserving.

    >>> iterable = 'abc'
    >>> for part in set_partitions(iterable, 2):
    ...     print([''.join(p) for p in part])
    ['a', 'bc']
    ['ab', 'c']
    ['b', 'ac']


    If *k* is not given, every set partition is generated.

    >>> iterable = 'abc'
    >>> for part in set_partitions(iterable):
    ...     print([''.join(p) for p in part])
    ['abc']
    ['a', 'bc']
    ['ab', 'c']
    ['b', 'ac']
    ['a', 'b', 'c']

    A *k* below 1 raises ``ValueError``; a *k* larger than the number of
    items yields nothing.

    """
    pool = list(iterable)
    size = len(pool)
    if k is not None:
        if k < 1:
            raise ValueError(
                "Can't partition in a negative or zero number of groups"
            )
        if k > size:
            return

    def split_into(elements, parts):
        if parts == 1:
            yield [elements]
        elif len(elements) == parts:
            yield [[element] for element in elements]
        else:
            head, rest = elements[0], elements[1:]
            # Either the head forms a group of its own ...
            for partition in split_into(rest, parts - 1):
                yield [[head], *partition]
            # ... or it joins each group of a partition of the remainder.
            for partition in split_into(rest, parts):
                for position, group in enumerate(partition):
                    yield (
                        partition[:position]
                        + [[head] + group]
                        + partition[position + 1 :]
                    )

    group_counts = range(1, size + 1) if k is None else (k,)
    for parts in group_counts:
        yield from split_into(pool, parts)
|
|
|
|
|
|
class time_limited:
    """
    Yield items from *iterable* until *limit_seconds* have passed.
    If the time limit expires before all items have been yielded, the
    ``timed_out`` attribute will be set to ``True``.

    >>> from time import sleep
    >>> def generator():
    ...     yield 1
    ...     yield 2
    ...     sleep(0.2)
    ...     yield 3
    >>> iterable = time_limited(0.1, generator())
    >>> list(iterable)
    [1, 2]
    >>> iterable.timed_out
    True

    Note that the time is checked before each item is yielded, and iteration
    stops if the time elapsed is greater than *limit_seconds*. If your time
    limit is 1 second, but it takes 2 seconds to generate the first item from
    the iterable, the function will run for 2 seconds and not yield anything.

    A *limit_seconds* of zero yields nothing, sets ``timed_out`` on the first
    call to :func:`next`, and leaves *iterable* untouched. A negative
    *limit_seconds* raises ``ValueError``.

    """

    def __init__(self, limit_seconds, iterable):
        if limit_seconds < 0:
            raise ValueError('limit_seconds must be positive')
        self.limit_seconds = limit_seconds
        self._iterable = iter(iterable)
        self._start_time = monotonic()
        self.timed_out = False

    def __iter__(self):
        return self

    def __next__(self):
        # A zero limit has expired by definition. Deciding it here, rather
        # than by comparing clock readings, keeps the result independent of
        # the clock's resolution and avoids consuming a source item only to
        # discard it.
        if self.limit_seconds == 0:
            self.timed_out = True
            raise StopIteration

        item = next(self._iterable)
        if monotonic() - self._start_time > self.limit_seconds:
            self.timed_out = True
            raise StopIteration

        return item
|
|
|
|
|
|
def only(iterable, default=None, too_long=None):
    """If *iterable* has only one item, return it.
    If it has zero items, return *default*.
    If it has more than one item, raise the exception given by *too_long*,
    which is ``ValueError`` by default.

    >>> only([], default='missing')
    'missing'
    >>> only([1])
    1
    >>> only([1, 2])  # doctest: +IGNORE_EXCEPTION_DETAIL
    Traceback (most recent call last):
    ...
    ValueError: Expected exactly one item in iterable, but got 1, 2,
     and perhaps more.
    >>> only([1, 2], too_long=TypeError)  # doctest: +IGNORE_EXCEPTION_DETAIL
    Traceback (most recent call last):
    ...
    TypeError

    Note that :func:`only` attempts to advance *iterable* twice to ensure there
    is only one item. See :func:`spy` or :func:`peekable` to check
    iterable contents less destructively.
    """
    source = iter(iterable)
    first_value = next(source, default)

    nothing_more = object()
    second_value = next(source, nothing_more)
    if second_value is nothing_more:
        return first_value

    msg = (
        'Expected exactly one item in iterable, but got {!r}, {!r}, '
        'and perhaps more.'.format(first_value, second_value)
    )
    raise too_long or ValueError(msg)
|
|
|
|
|
|
class _IChunk:
    """Iterator over at most *n* items taken from a shared *iterable*.

    Items are normally read lazily from the source. ``fill_cache`` drains
    whatever is still unread into an internal deque, so the chunk stays
    usable after the shared source has moved on.
    """

    def __init__(self, iterable, n):
        self._it = islice(iterable, n)
        self._cache = deque()

    def fill_cache(self):
        # Pull every remaining item of this chunk out of the shared source.
        self._cache.extend(self._it)

    def __iter__(self):
        return self

    def __next__(self):
        try:
            return next(self._it)
        except StopIteration:
            # The live slice is exhausted; serve stashed items, if any.
            if not self._cache:
                raise
            return self._cache.popleft()
|
|
|
|
|
|
def ichunked(iterable, n):
    """Split *iterable* into consecutive sub-iterables of *n* items each.

    This is the lazy counterpart of :func:`chunked`: every chunk is an
    iterator rather than a list.

    As long as the chunks are consumed in order, no items of *iterable*
    need to be kept in memory. When a later chunk is requested before an
    earlier one has been fully read, the unread items of the earlier chunk
    are cached so they can still be retrieved afterwards.

        >>> from itertools import count
        >>> all_chunks = ichunked(count(), 4)
        >>> c_1, c_2, c_3 = next(all_chunks), next(all_chunks), next(all_chunks)
        >>> list(c_2)  # c_1's elements have been cached; c_3's haven't been
        [4, 5, 6, 7]
        >>> list(c_1)
        [0, 1, 2, 3]
        >>> list(c_3)
        [8, 9, 10, 11]

    """
    source = peekable(iter(iterable))
    exhausted = object()

    # Peeking tells us whether anything is left without consuming it.
    while source.peek(exhausted) is not exhausted:
        chunk = _IChunk(source, n)
        yield chunk

        # Before handing out the next chunk, move this chunk's unread items
        # out of the source and into its own cache.
        chunk.fill_cache()
|
|
|
|
|
|
def iequals(*iterables):
    """Return ``True`` if every one of the *iterables* yields the same
    elements in the same order, and ``False`` otherwise.

    This makes it possible to compare iterables of different types, or
    iterables that do not implement equality themselves.

        >>> iequals("abc", ['a', 'b', 'c'], ('a', 'b', 'c'), iter("abc"))
        True

        >>> iequals("abc", "acb")
        False

    Not to be confused with :func:`all_equal`, which checks whether all
    elements of a single iterable are equal to each other.

    """
    # A fresh object pads the shorter iterables, so that a length mismatch
    # shows up as a column whose entries are not all equal.
    padding = object()
    columns = zip_longest(*iterables, fillvalue=padding)
    return all(all_equal(column) for column in columns)
|
|
|
|
|
|
def distinct_combinations(iterable, r):
    """Yield the distinct *r*-length combinations of items in *iterable*.

        >>> list(distinct_combinations([0, 0, 1], 2))
        [(0, 0), (0, 1)]

    The results match ``set(combinations(iterable, r))``, but duplicates
    are never generated in the first place, which is much cheaper for
    larger inputs.

    ``ValueError`` is raised (on first iteration) if *r* is negative.

    """
    if r < 0:
        raise ValueError('r must be non-negative')
    if r == 0:
        yield ()
        return

    pool = tuple(iterable)
    by_value = itemgetter(1)

    # One generator per depth of the search; each yields (index, value)
    # pairs whose values have not been tried at that depth before.
    stack = [unique_everseen(enumerate(pool), key=by_value)]
    combo = [None] * r
    depth = 0
    while stack:
        try:
            position, value = next(stack[-1])
        except StopIteration:
            # Nothing left to try at this depth: backtrack.
            stack.pop()
            depth -= 1
            continue

        combo[depth] = value
        if depth + 1 == r:
            yield tuple(combo)
            continue

        # Descend, considering only the items that follow the chosen one.
        start = position + 1
        stack.append(
            unique_everseen(enumerate(pool[start:], start), key=by_value)
        )
        depth += 1
|
|
|
|
|
|
def filter_except(validator, iterable, *exceptions):
    """Yield each item of *iterable* for which calling *validator* does not
    raise one of the given *exceptions*.

    *validator* is called once per item with that item as its only
    argument; its return value is ignored. Raising signals that the item
    is invalid.

        >>> iterable = ['1', '2', 'three', '4', None]
        >>> list(filter_except(int, iterable, ValueError, TypeError))
        ['1', '2', '4']

    Exceptions raised by *validator* that are not listed in *exceptions*
    propagate to the caller as usual.
    """
    for candidate in iterable:
        try:
            validator(candidate)
        except exceptions:
            continue
        yield candidate
|
|
|
|
|
|
def map_except(function, iterable, *exceptions):
    """Yield ``function(item)`` for each item of *iterable*, skipping the
    items for which *function* raises one of the given *exceptions*.

    *function* is called once per item and should accept one argument.

        >>> iterable = ['1', '2', 'three', '4', None]
        >>> list(map_except(int, iterable, ValueError, TypeError))
        [1, 2, 4]

    Exceptions raised by *function* that are not listed in *exceptions*
    propagate to the caller as usual.
    """
    for element in iterable:
        try:
            yield function(element)
        except exceptions:
            continue
|
|
|
|
|
|
def map_if(iterable, pred, func, func_else=lambda x: x):
    """Yield a transformed version of each item of *iterable*, choosing the
    transformation with *pred*.

    Items for which ``pred(item)`` is truthy are passed through *func*;
    all other items are passed through *func_else*, which is the identity
    function unless specified otherwise. All three callables take a single
    argument.

        >>> from math import sqrt
        >>> iterable = list(range(-5, 5))
        >>> iterable
        [-5, -4, -3, -2, -1, 0, 1, 2, 3, 4]
        >>> list(map_if(iterable, lambda x: x > 3, lambda x: 'toobig'))
        [-5, -4, -3, -2, -1, 0, 1, 2, 3, 'toobig']
        >>> list(map_if(iterable, lambda x: x >= 0,
        ... lambda x: f'{sqrt(x):.2f}', lambda x: None))
        [None, None, None, None, None, '0.00', '1.00', '1.41', '1.73', '2.00']
    """
    for element in iterable:
        if pred(element):
            yield func(element)
        else:
            yield func_else(element)
|
|
|
|
|
|
def _sample_unweighted(iterable, k):
    """Reservoir-sample up to *k* items from the iterator *iterable*.

    Follows "Algorithm L" from Kim-Hung Li's 1994 paper
    "Reservoir-Sampling Algorithms of Time Complexity O(n(1+log(N/n)))".
    """

    def draw_skip(threshold):
        # Number of items to pass over before the next replacement; drawn
        # using random() and logarithms (a geometric distribution).
        return floor(log(random()) / log(1 - threshold))

    # The first `k` items seed the reservoir (the collection of samples).
    reservoir = take(k, iterable)

    # W is the largest of k U(0, 1) draws, i.e. the largest order statistic:
    # https://en.wikipedia.org/wiki/Order_statistic
    W = exp(log(random()) / k)
    next_index = k + draw_skip(W)

    for index, element in enumerate(iterable, k):
        if index != next_index:
            continue

        reservoir[randrange(k)] = element
        # The updated W is the largest of k U(0, old W) draws.
        W *= exp(log(random()) / k)
        next_index += draw_skip(W) + 1

    return reservoir
|
|
|
|
|
|
def _sample_weighted(iterable, k, weights):
|
|
# Implementation of "A-ExpJ" from the 2006 paper by Efraimidis et al. :
|
|
# "Weighted random sampling with a reservoir".
|
|
|
|
# Log-transform for numerical stability for weights that are small/large
|
|
weight_keys = (log(random()) / weight for weight in weights)
|
|
|
|
# Fill up the reservoir (collection of samples) with the first `k`
|
|
# weight-keys and elements, then heapify the list.
|
|
reservoir = take(k, zip(weight_keys, iterable))
|
|
heapify(reservoir)
|
|
|
|
# The number of jumps before changing the reservoir is a random variable
|
|
# with an exponential distribution. Sample it using random() and logs.
|
|
smallest_weight_key, _ = reservoir[0]
|
|
weights_to_skip = log(random()) / smallest_weight_key
|
|
|
|
for weight, element in zip(weights, iterable):
|
|
if weight >= weights_to_skip:
|
|
# The notation here is consistent with the paper, but we store
|
|
# the weight-keys in log-space for better numerical stability.
|
|
smallest_weight_key, _ = reservoir[0]
|
|
t_w = exp(weight * smallest_weight_key)
|
|
r_2 = uniform(t_w, 1) # generate U(t_w, 1)
|
|
weight_key = log(r_2) / weight
|
|
heapreplace(reservoir, (weight_key, element))
|
|
smallest_weight_key, _ = reservoir[0]
|
|
weights_to_skip = log(random()) / smallest_weight_key
|
|
else:
|
|
weights_to_skip -= weight
|
|
|
|
# Equivalent to [element for weight_key, element in sorted(reservoir)]
|
|
return [heappop(reservoir)[1] for _ in range(k)]
|
|
|
|
|
|
def sample(iterable, k, weights=None):
    """Return a list of *k* elements chosen (without replacement) from
    *iterable*. Similar to :func:`random.sample`, except that the length
    of *iterable* does not need to be known in advance.

        >>> iterable = range(100)
        >>> sample(iterable, 5)  # doctest: +SKIP
        [81, 60, 96, 16, 4]

    An iterable of *weights*, paired up with the items of *iterable*, may
    be supplied as well:

        >>> iterable = range(100)
        >>> weights = (i * i + 1 for i in range(100))
        >>> sampled = sample(iterable, 5, weights=weights)  # doctest: +SKIP
        [79, 67, 74, 66, 78]

    Weighted sampling can also produce weighted random permutations, in
    which the relative weight of an item determines how likely it is to
    appear late in the permutation.

        >>> data = "abcdefgh"
        >>> weights = range(1, len(data) + 1)
        >>> sample(data, k=len(data), weights=weights)  # doctest: +SKIP
        ['c', 'a', 'b', 'e', 'g', 'd', 'h', 'f']
    """
    if k == 0:
        return []

    # The helpers consume their input in stages, so they need iterators.
    source = iter(iterable)
    if weights is not None:
        return _sample_weighted(source, k, iter(weights))
    return _sample_unweighted(source, k)
|
|
|
|
|
|
def is_sorted(iterable, key=None, reverse=False, strict=False):
|
|
"""Returns ``True`` if the items of iterable are in sorted order, and
|
|
``False`` otherwise. *key* and *reverse* have the same meaning that they do
|
|
in the built-in :func:`sorted` function.
|
|
|
|
>>> is_sorted(['1', '2', '3', '4', '5'], key=int)
|
|
True
|
|
>>> is_sorted([5, 4, 3, 1, 2], reverse=True)
|
|
False
|
|
|
|
If *strict*, tests for strict sorting, that is, returns ``False`` if equal
|
|
elements are found:
|
|
|
|
>>> is_sorted([1, 2, 2])
|
|
True
|
|
>>> is_sorted([1, 2, 2], strict=True)
|
|
False
|
|
|
|
The function returns ``False`` after encountering the first out-of-order
|
|
item. If there are no out-of-order items, the iterable is exhausted.
|
|
"""
|
|
|
|
compare = (le if reverse else ge) if strict else (lt if reverse else gt)
|
|
it = iterable if key is None else map(key, iterable)
|
|
return not any(starmap(compare, pairwise(it)))
|
|
|
|
|
|
class AbortThread(BaseException):
    """Raised inside a :class:`callback_iter` callback once iteration has
    been abandoned.

    It derives from ``BaseException`` rather than ``Exception``, so an
    ``except Exception`` clause will not catch it.
    """
|
|
|
|
|
|
class callback_iter:
    """Turn a function that reports progress through a callback into an
    iterator.

    Let *func* be a function that takes a `callback` keyword argument.
    For example:

        >>> def func(callback=None):
        ...     for i, c in [(1, 'a'), (2, 'b'), (3, 'c')]:
        ...         if callback:
        ...             callback(i, c)
        ...     return 4

    Use ``with callback_iter(func)`` to obtain an iterator over the
    ``(args, kwargs)`` pairs that *func* passes to its callback.

        >>> with callback_iter(func) as it:
        ...     for args, kwargs in it:
        ...         print(args)
        (1, 'a')
        (2, 'b')
        (3, 'c')

    *func* runs in a background thread. The ``done`` property tells
    whether it has finished.

        >>> it.done
        True

    Once it has finished successfully, its return value can be read from
    the ``result`` property.

        >>> it.result
        4

    Notes:

    * If the function expects its callback under a keyword other than
      ``callback``, pass that name as *callback_kwd*.
    * If the function finished by raising an exception, reading the
      ``result`` property raises that same exception.
    * If the function has not finished, reading ``result`` raises
      ``RuntimeError``.
    * Leaving the ``with`` block marks the iteration as aborted: the next
      time the function invokes its callback, ``more_itertools.AbortThread``
      is raised inside the function, and that is what ``result`` raises
      afterwards unless the function handled it.
    * *wait_seconds* controls how often the background function is polled
      for output.

    """

    def __init__(self, func, callback_kwd='callback', wait_seconds=0.1):
        # Imported here so concurrent.futures is only loaded when needed.
        from concurrent.futures import ThreadPoolExecutor

        self._func = func
        self._callback_kwd = callback_kwd
        self._wait_seconds = wait_seconds
        self._aborted = False
        self._future = None
        self._executor = ThreadPoolExecutor(max_workers=1)
        # Creating the generator does not start the function; the first
        # next() call does.
        self._iterator = self._reader()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        # Flag first, so a still-running function is interrupted at its
        # next callback, then wait for the worker thread to finish.
        self._aborted = True
        self._executor.shutdown()

    def __iter__(self):
        return self

    def __next__(self):
        return next(self._iterator)

    @property
    def done(self):
        """Whether the wrapped function has been started and has finished."""
        future = self._future
        if future is None:
            return False
        return future.done()

    @property
    def result(self):
        """The wrapped function's return value (re-raises its exception)."""
        if self.done:
            return self._future.result()

        raise RuntimeError('Function has not yet completed')

    def _reader(self):
        q = Queue()

        def callback(*args, **kwargs):
            if self._aborted:
                raise AbortThread('canceled by user')

            q.put((args, kwargs))

        callback_kwargs = {self._callback_kwd: callback}
        self._future = self._executor.submit(self._func, **callback_kwargs)

        # Poll the queue until the function has finished, passing on each
        # callback invocation as it arrives.
        while True:
            try:
                item = q.get(timeout=self._wait_seconds)
            except Empty:
                pass
            else:
                q.task_done()
                yield item

            if self._future.done():
                break

        # The function may have queued more items between the last poll
        # and finishing; collect them all before yielding any.
        leftovers = []
        try:
            while True:
                leftovers.append(q.get_nowait())
                q.task_done()
        except Empty:
            pass

        q.join()
        yield from leftovers
|
|
|
|
|
|
def windowed_complete(iterable, n):
    """
    Yield ``(beginning, middle, end)`` tuples, where:

    * Each ``middle`` is a window of *n* consecutive items from *iterable*
    * Each ``beginning`` holds the items that precede ``middle``
    * Each ``end`` holds the items that follow ``middle``

        >>> iterable = range(7)
        >>> n = 3
        >>> for beginning, middle, end in windowed_complete(iterable, n):
        ...     print(beginning, middle, end)
        () (0, 1, 2) (3, 4, 5, 6)
        (0,) (1, 2, 3) (4, 5, 6)
        (0, 1) (2, 3, 4) (5, 6)
        (0, 1, 2) (3, 4, 5) (6,)
        (0, 1, 2, 3) (4, 5, 6) ()

    *n* must be at least 0 and at most the length of *iterable*; otherwise
    ``ValueError`` is raised.

    The whole iterable is read into a tuple up front, so this may require
    significant storage.
    """
    if n < 0:
        raise ValueError('n must be >= 0')

    items = tuple(iterable)
    total = len(items)
    if n > total:
        raise ValueError('n must be <= len(seq)')

    for start in range(total - n + 1):
        stop = start + n
        yield items[:start], items[start:stop], items[stop:]
|
|
|
|
|
|
def all_unique(iterable, key=None):
    """
    Return ``True`` if no two elements of *iterable* are equal to each
    other.

        >>> all_unique('ABCB')
        False

    When a *key* function is given, elements are compared by the values it
    returns.

        >>> all_unique('ABCb')
        True
        >>> all_unique('ABCb', str.lower)
        False

    Iteration stops at the first repeated element. Hashable and unhashable
    items may be mixed; unhashable items are tracked in a list, which makes
    checking them slower.
    """
    hashable_seen = set()
    unhashable_seen = []
    candidates = map(key, iterable) if key else iterable

    for candidate in candidates:
        try:
            if candidate in hashable_seen:
                return False
            hashable_seen.add(candidate)
        except TypeError:
            # Unhashable: fall back to a linear search through a list.
            if candidate in unhashable_seen:
                return False
            unhashable_seen.append(candidate)

    return True
|
|
|
|
|
|
def nth_product(index, *args):
    """Equivalent to ``list(product(*args))[index]``.

    The products of *args* can be ordered lexicographically.
    :func:`nth_product` computes the product at sort position *index* without
    computing the previous products.

        >>> nth_product(8, range(2), range(2), range(2), range(2))
        (1, 0, 0, 0)

    Negative values of *index* count from the end. With no *args* the only
    product is the empty tuple, which is found at index ``0``.

    ``IndexError`` will be raised if the given *index* is invalid.
    """
    # Work from the last pool to the first: the last pool varies fastest,
    # so it corresponds to the least significant "digit" of the index.
    pools = list(map(tuple, reversed(args)))
    ns = list(map(len, pools))

    # The initial value makes the empty product 1, matching product(),
    # which yields exactly one (empty) tuple. Without it, reduce() raises
    # TypeError when no pools are given.
    c = reduce(mul, ns, 1)

    if index < 0:
        index += c

    if not 0 <= index < c:
        raise IndexError

    result = []
    for pool, n in zip(pools, ns):
        result.append(pool[index % n])
        index //= n

    return tuple(reversed(result))
|
|
|
|
|
|
def nth_permutation(iterable, r, index):
    """Equivalent to ``list(permutations(iterable, r))[index]``

    The subsequences of *iterable* that are of length *r* where order is
    important can be ordered lexicographically. :func:`nth_permutation`
    computes the subsequence at sort position *index* directly, without
    computing the previous subsequences.

        >>> nth_permutation('ghijk', 2, 5)
        ('h', 'i')

    If *r* is ``None``, full-length permutations are used. Negative values
    of *index* count from the end.

    ``ValueError`` will be raised if *r* is negative or greater than the
    length of *iterable*.
    ``IndexError`` will be raised if the given *index* is invalid.
    """
    pool = list(iterable)
    n = len(pool)

    # c is the total number of r-length permutations. It is always >= 1
    # once r has been validated, so no special case for c == 0 is needed.
    if r is None or r == n:
        r, c = n, factorial(n)
    elif not 0 <= r < n:
        raise ValueError('r must satisfy 0 <= r <= len(iterable)')
    else:
        c = factorial(n) // factorial(n - r)

    if index < 0:
        index += c

    if not 0 <= index < c:
        raise IndexError('permutation index out of range')

    # Express the position in the factorial number system: digit d tells
    # which of the remaining pool items to take. For r < n the index is
    # first scaled up to the matching full-length permutation.
    result = [0] * r
    q = index * factorial(n) // c if r < n else index
    for d in range(1, n + 1):
        q, i = divmod(q, d)
        if 0 <= n - d < r:
            result[n - d] = i
        if q == 0:
            break

    # Each digit indexes into the shrinking pool of unused items.
    return tuple(map(pool.pop, result))
|
|
|
|
|
|
def value_chain(*args):
    """Yield all arguments passed to the function in the same order in which
    they were passed. If an argument itself is iterable then iterate over its
    values.

        >>> list(value_chain(1, 2, 3, [4, 5, 6]))
        [1, 2, 3, 4, 5, 6]

    Binary and text strings are not considered iterable and are emitted
    as-is:

        >>> list(value_chain('12', '34', ['56', '78']))
        ['12', '34', '56', '78']

    Multiple levels of nesting are not flattened.

    A ``TypeError`` raised while an iterable argument is being iterated
    propagates to the caller.

    """
    for value in args:
        if isinstance(value, (str, bytes)):
            yield value
            continue

        # Only the iter() call decides whether the argument is iterable.
        # Keeping the iteration itself outside the ``try`` means a
        # TypeError raised by the argument's own items is not mistaken
        # for "not iterable".
        try:
            iterator = iter(value)
        except TypeError:
            yield value
        else:
            yield from iterator
|
|
|
|
|
|
def product_index(element, *args):
    """Equivalent to ``list(product(*args)).index(element)``

    The products of *args* can be ordered lexicographically.
    :func:`product_index` computes the first index of *element* without
    computing the previous products.

        >>> product_index([8, 2], range(10), range(5))
        42

    ``ValueError`` will be raised if the given *element* isn't in the product
    of *args*.
    """
    position = 0

    # Padding with the sentinel reveals a length mismatch between *element*
    # and the number of pools.
    for item, pool in zip_longest(element, args, fillvalue=_marker):
        if item is _marker or pool is _marker:
            raise ValueError('element is not a product of args')

        # Treat each pool as one digit of a mixed-radix number.
        choices = tuple(pool)
        position = position * len(choices) + choices.index(item)

    return position
|
|
|
|
|
|
def combination_index(element, iterable):
    """Equivalent to ``list(combinations(iterable, r)).index(element)``

    The subsequences of *iterable* that are of length *r* can be ordered
    lexicographically. :func:`combination_index` computes the index of the
    first *element*, without computing the previous combinations. Here *r*
    is the length of *element*.

        >>> combination_index('adf', 'abcdefg')
        10

    ``ValueError`` will be raised if the given *element* isn't one of the
    combinations of *iterable*.
    """

    def choose(total, picked):
        # Binomial coefficient; Python versions below 3.8 don't have
        # math.comb
        return factorial(total) // (factorial(picked) * factorial(total - picked))

    wanted_items = enumerate(element)
    k, wanted = next(wanted_items, (None, None))
    if k is None:
        # The empty combination is the first (and only) one of its length.
        return 0

    # Walk the pool once, recording where each item of *element* is found.
    matched = []
    pool = enumerate(iterable)
    for n, candidate in pool:
        if not (candidate == wanted):
            continue

        matched.append(n)
        next_k, wanted = next(wanted_items, (None, None))
        if next_k is None:
            break
        k = next_k
    else:
        raise ValueError('element is not a combination of iterable')

    # Exhaust the pool so that n ends up as the index of its last item.
    n, _ = last(pool, default=(n, None))

    offset = 1
    for i, position in enumerate(reversed(matched), start=1):
        remaining = n - position
        if i <= remaining:
            offset += choose(remaining, i)

    return choose(n + 1, k + 1) - offset
|
|
|
|
|
|
def permutation_index(element, iterable):
    """Equivalent to ``list(permutations(iterable, r)).index(element)``

    The subsequences of *iterable* that are of length *r* where order is
    important can be ordered lexicographically. :func:`permutation_index`
    computes the index of the first *element* directly, without computing
    the previous permutations. Here *r* is the length of *element*.

        >>> permutation_index([1, 3, 2], range(5))
        19

    ``ValueError`` will be raised if the given *element* isn't one of the
    permutations of *iterable*.
    """
    remaining = list(iterable)
    position = 0

    # Each step picks one of the items still available, which makes the
    # result a number whose radix shrinks by one with every digit.
    for radix, item in zip(range(len(remaining), -1, -1), element):
        offset = remaining.index(item)
        position = position * radix + offset
        remaining.pop(offset)

    return position
|
|
|
|
|
|
class countable:
    """Iterator wrapper around *iterable* that counts the items it hands out.

    The count is kept in the ``items_seen`` attribute, which begins at
    ``0`` and grows by one for every item produced:

        >>> iterable = map(str, range(10))
        >>> it = countable(iterable)
        >>> it.items_seen
        0
        >>> next(it), next(it)
        ('0', '1')
        >>> list(it)
        ['2', '3', '4', '5', '6', '7', '8', '9']
        >>> it.items_seen
        10
    """

    def __init__(self, iterable):
        self.items_seen = 0
        self._it = iter(iterable)

    def __iter__(self):
        return self

    def __next__(self):
        # If the source is exhausted, StopIteration propagates before the
        # counter is touched.
        value = next(self._it)
        self.items_seen += 1
        return value
|
|
|
|
|
|
def chunked_even(iterable, n):
    """Break *iterable* into lists of approximately length *n*, spreading
    the items so that the list lengths differ by at most 1.

        >>> iterable = [1, 2, 3, 4, 5, 6, 7]
        >>> n = 3
        >>> list(chunked_even(iterable, n))  # List lengths: 3, 2, 2
        [[1, 2, 3], [4, 5], [6, 7]]
        >>> list(chunked(iterable, n))  # List lengths: 3, 3, 1
        [[1, 2, 3], [4, 5, 6], [7]]

    """
    # Inputs that know their own length can be split directly; anything
    # else has to be buffered as it is read.
    length_of = getattr(iterable, '__len__', None)
    if length_of is not None:
        return _chunked_even_finite(iterable, length_of(), n)
    return _chunked_even_online(iterable, n)
|
|
|
|
|
|
def _chunked_even_online(iterable, n):
    """Yield evenly sized lists from an *iterable* of unknown length.

    Items are buffered; whenever the buffer reaches its maximum size a
    list of *n* items is split off the front. Whatever is left once the
    input ends is divided up by :func:`_chunked_even_finite`.
    """
    pending = []
    limit = n + (n - 2) * (n - 1)
    for item in iterable:
        pending.append(item)
        if len(pending) == limit:
            head, pending = pending[:n], pending[n:]
            yield head
    yield from _chunked_even_finite(pending, len(pending), n)
|
|
|
|
|
|
def _chunked_even_finite(iterable, N, n):
|
|
if N < 1:
|
|
return
|
|
|
|
# Lists are either size `full_size <= n` or `partial_size = full_size - 1`
|
|
q, r = divmod(N, n)
|
|
num_lists = q + (1 if r > 0 else 0)
|
|
q, r = divmod(N, num_lists)
|
|
full_size = q + (1 if r > 0 else 0)
|
|
partial_size = full_size - 1
|
|
num_full = N - partial_size * num_lists
|
|
num_partial = num_lists - num_full
|
|
|
|
buffer = []
|
|
iterator = iter(iterable)
|
|
|
|
# Yield num_full lists of full_size
|
|
for x in iterator:
|
|
buffer.append(x)
|
|
if len(buffer) == full_size:
|
|
yield buffer
|
|
buffer = []
|
|
num_full -= 1
|
|
if num_full <= 0:
|
|
break
|
|
|
|
# Yield num_partial lists of partial_size
|
|
for x in iterator:
|
|
buffer.append(x)
|
|
if len(buffer) == partial_size:
|
|
yield buffer
|
|
buffer = []
|
|
num_partial -= 1
|
|
|
|
|
|
def zip_broadcast(*objects, scalar_types=(str, bytes), strict=False):
    """A version of :func:`zip` that "broadcasts" any scalar
    (i.e., non-iterable) items into output tuples.

        >>> iterable_1 = [1, 2, 3]
        >>> iterable_2 = ['a', 'b', 'c']
        >>> scalar = '_'
        >>> list(zip_broadcast(iterable_1, iterable_2, scalar))
        [(1, 'a', '_'), (2, 'b', '_'), (3, 'c', '_')]

    Which types count as scalar, in addition to anything that cannot be
    iterated, is set with the *scalar_types* keyword argument. The default
    is ``(str, bytes)``. Set it to ``None`` to treat strings and byte
    strings as iterable:

        >>> list(zip_broadcast('abc', 0, 'xyz', scalar_types=None))
        [('a', 0, 'x'), ('b', 0, 'y'), ('c', 0, 'z')]

    If the *strict* keyword argument is ``True``, then
    ``UnequalIterablesError`` will be raised if any of the iterables have
    different lengths.

    When every argument is a scalar, a single tuple holding all of them is
    yielded. When called without arguments, nothing is yielded.
    """

    def is_scalar(obj):
        if scalar_types and isinstance(obj, scalar_types):
            return True
        try:
            iter(obj)
        except TypeError:
            return True
        return False

    if not objects:
        return

    # Record an iterator and the output position for each iterable argument.
    iterators = []
    positions = []
    for position, obj in enumerate(objects):
        if not is_scalar(obj):
            iterators.append(iter(obj))
            positions.append(position)

    if not iterators:
        yield tuple(objects)
        return

    # Every output row starts as a copy of the arguments, which puts the
    # scalars in place; the iterable slots are then overwritten.
    template = list(objects)
    zipper = _zip_equal if strict else zip
    for values in zipper(*iterators):
        row = list(template)
        for position, value in zip(positions, values):
            row[position] = value
        yield tuple(row)
|
|
|
|
|
|
def unique_in_window(iterable, n, key=None):
    """Yield the items from *iterable* that haven't been seen recently.
    *n* is the size of the lookback window.

        >>> iterable = [0, 1, 0, 2, 3, 0]
        >>> n = 3
        >>> list(unique_in_window(iterable, n))
        [0, 1, 2, 3, 0]

    If a *key* function is given, items are compared by the values it
    returns:

        >>> list(unique_in_window('abAcda', 3, key=lambda x: x.lower()))
        ['a', 'b', 'c', 'd', 'a']

    The items in *iterable* (or their keys) must be hashable.

    ``ValueError`` is raised if *n* is not greater than 0.
    """
    if n <= 0:
        raise ValueError('n must be greater than 0')

    # `recent` remembers the order in which keys were yielded, `remembered`
    # allows fast membership tests on them.
    recent = deque(maxlen=n)
    remembered = set()

    for item in iterable:
        k = item if key is None else key(item)
        if k in remembered:
            continue

        # Forget the oldest key once the window is full; appending to the
        # bounded deque below then drops it from `recent` as well.
        if len(remembered) == n:
            remembered.discard(recent[0])

        remembered.add(k)
        recent.append(k)
        yield item
|
|
|
|
|
|
def duplicates_everseen(iterable, key=None):
    """Yield every element that has already occurred earlier in *iterable*.

        >>> list(duplicates_everseen('mississippi'))
        ['s', 'i', 's', 's', 'i', 'p', 'i']
        >>> list(duplicates_everseen('AaaBbbCccAaa', str.lower))
        ['a', 'a', 'b', 'b', 'c', 'c', 'A', 'a', 'a']

    If a *key* function is given, elements are compared by the values it
    returns.

    This function is analogous to :func:`unique_everseen` and is subject to
    the same performance considerations.

    """
    hashable_seen = set()
    unhashable_seen = []

    for element in iterable:
        k = element if key is None else key(element)
        try:
            if k in hashable_seen:
                yield element
            else:
                hashable_seen.add(k)
        except TypeError:
            # Unhashable key: fall back to a linear search through a list.
            if k in unhashable_seen:
                yield element
            else:
                unhashable_seen.append(k)
|
|
|
|
|
|
def duplicates_justseen(iterable, key=None):
    """Yield every element that repeats the element directly before it.

        >>> list(duplicates_justseen('mississippi'))
        ['s', 's', 'p']
        >>> list(duplicates_justseen('AaaBbbCccAaa', str.lower))
        ['a', 'a', 'b', 'b', 'c', 'c', 'a', 'a']

    If a *key* function is given, elements are compared by the values it
    returns.

    This function is analogous to :func:`unique_justseen`.

    """
    # Group runs of equal elements and keep all but the first of each run.
    runs = groupby(iterable, key)
    return flatten(islice_extended(run)[1:] for _, run in runs)
|
|
|
|
|
|
def minmax(iterable_or_value, *others, key=None, default=_marker):
    """Return both the smallest and the largest item of an iterable, or of
    two or more positional arguments, as a ``(smallest, largest)`` tuple.

        >>> minmax([3, 1, 5])
        (1, 5)

        >>> minmax(4, 2, 6)
        (2, 6)

    If a *key* function is provided, it will be used to transform the input
    items for comparison.

        >>> minmax([5, 30], key=str)  # '30' sorts before '5'
        (30, 5)

    If a *default* value is provided, it will be returned if there are no
    input items.

        >>> minmax([], default=(0, 0))
        (0, 0)

    Otherwise ``ValueError`` is raised.

    This function is based on the
    `recipe <http://code.activestate.com/recipes/577916/>`__ by
    Raymond Hettinger and takes care to minimize the number of comparisons
    performed.
    """
    if others:
        iterable = (iterable_or_value, *others)
    else:
        iterable = iterable_or_value

    it = iter(iterable)

    try:
        lo = hi = next(it)
    except StopIteration as e:
        if default is not _marker:
            return default
        raise ValueError(
            '`minmax()` argument is an empty iterable. '
            'Provide a `default` value to suppress this error.'
        ) from e

    # Items are taken two at a time: the pair is ordered first, then only
    # its smaller item is compared with `lo` and its larger one with `hi`.
    # An odd item at the end is paired with `lo`, which changes nothing.
    pairs = zip_longest(it, it, fillvalue=lo)

    # The keyed and unkeyed cases are kept in separate loops so that the
    # common key=None case does not pay for unnecessary copies.
    if key is None:
        for small, large in pairs:
            if large < small:
                small, large = large, small
            if small < lo:
                lo = small
            if hi < large:
                hi = large
        return lo, hi

    lo_key = hi_key = key(lo)
    for small, large in pairs:
        small_key, large_key = key(small), key(large)

        if large_key < small_key:
            small, large = large, small
            small_key, large_key = large_key, small_key
        if small_key < lo_key:
            lo, lo_key = small, small_key
        if hi_key < large_key:
            hi, hi_key = large, large_key

    return lo, hi
|
|
|
|
|
|
def constrained_batches(
    iterable, max_size, max_count=None, get_len=len, strict=True
):
    """Yield tuples of consecutive items from *iterable* whose combined
    size does not exceed *max_size*.

        >>> iterable = [b'12345', b'123', b'12345678', b'1', b'1', b'12', b'1']
        >>> list(constrained_batches(iterable, 10))
        [(b'12345', b'123'), (b'12345678', b'1', b'1'), (b'12', b'1')]

    If a *max_count* is supplied, the number of items per batch is also
    limited:

        >>> iterable = [b'12345', b'123', b'12345678', b'1', b'1', b'12', b'1']
        >>> list(constrained_batches(iterable, 10, max_count = 2))
        [(b'12345', b'123'), (b'12345678', b'1'), (b'1', b'12'), (b'1',)]

    If a *get_len* function is supplied, use that instead of :func:`len` to
    determine item size.

    If *strict* is ``True``, raise ``ValueError`` if any single item is bigger
    than *max_size*. Otherwise, allow single items to exceed *max_size*.

    ``ValueError`` is also raised if *max_size* is not greater than zero.
    """
    if max_size <= 0:
        raise ValueError('maximum size must be greater than zero')

    batch = []
    batch_size = 0
    for item in iterable:
        item_len = get_len(item)
        if strict and item_len > max_size:
            raise ValueError('item size exceeds maximum size')

        is_full = len(batch) == max_count
        would_overflow = item_len + batch_size > max_size

        # Close the current batch if this item does not fit. An empty batch
        # is never closed, so an oversized item gets a batch of its own.
        if batch and (would_overflow or is_full):
            yield tuple(batch)
            batch = []
            batch_size = 0

        batch.append(item)
        batch_size += item_len

    if batch:
        yield tuple(batch)
|