1
0
mirror of https://github.com/Tautulli/Tautulli.git synced 2025-03-12 04:35:40 -07:00

Bump bleach from 5.0.1 to 6.0.0 ()

* Bump bleach from 5.0.1 to 6.0.0

Bumps [bleach](https://github.com/mozilla/bleach) from 5.0.1 to 6.0.0.
- [Release notes](https://github.com/mozilla/bleach/releases)
- [Changelog](https://github.com/mozilla/bleach/blob/main/CHANGES)
- [Commits](https://github.com/mozilla/bleach/compare/v5.0.1...v6.0.0)

---
updated-dependencies:
- dependency-name: bleach
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>

* Update bleach==6.0.0

---------

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: JonnyWong16 <9099342+JonnyWong16@users.noreply.github.com>

[skip ci]
This commit is contained in:
dependabot[bot] 2023-03-02 20:55:01 -08:00 committed by GitHub
parent 6b1b6d0f32
commit 1466a391d1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 291 additions and 198 deletions

@ -11,9 +11,9 @@ from bleach.sanitizer import (
# yyyymmdd
__releasedate__ = "20220627"
__releasedate__ = "20230123"
# x.y.z or x.y.z.dev0 -- semver
__version__ = "5.0.1"
__version__ = "6.0.0"
__all__ = ["clean", "linkify"]
@ -52,7 +52,7 @@ def clean(
:arg str text: the text to clean
:arg list tags: allowed list of tags; defaults to
:arg set tags: set of allowed tags; defaults to
``bleach.sanitizer.ALLOWED_TAGS``
:arg dict attributes: allowed attributes; can be a callable, list or dict;

@ -38,6 +38,9 @@ from bleach._vendor.html5lib.filters.sanitizer import (
allowed_protocols,
allowed_css_properties,
allowed_svg_properties,
attr_val_is_uri,
svg_attr_val_allows_ref,
svg_allow_local_href,
) # noqa: E402 module level import not at top of file
from bleach._vendor.html5lib.filters.sanitizer import (
Filter as SanitizerFilter,
@ -78,127 +81,129 @@ TAG_TOKEN_TYPE_PARSEERROR = constants.tokenTypes["ParseError"]
#: List of valid HTML tags, from WHATWG HTML Living Standard as of 2018-10-17
#: https://html.spec.whatwg.org/multipage/indices.html#elements-3
HTML_TAGS = [
"a",
"abbr",
"address",
"area",
"article",
"aside",
"audio",
"b",
"base",
"bdi",
"bdo",
"blockquote",
"body",
"br",
"button",
"canvas",
"caption",
"cite",
"code",
"col",
"colgroup",
"data",
"datalist",
"dd",
"del",
"details",
"dfn",
"dialog",
"div",
"dl",
"dt",
"em",
"embed",
"fieldset",
"figcaption",
"figure",
"footer",
"form",
"h1",
"h2",
"h3",
"h4",
"h5",
"h6",
"head",
"header",
"hgroup",
"hr",
"html",
"i",
"iframe",
"img",
"input",
"ins",
"kbd",
"keygen",
"label",
"legend",
"li",
"link",
"map",
"mark",
"menu",
"meta",
"meter",
"nav",
"noscript",
"object",
"ol",
"optgroup",
"option",
"output",
"p",
"param",
"picture",
"pre",
"progress",
"q",
"rp",
"rt",
"ruby",
"s",
"samp",
"script",
"section",
"select",
"slot",
"small",
"source",
"span",
"strong",
"style",
"sub",
"summary",
"sup",
"table",
"tbody",
"td",
"template",
"textarea",
"tfoot",
"th",
"thead",
"time",
"title",
"tr",
"track",
"u",
"ul",
"var",
"video",
"wbr",
]
HTML_TAGS = frozenset(
(
"a",
"abbr",
"address",
"area",
"article",
"aside",
"audio",
"b",
"base",
"bdi",
"bdo",
"blockquote",
"body",
"br",
"button",
"canvas",
"caption",
"cite",
"code",
"col",
"colgroup",
"data",
"datalist",
"dd",
"del",
"details",
"dfn",
"dialog",
"div",
"dl",
"dt",
"em",
"embed",
"fieldset",
"figcaption",
"figure",
"footer",
"form",
"h1",
"h2",
"h3",
"h4",
"h5",
"h6",
"head",
"header",
"hgroup",
"hr",
"html",
"i",
"iframe",
"img",
"input",
"ins",
"kbd",
"keygen",
"label",
"legend",
"li",
"link",
"map",
"mark",
"menu",
"meta",
"meter",
"nav",
"noscript",
"object",
"ol",
"optgroup",
"option",
"output",
"p",
"param",
"picture",
"pre",
"progress",
"q",
"rp",
"rt",
"ruby",
"s",
"samp",
"script",
"section",
"select",
"slot",
"small",
"source",
"span",
"strong",
"style",
"sub",
"summary",
"sup",
"table",
"tbody",
"td",
"template",
"textarea",
"tfoot",
"th",
"thead",
"time",
"title",
"tr",
"track",
"u",
"ul",
"var",
"video",
"wbr",
)
)
#: List of block level HTML tags, as per https://github.com/mozilla/bleach/issues/369
#: from mozilla on 2019.07.11
#: https://developer.mozilla.org/en-US/docs/Web/HTML/Block-level_elements#Elements
HTML_TAGS_BLOCK_LEVEL = frozenset(
[
(
"address",
"article",
"aside",
@ -232,7 +237,7 @@ HTML_TAGS_BLOCK_LEVEL = frozenset(
"section",
"table",
"ul",
]
)
)
@ -473,7 +478,7 @@ class BleachHTMLParser(HTMLParser):
def __init__(self, tags, strip, consume_entities, **kwargs):
"""
:arg tags: list of allowed tags--everything else is either stripped or
:arg tags: set of allowed tags--everything else is either stripped or
escaped; if None, then this doesn't look at tags at all
:arg strip: whether to strip disallowed tags (True) or escape them (False);
if tags=None, then this doesn't have any effect
@ -481,7 +486,9 @@ class BleachHTMLParser(HTMLParser):
leave them as is when tokenizing (BleachHTMLTokenizer-added behavior)
"""
self.tags = [tag.lower() for tag in tags] if tags is not None else None
self.tags = (
frozenset((tag.lower() for tag in tags)) if tags is not None else None
)
self.strip = strip
self.consume_entities = consume_entities
super().__init__(**kwargs)
@ -691,7 +698,7 @@ class BleachHTMLSerializer(HTMLSerializer):
# Only leave entities in that are not ambiguous. If they're
# ambiguous, then we escape the ampersand.
if entity is not None and convert_entity(entity) is not None:
yield "&" + entity + ";"
yield f"&{entity};"
# Length of the entity plus 2--one for & at the beginning
# and one for ; at the end

@ -120,9 +120,10 @@ class Linker:
:arg list callbacks: list of callbacks to run when adjusting tag attributes;
defaults to ``bleach.linkifier.DEFAULT_CALLBACKS``
:arg list skip_tags: list of tags that you don't want to linkify the
contents of; for example, you could set this to ``['pre']`` to skip
linkifying contents of ``pre`` tags
:arg set skip_tags: set of tags that you don't want to linkify the
contents of; for example, you could set this to ``{'pre'}`` to skip
linkifying contents of ``pre`` tags; ``None`` means you don't
want linkify to skip any tags
:arg bool parse_email: whether or not to linkify email addresses
@ -130,7 +131,7 @@ class Linker:
:arg email_re: email matching regex
:arg list recognized_tags: the list of tags that linkify knows about;
:arg set recognized_tags: the set of tags that linkify knows about;
everything else gets escaped
:returns: linkified text as unicode
@ -145,15 +146,18 @@ class Linker:
# Create a parser/tokenizer that allows all HTML tags and escapes
# anything not in that list.
self.parser = html5lib_shim.BleachHTMLParser(
tags=recognized_tags,
tags=frozenset(recognized_tags),
strip=False,
consume_entities=True,
consume_entities=False,
namespaceHTMLElements=False,
)
self.walker = html5lib_shim.getTreeWalker("etree")
self.serializer = html5lib_shim.BleachHTMLSerializer(
quote_attr_values="always",
omit_optional_tags=False,
# We want to leave entities as they are without escaping or
# resolving or expanding
resolve_entities=False,
# linkify does not sanitize
sanitize=False,
# linkify preserves attr order
@ -218,8 +222,8 @@ class LinkifyFilter(html5lib_shim.Filter):
:arg list callbacks: list of callbacks to run when adjusting tag attributes;
defaults to ``bleach.linkifier.DEFAULT_CALLBACKS``
:arg list skip_tags: list of tags that you don't want to linkify the
contents of; for example, you could set this to ``['pre']`` to skip
:arg set skip_tags: set of tags that you don't want to linkify the
contents of; for example, you could set this to ``{'pre'}`` to skip
linkifying contents of ``pre`` tags
:arg bool parse_email: whether or not to linkify email addresses
@ -232,7 +236,7 @@ class LinkifyFilter(html5lib_shim.Filter):
super().__init__(source)
self.callbacks = callbacks or []
self.skip_tags = skip_tags or []
self.skip_tags = skip_tags or {}
self.parse_email = parse_email
self.url_re = url_re
@ -510,6 +514,62 @@ class LinkifyFilter(html5lib_shim.Filter):
yield {"type": "Characters", "data": str(new_text)}
yield token_buffer[-1]
def extract_entities(self, token):
"""Handles Characters tokens with entities
Our overridden tokenizer doesn't do anything with entities. However,
that means that the serializer will convert all ``&`` in Characters
tokens to ``&amp;``.
Since we don't want that, we extract entities here and convert them to
Entity tokens so the serializer will let them be.
:arg token: the Characters token to work on
:returns: generator of tokens
"""
data = token.get("data", "")
# If there isn't a & in the data, we can return now
if "&" not in data:
yield token
return
new_tokens = []
# For each possible entity that starts with a "&", we try to extract an
# actual entity and re-tokenize accordingly
for part in html5lib_shim.next_possible_entity(data):
if not part:
continue
if part.startswith("&"):
entity = html5lib_shim.match_entity(part)
if entity is not None:
if entity == "amp":
# LinkifyFilter can't match urls across token boundaries
# which is problematic with &amp; since that shows up in
# querystrings all the time. This special-cases &amp;
# and converts it to a & and sticks it in as a
# Characters token. It'll get merged with surrounding
# tokens in the BleachSanitizerfilter.__iter__ and
# escaped in the serializer.
new_tokens.append({"type": "Characters", "data": "&"})
else:
new_tokens.append({"type": "Entity", "name": entity})
# Length of the entity plus 2--one for & at the beginning
# and one for ; at the end
remainder = part[len(entity) + 2 :]
if remainder:
new_tokens.append({"type": "Characters", "data": remainder})
continue
new_tokens.append({"type": "Characters", "data": part})
yield from new_tokens
def __iter__(self):
in_a = False
in_skip_tag = None
@ -564,8 +624,8 @@ class LinkifyFilter(html5lib_shim.Filter):
new_stream = self.handle_links(new_stream)
for token in new_stream:
yield token
for new_token in new_stream:
yield from self.extract_entities(new_token)
# We've already yielded this token, so continue
continue

@ -8,21 +8,23 @@ from bleach import html5lib_shim
from bleach import parse_shim
#: List of allowed tags
ALLOWED_TAGS = [
"a",
"abbr",
"acronym",
"b",
"blockquote",
"code",
"em",
"i",
"li",
"ol",
"strong",
"ul",
]
#: Set of allowed tags
ALLOWED_TAGS = frozenset(
(
"a",
"abbr",
"acronym",
"b",
"blockquote",
"code",
"em",
"i",
"li",
"ol",
"strong",
"ul",
)
)
#: Map of allowed attributes by tag
@ -33,7 +35,7 @@ ALLOWED_ATTRIBUTES = {
}
#: List of allowed protocols
ALLOWED_PROTOCOLS = ["http", "https", "mailto"]
ALLOWED_PROTOCOLS = frozenset(("http", "https", "mailto"))
#: Invisible characters--0 to and including 31 except 9 (tab), 10 (lf), and 13 (cr)
INVISIBLE_CHARACTERS = "".join(
@ -48,6 +50,10 @@ INVISIBLE_CHARACTERS_RE = re.compile("[" + INVISIBLE_CHARACTERS + "]", re.UNICOD
INVISIBLE_REPLACEMENT_CHAR = "?"
class NoCssSanitizerWarning(UserWarning):
pass
class Cleaner:
"""Cleaner for cleaning HTML fragments of malicious content
@ -89,7 +95,7 @@ class Cleaner:
):
"""Initializes a Cleaner
:arg list tags: allowed list of tags; defaults to
:arg set tags: set of allowed tags; defaults to
``bleach.sanitizer.ALLOWED_TAGS``
:arg dict attributes: allowed attributes; can be a callable, list or dict;
@ -143,6 +149,25 @@ class Cleaner:
alphabetical_attributes=False,
)
if css_sanitizer is None:
# FIXME(willkg): this doesn't handle when attributes or an
# attributes value is a callable
attributes_values = []
if isinstance(attributes, list):
attributes_values = attributes
elif isinstance(attributes, dict):
attributes_values = []
for values in attributes.values():
if isinstance(values, (list, tuple)):
attributes_values.extend(values)
if "style" in attributes_values:
warnings.warn(
"'style' attribute specified, but css_sanitizer not set.",
category=NoCssSanitizerWarning,
)
def clean(self, text):
"""Cleans text and returns sanitized result as unicode
@ -155,9 +180,8 @@ class Cleaner:
"""
if not isinstance(text, str):
message = (
"argument cannot be of '{name}' type, must be of text type".format(
name=text.__class__.__name__
)
f"argument cannot be of {text.__class__.__name__!r} type, "
+ "must be of text type"
)
raise TypeError(message)
@ -167,13 +191,11 @@ class Cleaner:
dom = self.parser.parseFragment(text)
filtered = BleachSanitizerFilter(
source=self.walker(dom),
# Bleach-sanitizer-specific things
allowed_tags=self.tags,
attributes=self.attributes,
strip_disallowed_elements=self.strip,
strip_disallowed_tags=self.strip,
strip_html_comments=self.strip_comments,
css_sanitizer=self.css_sanitizer,
# html5lib-sanitizer things
allowed_elements=self.tags,
allowed_protocols=self.protocols,
)
@ -237,19 +259,21 @@ class BleachSanitizerFilter(html5lib_shim.SanitizerFilter):
def __init__(
self,
source,
allowed_elements=ALLOWED_TAGS,
allowed_tags=ALLOWED_TAGS,
attributes=ALLOWED_ATTRIBUTES,
allowed_protocols=ALLOWED_PROTOCOLS,
strip_disallowed_elements=False,
attr_val_is_uri=html5lib_shim.attr_val_is_uri,
svg_attr_val_allows_ref=html5lib_shim.svg_attr_val_allows_ref,
svg_allow_local_href=html5lib_shim.svg_allow_local_href,
strip_disallowed_tags=False,
strip_html_comments=True,
css_sanitizer=None,
**kwargs,
):
"""Creates a BleachSanitizerFilter instance
:arg source: html5lib TreeWalker stream as an html5lib TreeWalker
:arg list allowed_elements: allowed list of tags; defaults to
:arg set allowed_tags: set of allowed tags; defaults to
``bleach.sanitizer.ALLOWED_TAGS``
:arg dict attributes: allowed attributes; can be a callable, list or dict;
@ -258,8 +282,16 @@ class BleachSanitizerFilter(html5lib_shim.SanitizerFilter):
:arg list allowed_protocols: allowed list of protocols for links; defaults
to ``bleach.sanitizer.ALLOWED_PROTOCOLS``
:arg bool strip_disallowed_elements: whether or not to strip disallowed
elements
:arg attr_val_is_uri: set of attributes that have URI values
:arg svg_attr_val_allows_ref: set of SVG attributes that can have
references
:arg svg_allow_local_href: set of SVG elements that can have local
hrefs
:arg bool strip_disallowed_tags: whether or not to strip disallowed
tags
:arg bool strip_html_comments: whether or not to strip HTML comments
@ -267,24 +299,24 @@ class BleachSanitizerFilter(html5lib_shim.SanitizerFilter):
sanitizing style attribute values and style text; defaults to None
"""
self.attr_filter = attribute_filter_factory(attributes)
self.strip_disallowed_elements = strip_disallowed_elements
self.strip_html_comments = strip_html_comments
self.css_sanitizer = css_sanitizer
# NOTE(willkg): This is the superclass of
# html5lib.filters.sanitizer.Filter. We call this directly skipping the
# __init__ for html5lib.filters.sanitizer.Filter because that does
# things we don't need to do and kicks up the deprecation warning for
# using Sanitizer.
html5lib_shim.Filter.__init__(self, source)
# filter out html5lib deprecation warnings to use bleach from BleachSanitizerFilter init
warnings.filterwarnings(
"ignore",
message="html5lib's sanitizer is deprecated",
category=DeprecationWarning,
module="bleach._vendor.html5lib",
)
return super().__init__(
source,
allowed_elements=allowed_elements,
allowed_protocols=allowed_protocols,
**kwargs,
)
self.allowed_tags = frozenset(allowed_tags)
self.allowed_protocols = frozenset(allowed_protocols)
self.attr_filter = attribute_filter_factory(attributes)
self.strip_disallowed_tags = strip_disallowed_tags
self.strip_html_comments = strip_html_comments
self.attr_val_is_uri = attr_val_is_uri
self.svg_attr_val_allows_ref = svg_attr_val_allows_ref
self.css_sanitizer = css_sanitizer
self.svg_allow_local_href = svg_allow_local_href
def sanitize_stream(self, token_iterator):
for token in token_iterator:
@ -354,10 +386,10 @@ class BleachSanitizerFilter(html5lib_shim.SanitizerFilter):
"""
token_type = token["type"]
if token_type in ["StartTag", "EndTag", "EmptyTag"]:
if token["name"] in self.allowed_elements:
if token["name"] in self.allowed_tags:
return self.allow_token(token)
elif self.strip_disallowed_elements:
elif self.strip_disallowed_tags:
return None
else:
@ -570,7 +602,7 @@ class BleachSanitizerFilter(html5lib_shim.SanitizerFilter):
def disallowed_token(self, token):
token_type = token["type"]
if token_type == "EndTag":
token["data"] = "</%s>" % token["name"]
token["data"] = f"</{token['name']}>"
elif token["data"]:
assert token_type in ("StartTag", "EmptyTag")
@ -586,25 +618,19 @@ class BleachSanitizerFilter(html5lib_shim.SanitizerFilter):
if ns is None or ns not in html5lib_shim.prefixes:
namespaced_name = name
else:
namespaced_name = "{}:{}".format(html5lib_shim.prefixes[ns], name)
namespaced_name = f"{html5lib_shim.prefixes[ns]}:{name}"
attrs.append(
' %s="%s"'
% (
namespaced_name,
# NOTE(willkg): HTMLSerializer escapes attribute values
# already, so if we do it here (like HTMLSerializer does),
# then we end up double-escaping.
v,
)
)
token["data"] = "<{}{}>".format(token["name"], "".join(attrs))
# NOTE(willkg): HTMLSerializer escapes attribute values
# already, so if we do it here (like HTMLSerializer does),
# then we end up double-escaping.
attrs.append(f' {namespaced_name}="{v}"')
token["data"] = f"<{token['name']}{''.join(attrs)}>"
else:
token["data"] = "<%s>" % token["name"]
token["data"] = f"<{token['name']}>"
if token.get("selfClosing"):
token["data"] = token["data"][:-1] + "/>"
token["data"] = f"{token['data'][:-1]}/>"
token["type"] = "Characters"

@ -5,7 +5,7 @@ backports.csv==1.0.7
backports.functools-lru-cache==1.6.4
backports.zoneinfo==0.2.1;python_version<"3.9"
beautifulsoup4==4.11.1
bleach==5.0.1
bleach==6.0.0
certifi==2022.12.7
cheroot==9.0.0
cherrypy==18.8.0