mirror of
https://github.com/Tautulli/Tautulli.git
synced 2025-03-12 04:35:40 -07:00
Bump bleach from 5.0.1 to 6.0.0 (#1979)
* Bump bleach from 5.0.1 to 6.0.0 Bumps [bleach](https://github.com/mozilla/bleach) from 5.0.1 to 6.0.0. - [Release notes](https://github.com/mozilla/bleach/releases) - [Changelog](https://github.com/mozilla/bleach/blob/main/CHANGES) - [Commits](https://github.com/mozilla/bleach/compare/v5.0.1...v6.0.0) --- updated-dependencies: - dependency-name: bleach dependency-type: direct:production update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] <support@github.com> * Update bleach==6.0.0 --------- Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: JonnyWong16 <9099342+JonnyWong16@users.noreply.github.com> [skip ci]
This commit is contained in:
parent
6b1b6d0f32
commit
1466a391d1
@ -11,9 +11,9 @@ from bleach.sanitizer import (
|
||||
|
||||
|
||||
# yyyymmdd
|
||||
__releasedate__ = "20220627"
|
||||
__releasedate__ = "20230123"
|
||||
# x.y.z or x.y.z.dev0 -- semver
|
||||
__version__ = "5.0.1"
|
||||
__version__ = "6.0.0"
|
||||
|
||||
|
||||
__all__ = ["clean", "linkify"]
|
||||
@ -52,7 +52,7 @@ def clean(
|
||||
|
||||
:arg str text: the text to clean
|
||||
|
||||
:arg list tags: allowed list of tags; defaults to
|
||||
:arg set tags: set of allowed tags; defaults to
|
||||
``bleach.sanitizer.ALLOWED_TAGS``
|
||||
|
||||
:arg dict attributes: allowed attributes; can be a callable, list or dict;
|
||||
|
@ -38,6 +38,9 @@ from bleach._vendor.html5lib.filters.sanitizer import (
|
||||
allowed_protocols,
|
||||
allowed_css_properties,
|
||||
allowed_svg_properties,
|
||||
attr_val_is_uri,
|
||||
svg_attr_val_allows_ref,
|
||||
svg_allow_local_href,
|
||||
) # noqa: E402 module level import not at top of file
|
||||
from bleach._vendor.html5lib.filters.sanitizer import (
|
||||
Filter as SanitizerFilter,
|
||||
@ -78,127 +81,129 @@ TAG_TOKEN_TYPE_PARSEERROR = constants.tokenTypes["ParseError"]
|
||||
|
||||
#: List of valid HTML tags, from WHATWG HTML Living Standard as of 2018-10-17
|
||||
#: https://html.spec.whatwg.org/multipage/indices.html#elements-3
|
||||
HTML_TAGS = [
|
||||
"a",
|
||||
"abbr",
|
||||
"address",
|
||||
"area",
|
||||
"article",
|
||||
"aside",
|
||||
"audio",
|
||||
"b",
|
||||
"base",
|
||||
"bdi",
|
||||
"bdo",
|
||||
"blockquote",
|
||||
"body",
|
||||
"br",
|
||||
"button",
|
||||
"canvas",
|
||||
"caption",
|
||||
"cite",
|
||||
"code",
|
||||
"col",
|
||||
"colgroup",
|
||||
"data",
|
||||
"datalist",
|
||||
"dd",
|
||||
"del",
|
||||
"details",
|
||||
"dfn",
|
||||
"dialog",
|
||||
"div",
|
||||
"dl",
|
||||
"dt",
|
||||
"em",
|
||||
"embed",
|
||||
"fieldset",
|
||||
"figcaption",
|
||||
"figure",
|
||||
"footer",
|
||||
"form",
|
||||
"h1",
|
||||
"h2",
|
||||
"h3",
|
||||
"h4",
|
||||
"h5",
|
||||
"h6",
|
||||
"head",
|
||||
"header",
|
||||
"hgroup",
|
||||
"hr",
|
||||
"html",
|
||||
"i",
|
||||
"iframe",
|
||||
"img",
|
||||
"input",
|
||||
"ins",
|
||||
"kbd",
|
||||
"keygen",
|
||||
"label",
|
||||
"legend",
|
||||
"li",
|
||||
"link",
|
||||
"map",
|
||||
"mark",
|
||||
"menu",
|
||||
"meta",
|
||||
"meter",
|
||||
"nav",
|
||||
"noscript",
|
||||
"object",
|
||||
"ol",
|
||||
"optgroup",
|
||||
"option",
|
||||
"output",
|
||||
"p",
|
||||
"param",
|
||||
"picture",
|
||||
"pre",
|
||||
"progress",
|
||||
"q",
|
||||
"rp",
|
||||
"rt",
|
||||
"ruby",
|
||||
"s",
|
||||
"samp",
|
||||
"script",
|
||||
"section",
|
||||
"select",
|
||||
"slot",
|
||||
"small",
|
||||
"source",
|
||||
"span",
|
||||
"strong",
|
||||
"style",
|
||||
"sub",
|
||||
"summary",
|
||||
"sup",
|
||||
"table",
|
||||
"tbody",
|
||||
"td",
|
||||
"template",
|
||||
"textarea",
|
||||
"tfoot",
|
||||
"th",
|
||||
"thead",
|
||||
"time",
|
||||
"title",
|
||||
"tr",
|
||||
"track",
|
||||
"u",
|
||||
"ul",
|
||||
"var",
|
||||
"video",
|
||||
"wbr",
|
||||
]
|
||||
HTML_TAGS = frozenset(
|
||||
(
|
||||
"a",
|
||||
"abbr",
|
||||
"address",
|
||||
"area",
|
||||
"article",
|
||||
"aside",
|
||||
"audio",
|
||||
"b",
|
||||
"base",
|
||||
"bdi",
|
||||
"bdo",
|
||||
"blockquote",
|
||||
"body",
|
||||
"br",
|
||||
"button",
|
||||
"canvas",
|
||||
"caption",
|
||||
"cite",
|
||||
"code",
|
||||
"col",
|
||||
"colgroup",
|
||||
"data",
|
||||
"datalist",
|
||||
"dd",
|
||||
"del",
|
||||
"details",
|
||||
"dfn",
|
||||
"dialog",
|
||||
"div",
|
||||
"dl",
|
||||
"dt",
|
||||
"em",
|
||||
"embed",
|
||||
"fieldset",
|
||||
"figcaption",
|
||||
"figure",
|
||||
"footer",
|
||||
"form",
|
||||
"h1",
|
||||
"h2",
|
||||
"h3",
|
||||
"h4",
|
||||
"h5",
|
||||
"h6",
|
||||
"head",
|
||||
"header",
|
||||
"hgroup",
|
||||
"hr",
|
||||
"html",
|
||||
"i",
|
||||
"iframe",
|
||||
"img",
|
||||
"input",
|
||||
"ins",
|
||||
"kbd",
|
||||
"keygen",
|
||||
"label",
|
||||
"legend",
|
||||
"li",
|
||||
"link",
|
||||
"map",
|
||||
"mark",
|
||||
"menu",
|
||||
"meta",
|
||||
"meter",
|
||||
"nav",
|
||||
"noscript",
|
||||
"object",
|
||||
"ol",
|
||||
"optgroup",
|
||||
"option",
|
||||
"output",
|
||||
"p",
|
||||
"param",
|
||||
"picture",
|
||||
"pre",
|
||||
"progress",
|
||||
"q",
|
||||
"rp",
|
||||
"rt",
|
||||
"ruby",
|
||||
"s",
|
||||
"samp",
|
||||
"script",
|
||||
"section",
|
||||
"select",
|
||||
"slot",
|
||||
"small",
|
||||
"source",
|
||||
"span",
|
||||
"strong",
|
||||
"style",
|
||||
"sub",
|
||||
"summary",
|
||||
"sup",
|
||||
"table",
|
||||
"tbody",
|
||||
"td",
|
||||
"template",
|
||||
"textarea",
|
||||
"tfoot",
|
||||
"th",
|
||||
"thead",
|
||||
"time",
|
||||
"title",
|
||||
"tr",
|
||||
"track",
|
||||
"u",
|
||||
"ul",
|
||||
"var",
|
||||
"video",
|
||||
"wbr",
|
||||
)
|
||||
)
|
||||
|
||||
|
||||
#: List of block level HTML tags, as per https://github.com/mozilla/bleach/issues/369
|
||||
#: from mozilla on 2019.07.11
|
||||
#: https://developer.mozilla.org/en-US/docs/Web/HTML/Block-level_elements#Elements
|
||||
HTML_TAGS_BLOCK_LEVEL = frozenset(
|
||||
[
|
||||
(
|
||||
"address",
|
||||
"article",
|
||||
"aside",
|
||||
@ -232,7 +237,7 @@ HTML_TAGS_BLOCK_LEVEL = frozenset(
|
||||
"section",
|
||||
"table",
|
||||
"ul",
|
||||
]
|
||||
)
|
||||
)
|
||||
|
||||
|
||||
@ -473,7 +478,7 @@ class BleachHTMLParser(HTMLParser):
|
||||
|
||||
def __init__(self, tags, strip, consume_entities, **kwargs):
|
||||
"""
|
||||
:arg tags: list of allowed tags--everything else is either stripped or
|
||||
:arg tags: set of allowed tags--everything else is either stripped or
|
||||
escaped; if None, then this doesn't look at tags at all
|
||||
:arg strip: whether to strip disallowed tags (True) or escape them (False);
|
||||
if tags=None, then this doesn't have any effect
|
||||
@ -481,7 +486,9 @@ class BleachHTMLParser(HTMLParser):
|
||||
leave them as is when tokenizing (BleachHTMLTokenizer-added behavior)
|
||||
|
||||
"""
|
||||
self.tags = [tag.lower() for tag in tags] if tags is not None else None
|
||||
self.tags = (
|
||||
frozenset((tag.lower() for tag in tags)) if tags is not None else None
|
||||
)
|
||||
self.strip = strip
|
||||
self.consume_entities = consume_entities
|
||||
super().__init__(**kwargs)
|
||||
@ -691,7 +698,7 @@ class BleachHTMLSerializer(HTMLSerializer):
|
||||
# Only leave entities in that are not ambiguous. If they're
|
||||
# ambiguous, then we escape the ampersand.
|
||||
if entity is not None and convert_entity(entity) is not None:
|
||||
yield "&" + entity + ";"
|
||||
yield f"&{entity};"
|
||||
|
||||
# Length of the entity plus 2--one for & at the beginning
|
||||
# and one for ; at the end
|
||||
|
@ -120,9 +120,10 @@ class Linker:
|
||||
:arg list callbacks: list of callbacks to run when adjusting tag attributes;
|
||||
defaults to ``bleach.linkifier.DEFAULT_CALLBACKS``
|
||||
|
||||
:arg list skip_tags: list of tags that you don't want to linkify the
|
||||
contents of; for example, you could set this to ``['pre']`` to skip
|
||||
linkifying contents of ``pre`` tags
|
||||
:arg set skip_tags: set of tags that you don't want to linkify the
|
||||
contents of; for example, you could set this to ``{'pre'}`` to skip
|
||||
linkifying contents of ``pre`` tags; ``None`` means you don't
|
||||
want linkify to skip any tags
|
||||
|
||||
:arg bool parse_email: whether or not to linkify email addresses
|
||||
|
||||
@ -130,7 +131,7 @@ class Linker:
|
||||
|
||||
:arg email_re: email matching regex
|
||||
|
||||
:arg list recognized_tags: the list of tags that linkify knows about;
|
||||
:arg set recognized_tags: the set of tags that linkify knows about;
|
||||
everything else gets escaped
|
||||
|
||||
:returns: linkified text as unicode
|
||||
@ -145,15 +146,18 @@ class Linker:
|
||||
# Create a parser/tokenizer that allows all HTML tags and escapes
|
||||
# anything not in that list.
|
||||
self.parser = html5lib_shim.BleachHTMLParser(
|
||||
tags=recognized_tags,
|
||||
tags=frozenset(recognized_tags),
|
||||
strip=False,
|
||||
consume_entities=True,
|
||||
consume_entities=False,
|
||||
namespaceHTMLElements=False,
|
||||
)
|
||||
self.walker = html5lib_shim.getTreeWalker("etree")
|
||||
self.serializer = html5lib_shim.BleachHTMLSerializer(
|
||||
quote_attr_values="always",
|
||||
omit_optional_tags=False,
|
||||
# We want to leave entities as they are without escaping or
|
||||
# resolving or expanding
|
||||
resolve_entities=False,
|
||||
# linkify does not sanitize
|
||||
sanitize=False,
|
||||
# linkify preserves attr order
|
||||
@ -218,8 +222,8 @@ class LinkifyFilter(html5lib_shim.Filter):
|
||||
:arg list callbacks: list of callbacks to run when adjusting tag attributes;
|
||||
defaults to ``bleach.linkifier.DEFAULT_CALLBACKS``
|
||||
|
||||
:arg list skip_tags: list of tags that you don't want to linkify the
|
||||
contents of; for example, you could set this to ``['pre']`` to skip
|
||||
:arg set skip_tags: set of tags that you don't want to linkify the
|
||||
contents of; for example, you could set this to ``{'pre'}`` to skip
|
||||
linkifying contents of ``pre`` tags
|
||||
|
||||
:arg bool parse_email: whether or not to linkify email addresses
|
||||
@ -232,7 +236,7 @@ class LinkifyFilter(html5lib_shim.Filter):
|
||||
super().__init__(source)
|
||||
|
||||
self.callbacks = callbacks or []
|
||||
self.skip_tags = skip_tags or []
|
||||
self.skip_tags = skip_tags or {}
|
||||
self.parse_email = parse_email
|
||||
|
||||
self.url_re = url_re
|
||||
@ -510,6 +514,62 @@ class LinkifyFilter(html5lib_shim.Filter):
|
||||
yield {"type": "Characters", "data": str(new_text)}
|
||||
yield token_buffer[-1]
|
||||
|
||||
def extract_entities(self, token):
|
||||
"""Handles Characters tokens with entities
|
||||
|
||||
Our overridden tokenizer doesn't do anything with entities. However,
|
||||
that means that the serializer will convert all ``&`` in Characters
|
||||
tokens to ``&amp;``.
|
||||
|
||||
Since we don't want that, we extract entities here and convert them to
|
||||
Entity tokens so the serializer will let them be.
|
||||
|
||||
:arg token: the Characters token to work on
|
||||
|
||||
:returns: generator of tokens
|
||||
|
||||
"""
|
||||
data = token.get("data", "")
|
||||
|
||||
# If there isn't a & in the data, we can return now
|
||||
if "&" not in data:
|
||||
yield token
|
||||
return
|
||||
|
||||
new_tokens = []
|
||||
|
||||
# For each possible entity that starts with a "&", we try to extract an
|
||||
# actual entity and re-tokenize accordingly
|
||||
for part in html5lib_shim.next_possible_entity(data):
|
||||
if not part:
|
||||
continue
|
||||
|
||||
if part.startswith("&"):
|
||||
entity = html5lib_shim.match_entity(part)
|
||||
if entity is not None:
|
||||
if entity == "amp":
|
||||
# LinkifyFilter can't match urls across token boundaries
|
||||
# which is problematic with & since that shows up in
|
||||
# querystrings all the time. This special-cases &amp;
|
||||
# and converts it to a & and sticks it in as a
|
||||
# Characters token. It'll get merged with surrounding
|
||||
# tokens in the BleachSanitizerFilter.__iter__ and
|
||||
# escaped in the serializer.
|
||||
new_tokens.append({"type": "Characters", "data": "&"})
|
||||
else:
|
||||
new_tokens.append({"type": "Entity", "name": entity})
|
||||
|
||||
# Length of the entity plus 2--one for & at the beginning
|
||||
# and one for ; at the end
|
||||
remainder = part[len(entity) + 2 :]
|
||||
if remainder:
|
||||
new_tokens.append({"type": "Characters", "data": remainder})
|
||||
continue
|
||||
|
||||
new_tokens.append({"type": "Characters", "data": part})
|
||||
|
||||
yield from new_tokens
|
||||
|
||||
def __iter__(self):
|
||||
in_a = False
|
||||
in_skip_tag = None
|
||||
@ -564,8 +624,8 @@ class LinkifyFilter(html5lib_shim.Filter):
|
||||
|
||||
new_stream = self.handle_links(new_stream)
|
||||
|
||||
for token in new_stream:
|
||||
yield token
|
||||
for new_token in new_stream:
|
||||
yield from self.extract_entities(new_token)
|
||||
|
||||
# We've already yielded this token, so continue
|
||||
continue
|
||||
|
@ -8,21 +8,23 @@ from bleach import html5lib_shim
|
||||
from bleach import parse_shim
|
||||
|
||||
|
||||
#: List of allowed tags
|
||||
ALLOWED_TAGS = [
|
||||
"a",
|
||||
"abbr",
|
||||
"acronym",
|
||||
"b",
|
||||
"blockquote",
|
||||
"code",
|
||||
"em",
|
||||
"i",
|
||||
"li",
|
||||
"ol",
|
||||
"strong",
|
||||
"ul",
|
||||
]
|
||||
#: Set of allowed tags
|
||||
ALLOWED_TAGS = frozenset(
|
||||
(
|
||||
"a",
|
||||
"abbr",
|
||||
"acronym",
|
||||
"b",
|
||||
"blockquote",
|
||||
"code",
|
||||
"em",
|
||||
"i",
|
||||
"li",
|
||||
"ol",
|
||||
"strong",
|
||||
"ul",
|
||||
)
|
||||
)
|
||||
|
||||
|
||||
#: Map of allowed attributes by tag
|
||||
@ -33,7 +35,7 @@ ALLOWED_ATTRIBUTES = {
|
||||
}
|
||||
|
||||
#: List of allowed protocols
|
||||
ALLOWED_PROTOCOLS = ["http", "https", "mailto"]
|
||||
ALLOWED_PROTOCOLS = frozenset(("http", "https", "mailto"))
|
||||
|
||||
#: Invisible characters--0 to and including 31 except 9 (tab), 10 (lf), and 13 (cr)
|
||||
INVISIBLE_CHARACTERS = "".join(
|
||||
@ -48,6 +50,10 @@ INVISIBLE_CHARACTERS_RE = re.compile("[" + INVISIBLE_CHARACTERS + "]", re.UNICOD
|
||||
INVISIBLE_REPLACEMENT_CHAR = "?"
|
||||
|
||||
|
||||
class NoCssSanitizerWarning(UserWarning):
|
||||
pass
|
||||
|
||||
|
||||
class Cleaner:
|
||||
"""Cleaner for cleaning HTML fragments of malicious content
|
||||
|
||||
@ -89,7 +95,7 @@ class Cleaner:
|
||||
):
|
||||
"""Initializes a Cleaner
|
||||
|
||||
:arg list tags: allowed list of tags; defaults to
|
||||
:arg set tags: set of allowed tags; defaults to
|
||||
``bleach.sanitizer.ALLOWED_TAGS``
|
||||
|
||||
:arg dict attributes: allowed attributes; can be a callable, list or dict;
|
||||
@ -143,6 +149,25 @@ class Cleaner:
|
||||
alphabetical_attributes=False,
|
||||
)
|
||||
|
||||
if css_sanitizer is None:
|
||||
# FIXME(willkg): this doesn't handle when attributes or an
|
||||
# attributes value is a callable
|
||||
attributes_values = []
|
||||
if isinstance(attributes, list):
|
||||
attributes_values = attributes
|
||||
|
||||
elif isinstance(attributes, dict):
|
||||
attributes_values = []
|
||||
for values in attributes.values():
|
||||
if isinstance(values, (list, tuple)):
|
||||
attributes_values.extend(values)
|
||||
|
||||
if "style" in attributes_values:
|
||||
warnings.warn(
|
||||
"'style' attribute specified, but css_sanitizer not set.",
|
||||
category=NoCssSanitizerWarning,
|
||||
)
|
||||
|
||||
def clean(self, text):
|
||||
"""Cleans text and returns sanitized result as unicode
|
||||
|
||||
@ -155,9 +180,8 @@ class Cleaner:
|
||||
"""
|
||||
if not isinstance(text, str):
|
||||
message = (
|
||||
"argument cannot be of '{name}' type, must be of text type".format(
|
||||
name=text.__class__.__name__
|
||||
)
|
||||
f"argument cannot be of {text.__class__.__name__!r} type, "
|
||||
+ "must be of text type"
|
||||
)
|
||||
raise TypeError(message)
|
||||
|
||||
@ -167,13 +191,11 @@ class Cleaner:
|
||||
dom = self.parser.parseFragment(text)
|
||||
filtered = BleachSanitizerFilter(
|
||||
source=self.walker(dom),
|
||||
# Bleach-sanitizer-specific things
|
||||
allowed_tags=self.tags,
|
||||
attributes=self.attributes,
|
||||
strip_disallowed_elements=self.strip,
|
||||
strip_disallowed_tags=self.strip,
|
||||
strip_html_comments=self.strip_comments,
|
||||
css_sanitizer=self.css_sanitizer,
|
||||
# html5lib-sanitizer things
|
||||
allowed_elements=self.tags,
|
||||
allowed_protocols=self.protocols,
|
||||
)
|
||||
|
||||
@ -237,19 +259,21 @@ class BleachSanitizerFilter(html5lib_shim.SanitizerFilter):
|
||||
def __init__(
|
||||
self,
|
||||
source,
|
||||
allowed_elements=ALLOWED_TAGS,
|
||||
allowed_tags=ALLOWED_TAGS,
|
||||
attributes=ALLOWED_ATTRIBUTES,
|
||||
allowed_protocols=ALLOWED_PROTOCOLS,
|
||||
strip_disallowed_elements=False,
|
||||
attr_val_is_uri=html5lib_shim.attr_val_is_uri,
|
||||
svg_attr_val_allows_ref=html5lib_shim.svg_attr_val_allows_ref,
|
||||
svg_allow_local_href=html5lib_shim.svg_allow_local_href,
|
||||
strip_disallowed_tags=False,
|
||||
strip_html_comments=True,
|
||||
css_sanitizer=None,
|
||||
**kwargs,
|
||||
):
|
||||
"""Creates a BleachSanitizerFilter instance
|
||||
|
||||
:arg source: html5lib TreeWalker stream as an html5lib TreeWalker
|
||||
|
||||
:arg list allowed_elements: allowed list of tags; defaults to
|
||||
:arg set allowed_tags: set of allowed tags; defaults to
|
||||
``bleach.sanitizer.ALLOWED_TAGS``
|
||||
|
||||
:arg dict attributes: allowed attributes; can be a callable, list or dict;
|
||||
@ -258,8 +282,16 @@ class BleachSanitizerFilter(html5lib_shim.SanitizerFilter):
|
||||
:arg list allowed_protocols: allowed list of protocols for links; defaults
|
||||
to ``bleach.sanitizer.ALLOWED_PROTOCOLS``
|
||||
|
||||
:arg bool strip_disallowed_elements: whether or not to strip disallowed
|
||||
elements
|
||||
:arg attr_val_is_uri: set of attributes that have URI values
|
||||
|
||||
:arg svg_attr_val_allows_ref: set of SVG attributes that can have
|
||||
references
|
||||
|
||||
:arg svg_allow_local_href: set of SVG elements that can have local
|
||||
hrefs
|
||||
|
||||
:arg bool strip_disallowed_tags: whether or not to strip disallowed
|
||||
tags
|
||||
|
||||
:arg bool strip_html_comments: whether or not to strip HTML comments
|
||||
|
||||
@ -267,24 +299,24 @@ class BleachSanitizerFilter(html5lib_shim.SanitizerFilter):
|
||||
sanitizing style attribute values and style text; defaults to None
|
||||
|
||||
"""
|
||||
self.attr_filter = attribute_filter_factory(attributes)
|
||||
self.strip_disallowed_elements = strip_disallowed_elements
|
||||
self.strip_html_comments = strip_html_comments
|
||||
self.css_sanitizer = css_sanitizer
|
||||
# NOTE(willkg): This is the superclass of
|
||||
# html5lib.filters.sanitizer.Filter. We call this directly skipping the
|
||||
# __init__ for html5lib.filters.sanitizer.Filter because that does
|
||||
# things we don't need to do and kicks up the deprecation warning for
|
||||
# using Sanitizer.
|
||||
html5lib_shim.Filter.__init__(self, source)
|
||||
|
||||
# filter out html5lib deprecation warnings to use bleach from BleachSanitizerFilter init
|
||||
warnings.filterwarnings(
|
||||
"ignore",
|
||||
message="html5lib's sanitizer is deprecated",
|
||||
category=DeprecationWarning,
|
||||
module="bleach._vendor.html5lib",
|
||||
)
|
||||
return super().__init__(
|
||||
source,
|
||||
allowed_elements=allowed_elements,
|
||||
allowed_protocols=allowed_protocols,
|
||||
**kwargs,
|
||||
)
|
||||
self.allowed_tags = frozenset(allowed_tags)
|
||||
self.allowed_protocols = frozenset(allowed_protocols)
|
||||
|
||||
self.attr_filter = attribute_filter_factory(attributes)
|
||||
self.strip_disallowed_tags = strip_disallowed_tags
|
||||
self.strip_html_comments = strip_html_comments
|
||||
|
||||
self.attr_val_is_uri = attr_val_is_uri
|
||||
self.svg_attr_val_allows_ref = svg_attr_val_allows_ref
|
||||
self.css_sanitizer = css_sanitizer
|
||||
self.svg_allow_local_href = svg_allow_local_href
|
||||
|
||||
def sanitize_stream(self, token_iterator):
|
||||
for token in token_iterator:
|
||||
@ -354,10 +386,10 @@ class BleachSanitizerFilter(html5lib_shim.SanitizerFilter):
|
||||
"""
|
||||
token_type = token["type"]
|
||||
if token_type in ["StartTag", "EndTag", "EmptyTag"]:
|
||||
if token["name"] in self.allowed_elements:
|
||||
if token["name"] in self.allowed_tags:
|
||||
return self.allow_token(token)
|
||||
|
||||
elif self.strip_disallowed_elements:
|
||||
elif self.strip_disallowed_tags:
|
||||
return None
|
||||
|
||||
else:
|
||||
@ -570,7 +602,7 @@ class BleachSanitizerFilter(html5lib_shim.SanitizerFilter):
|
||||
def disallowed_token(self, token):
|
||||
token_type = token["type"]
|
||||
if token_type == "EndTag":
|
||||
token["data"] = "</%s>" % token["name"]
|
||||
token["data"] = f"</{token['name']}>"
|
||||
|
||||
elif token["data"]:
|
||||
assert token_type in ("StartTag", "EmptyTag")
|
||||
@ -586,25 +618,19 @@ class BleachSanitizerFilter(html5lib_shim.SanitizerFilter):
|
||||
if ns is None or ns not in html5lib_shim.prefixes:
|
||||
namespaced_name = name
|
||||
else:
|
||||
namespaced_name = "{}:{}".format(html5lib_shim.prefixes[ns], name)
|
||||
namespaced_name = f"{html5lib_shim.prefixes[ns]}:{name}"
|
||||
|
||||
attrs.append(
|
||||
' %s="%s"'
|
||||
% (
|
||||
namespaced_name,
|
||||
# NOTE(willkg): HTMLSerializer escapes attribute values
|
||||
# already, so if we do it here (like HTMLSerializer does),
|
||||
# then we end up double-escaping.
|
||||
v,
|
||||
)
|
||||
)
|
||||
token["data"] = "<{}{}>".format(token["name"], "".join(attrs))
|
||||
# NOTE(willkg): HTMLSerializer escapes attribute values
|
||||
# already, so if we do it here (like HTMLSerializer does),
|
||||
# then we end up double-escaping.
|
||||
attrs.append(f' {namespaced_name}="{v}"')
|
||||
token["data"] = f"<{token['name']}{''.join(attrs)}>"
|
||||
|
||||
else:
|
||||
token["data"] = "<%s>" % token["name"]
|
||||
token["data"] = f"<{token['name']}>"
|
||||
|
||||
if token.get("selfClosing"):
|
||||
token["data"] = token["data"][:-1] + "/>"
|
||||
token["data"] = f"{token['data'][:-1]}/>"
|
||||
|
||||
token["type"] = "Characters"
|
||||
|
||||
|
@ -5,7 +5,7 @@ backports.csv==1.0.7
|
||||
backports.functools-lru-cache==1.6.4
|
||||
backports.zoneinfo==0.2.1;python_version<"3.9"
|
||||
beautifulsoup4==4.11.1
|
||||
bleach==5.0.1
|
||||
bleach==6.0.0
|
||||
certifi==2022.12.7
|
||||
cheroot==9.0.0
|
||||
cherrypy==18.8.0
|
||||
|
Loading…
x
Reference in New Issue
Block a user