2026-05-01 05:23:48 网络安全文章来源：ZONE.CI 全球网 0 阅读模式

文章总结： 本文详细分析了Bleach库清洗后markdown2SafeMode模式下Alt属性XSS漏洞的完整攻击链路。通过追踪代码执行流程，揭示了双哈希逃逸技术如何绕过安全过滤，实现Markdown语法与JavaScript的关联执行。文章提供了完整的漏洞分析和技术细节，对WEB安全防护具有重要参考价值。 综合评分： 82 文章分类： WEB安全,漏洞分析,安全开发,代码审计,应用安全

cover_image

Markdown 双哈希逃逸 (Bleach 清洗后 markdown2 SafeMode 的 Alt 属性 XSS 完整链路)

原创

YMsora YMsora

YMs0ra的安全漫路

2026年4月30日 21:40 浙江

在小说阅读器读本章

去阅读

就按照闲谈学习去完成这个吧

毋庸置疑的点只有两个：一是需要让markdown语法和js产生联系

以及让bot的无头浏览器执行我们的js

我们看代码片段

safe_md = bleach.clean(
    md,
    tags=[],
    attributes={},
    protocols=[],
    strip=True,
    strip_comments=True,
)

直接进行追溯

这个函数传的参数很多都是默认的

def clean(
    text,
    tags=ALLOWED_TAGS,  # []
    attributes=ALLOWED_ATTRIBUTES,  # {}
    protocols=ALLOWED_PROTOCOLS,  # []
    strip=False,
    strip_comments=True,
    css_sanitizer=None,
):

    cleaner = Cleaner(
        tags=tags,
        attributes=attributes,
        protocols=protocols,
        strip=strip,
        strip_comments=strip_comments,
        css_sanitizer=css_sanitizer,
    )
    return cleaner.clean(text)

继续跟

def clean(self, text):
    if not isinstance(text, str):
        message = (
            f"argument cannot be of {text.__class__.__name__!r} type, "
            + "must be of text type"
        )
        raise TypeError(message)

    if not text:
        return ""

    dom = self.parser.parseFragment(text)  #text是的
    filtered = BleachSanitizerFilter(
        source=self.walker(dom),
        allowed_tags=self.tags,
        attributes=self.attributes,
        strip_disallowed_tags=self.strip,
        strip_html_comments=self.strip_comments,
        css_sanitizer=self.css_sanitizer,
        allowed_protocols=self.protocols,
    )

    # Apply any filters after the BleachSanitizerFilter
    for filter_class in self.filters:
        filtered = filter_class(source=filtered)

    return self.serializer.render(filtered)

其中parseFragment(text)是将其解析为良好的树形结构，暂时不看

看看BleachSanitizerFilter

def sanitize_token(self, token):
    """Sanitize a token either by HTML-encoding or dropping.

    Unlike sanitizer.Filter, allowed_attributes can be a dict of {'tag':
    ['attribute', 'pairs'], 'tag': callable}.

    Here callable is a function with two arguments of attribute name and
    value. It should return true of false.

    Also gives the option to strip tags instead of encoding.

    :arg dict token: token to sanitize

    :returns: token or list of tokens

    """
    token_type = token["type"]
    if token_type in ["StartTag", "EndTag", "EmptyTag"]:
        if token["name"] in self.allowed_tags:
            return self.allow_token(token)

        elif self.strip_disallowed_tags:
            return None

        else:
            return self.disallowed_token(token)

    elif token_type == "Comment":
        if not self.strip_html_comments:
            # call lxml.sax.saxutils to escape &, <, and > in addition to " and '
            token["data"] = html5lib_shim.escape(
                token["data"], entities={'"': "&quot;", "'": "&#x27;"}
            )
            return token
        else:
            return None

    elif token_type == "Characters":
        return self.sanitize_characters(token)

    else:
        return token

其实就是处理html标签：在白名单(allowed_tags)里的放行；不在白名单里的，strip为True时直接丢弃，否则转义成不会被解析的文本

然后直接转markdown，看看当markdown的safe标签的时候的过滤

html = Markup(markdown2.markdown(safe_md, safe_mode="escape"))

def _sanitize_html(self, s: str) -> str:
    if self.safe_mode == "replace":
        return self.html_removed_text
    elif self.safe_mode == "escape":
        replacements = [
            ('&', '&amp;'),
            ('<', '&lt;'),
            ('>', '&gt;'),
        ]
        for before, after in replacements:
            s = s.replace(before, after)
        return s
    else:
        raise MarkdownError("invalid value for 'safe_mode': %r (must be "
                            "'escape' or 'replace')" % self.safe_mode)

_inline_link_title = re.compile(r'''
        (                   # \1
          [ \t]+
          (['"])            # quote char = \2
          (?P<title>.*?)
          \2
        )?                  # title is optional
      \)$
    ''', re.X | re.S)
_tail_of_reference_link_re = re.compile(r'''
      # Match tail of: [text][id]
      [ ]?          # one optional space
      (?:\n[ ]*)?   # one optional newline followed by spaces
      \[
        (?P<id>[^\[\]]*?)
      \]
    ''', re.X | re.S)

_whitespace = re.compile(r'\s*')

_strip_anglebrackets = re.compile(r'<(.*)>.*')

貌似核心不在这，我们回去跟text

在text最开始进markdown主函数的时候调用了convert

def convert(self, text: str) -> 'UnicodeWithAttrs':
    """Convert the given text."""
    # Main function. The order in which other subs are called here is
    # essential. Link and image substitutions need to happen before
    # _EscapeSpecialChars(), so that any *'s or _'s in the <a>
    # and <img> tags get encoded.

    # Clear the global hashes. If we don't clear these, you get conflicts
    # from other articles when generating a page which contains more than
    # one article (e.g. an index page that shows the N most recent
    # articles):
    self.reset()

    if not isinstance(text, str):
        # TODO: perhaps shouldn't presume UTF-8 for string input?
        text = str(text, 'utf-8')

    if self.use_file_vars:
        # Look for emacs-style file variable hints.
        text = self._emacs_oneliner_vars_pat.sub(self._emacs_vars_oneliner_sub, text)
        emacs_vars = self._get_emacs_vars(text)
        if "markdown-extras" in emacs_vars:
            splitter = re.compile("[ ,]+")
            for e in splitter.split(emacs_vars["markdown-extras"]):
                if '=' in e:
                    ename, earg = e.split('=', 1)
                    try:
                        earg = int(earg)
                    except ValueError:
                        pass
                else:
                    ename, earg = e, None
                self.extras[ename] = earg

        self._setup_extras()

    # Standardize line endings:
    text = text.replace("\r\n", "\n")
    text = text.replace("\r", "\n")

    # Make sure $text ends with a couple of newlines:
    text += "\n\n"

    # Convert all tabs to spaces.
    text = self._detab(text)

    # Strip any lines consisting only of spaces and tabs.
    # This makes subsequent regexen easier to write, because we can
    # match consecutive blank lines with /\n+/ instead of something
    # contorted like /[ \t]*\n+/ .
    text = self._ws_only_line_re.sub("", text)

    # strip metadata from head and extract
    if "metadata" in self.extras:
        text = self._extract_metadata(text)

    text = self.preprocess(text)

    if self.safe_mode:
        text = self._hash_html_spans(text)

    # Turn block-level HTML blocks into hash entries
    text = self._hash_html_blocks(text, raw=True)

    # Strip link definitions, store in hashes.
    if "footnotes" in self.extras:
        # Must do footnotes first because an unlucky footnote defn
        # looks like a link defn:
        #   [^4]: this "looks like a link defn"
        text = self._strip_footnote_definitions(text)
    text = self._strip_link_definitions(text)

    text = self._run_block_gamut(text)

    if "footnotes" in self.extras:
        text = self._do_footnote_marker(text)
        text = self._add_footnotes(text)

    text = self.postprocess(text)

    text = self._unescape_special_chars(text)

    text = self._unhash_html_spans(text)
    if self.safe_mode:
        # return the removed text warning to its markdown.py compatible form
        text = text.replace(self.html_removed_text, self.html_removed_text_compat)

    do_target_blank_links = "target-blank-links" in self.extras
    do_nofollow_links = "nofollow" in self.extras

    if do_target_blank_links and do_nofollow_links:
        text = self._a_nofollow_or_blank_links.sub(r'<\1 rel="nofollow noopener" target="_blank"\2', text)
    elif do_target_blank_links:
        text = self._a_nofollow_or_blank_links.sub(r'<\1 rel="noopener" target="_blank"\2', text)
    elif do_nofollow_links:
        text = self._a_nofollow_or_blank_links.sub(r'<\1 rel="nofollow"\2', text)

    if "toc" in self.extras and self._toc:
        if self.extras['header-ids'].get('mixed'):
            # TOC will only be out of order if mixed headers is enabled
            def toc_sort(entry):
                '''Sort the TOC by order of appearance in text'''
                match = re.search(
                    # header tag, any attrs, the ID, any attrs, the text, close tag
                    r'^<(h%d).*?id=(["\'])%s\2.*>%s</\1>$' % (entry[0], entry[1], re.escape(entry[2])),
                    text, re.M
                )
                return match.start() if match else 0

            self._toc.sort(key=toc_sort)
        self._toc_html = calculate_toc_html(self._toc)

        # Prepend toc html to output
        if self.cli or (self.extras['toc'] is not None and self.extras['toc'].get('prepend', False)):
            text = f'{self._toc_html}\n{text}'

    text += "\n"

    # Attach attrs to output
    rv = UnicodeWithAttrs(text)

    if "toc" in self.extras and self._toc:
        rv.toc_html = self._toc_html

    if "metadata" in self.extras:
        rv.metadata = self.metadata
    return rv

这一段是没有校验其他字段的

if self.safe_mode:
    text = self._hash_html_spans(text)

# Turn block-level HTML blocks into hash entries
text = self._hash_html_blocks(text, raw=True)

# Strip link definitions, store in hashes.

text = self._strip_link_definitions(text)

text = self._run_block_gamut(text)

text = self.postprocess(text)

text = self._unescape_special_chars(text)

text = self._unhash_html_spans(text)

先看看_hash_html_spans

因为比较长，只截回调那一部分，也就是非函数而是调用的部分

code_hashes = {}
text = self._code_span_re.sub(
    lambda m: self._hash_span(m.string[m.start(): m.end()], code_hashes),
    text
)

因为md是reset的新状态，那么当_code_span_re这个正则被匹配的时候就会进行hash_span回调，

继续追溯

_code_span_re = re.compile(r'''
        (?<!\\)
        (`+)        # \1 = Opening run of `
        (?!`)       # See Note A test/tm-cases/escapes.text
        (.+?)       # \2 = The code block
        (?<!`)
        \1          # Matching closer
        (?!`)
    ''', re.X | re.S)

def _hash_span(self, text: str, hash_table: Optional[dict] = None) -> str:
    '''
    Wrapper around `_hash_text` that also adds the hash to `self.hash_spans`,
    meaning it will be automatically unhashed during conversion.

    Args:
        text: the text to hash
        hash_table: the dict to insert the hash into. If omitted will default to `self.html_spans`

    Returns:
        The hashed text
    '''
    key = _hash_text(text)
    if hash_table is not None:
        hash_table[key] = text
    else:
        self.html_spans[key] = text
    return key

跟hash

def _hash_text(s: str) -> str:
    return 'md5-' + sha256(SECRET_SALT + s.encode("utf-8")).hexdigest()[32:]

# Table of hash values for escaped characters:
g_escape_table = {ch: _hash_text(ch)
    for ch in '\\`*_{}[]()>#+-.!'}

# Ampersand-encoding based entirely on Nat Irons's Amputator MT plugin:
#   http://bumppo.net/projects/amputator/
_AMPERSAND_BODY_RE = r'#?[xX]?(?:[0-9a-fA-F]+|\w+);'
_AMPERSAND_RE = re.compile(r'&(?!%s)' % _AMPERSAND_BODY_RE)
_ESCAPED_AMPERSAND_RE = re.compile(r'(?:\\\\)*\\&(%s)' % _AMPERSAND_BODY_RE)

这里转hash，然后就是正常的图片转img标签。然后就是_unescape_special_chars

def _unescape_special_chars(self, text: str) -> str:
    # Swap back in all the special characters we've hidden.
    hashmap = tuple(self._escape_table.items()) + tuple(self._code_table.items())
    # html_blocks table is in format {hash: item} compared to usual {item: hash}
    hashmap += tuple(tuple(reversed(i)) for i in self.html_blocks.items())
    while True:
        orig_text = text
        for ch, hash in hashmap:
            text = text.replace(hash, ch)
        if text == orig_text:
            break
    return text

它用元组将hash换了回来

也就是一个md5对应的原本代码

在这里需要先明确

md的图片语法是 ![x](y)，也就是这里的x是alt属性，y是src

但是有一点，它转hash转回来的时候只换了src，并没有换alt属性里的东西，

所以alt的md5就会被直接泄露出来

result = (
    f'<img src="..."'
    f' alt="{self.md._hash_span(_xml_escape_attr(link_text))}"'  # ← 这里！
    ...
)

并且因为clean的缘故没法插入html标签

所以执行这个分两步

极其巧妙的截断

完结

免责声明：

本文所载程序、技术方法仅面向合法合规的安全研究与教学场景，旨在提升网络安全防护能力，具有明确的技术研究属性。

任何单位或个人未经授权，将本文内容用于攻击、破坏等非法用途的，由此引发的全部法律责任、民事赔偿及连带责任，均由行为人独立承担，本站不承担任何连带责任。

本站内容均为技术交流与知识分享目的发布，若存在版权侵权或其他异议，请通过邮件联系处理，具体联系方式可点击页面上方的联系我。

本文转载自：YMs0ra的安全漫路 YMsora YMsora《Markdown 双哈希逃逸 (Bleach 清洗后 markdown2 SafeMode 的 Alt 属性 XSS 完整链路)》