الملفات
Building-API-and-Monitoring…/venv/lib/python3.12/site-packages/mistune/helpers.py
2025-09-27 23:34:33 +03:00

194 أسطر
4.3 KiB
Python

import re
import string
from typing import Any, Dict, Tuple, Union
from .util import escape_url
# Matches at a position that is not preceded by a backslash, then consumes
# zero or more *pairs* of backslashes.  Prefixing a token with this pattern
# makes the token match only when it is not itself backslash-escaped.
# Note that the consumed backslash pairs become part of the overall match.
PREVENT_BACKSLASH = r"(?<!\\)(?:\\\\)*"
# Character class containing every character of ``string.punctuation``.
PUNCTUATION = r"[" + re.escape(string.punctuation) + r"]"
# Up to 500 items, each either a character other than backslash / "[" / "]"
# or a backslash followed by any single character.
LINK_LABEL = r"(?:[^\\\[\]]|\\.){0,500}"
# Optional spaces/tabs, at most one newline, optional spaces/tabs, then "<".
LINK_BRACKET_START = re.compile(r"[ \t]*\n?[ \t]*<")
# "<...>" where the captured content has no "<", ">", newline, backslash or NUL.
LINK_BRACKET_RE = re.compile(r"<([^<>\n\\\x00]*)>")
# Leading spaces/tabs (with at most one newline), then a captured run of
# non-whitespace that must be followed by a whitespace character or the end.
LINK_HREF_BLOCK_RE = re.compile(r"[ \t]*\n?[ \t]*([^\s]+)(?:\s|$)")
# Same leading whitespace, then a lazily captured run of characters other
# than space/tab/newline, terminated by a space/tab/newline or by a ")" that
# is not backslash-escaped.
LINK_HREF_INLINE_RE = re.compile(
r"[ \t]*\n?[ \t]*([^ \t\n]*?)(?:[ \t\n]|"
r"(?:" + PREVENT_BACKSLASH + r"\)))"
)
# One or more spaces/tabs/newlines followed by a captured quoted title.
# Inside the quotes: a backslash-escaped punctuation character, or any
# character other than the closing quote and NUL.
LINK_TITLE_RE = re.compile(
r"[ \t\n]+("
r'"(?:\\' + PUNCTUATION + r'|[^"\x00])*"|' # "title"
r"'(?:\\" + PUNCTUATION + r"|[^'\x00])*'" # 'title'
r")"
)
# Optional whitespace followed by ")".
PAREN_END_RE = re.compile(r"\s*\)")
# A tag name: an ASCII letter followed by ASCII letters, digits or hyphens.
HTML_TAGNAME = r"[A-Za-z][A-Za-z0-9-]*"
# Zero or more attributes.  Each is: whitespace, an attribute name, and an
# optional "=value" part where the value is an unquoted run, a single-quoted
# string or a double-quoted string.
HTML_ATTRIBUTES = (
r"(?:\s+[A-Za-z_:][A-Za-z0-9_.:-]*"
r'(?:\s*=\s*(?:[^ !"\'=<>`]+|\'[^\']*?\'|"[^\"]*?"))?)*'
)
# Names of the HTML elements classified as block tags, in alphabetical order
# (one row per initial letter).  How the tuple is consumed is not visible in
# this file; presumably the block-level HTML parser checks tag names against it.
BLOCK_TAGS = (
    "address", "article", "aside",
    "base", "basefont", "blockquote", "body",
    "caption", "center", "col", "colgroup",
    "dd", "details", "dialog", "dir", "div", "dl", "dt",
    "fieldset", "figcaption", "figure", "footer", "form", "frame", "frameset",
    "h1", "h2", "h3", "h4", "h5", "h6", "head", "header", "hr", "html",
    "iframe",
    "legend", "li", "link",
    "main", "menu", "menuitem", "meta",
    "nav", "noframes",
    "ol", "optgroup", "option",
    "p", "param",
    "section", "source", "summary",
    "table", "tbody", "td", "tfoot", "th", "thead", "title", "tr", "track",
    "ul",
)
# Tag names kept separate from BLOCK_TAGS; presumably elements whose content
# is treated as raw/preformatted text -- usage is not visible in this file.
PRE_TAGS = ("pre", "script", "style", "textarea")
# A link label (see LINK_LABEL) immediately followed by its closing "]".
_INLINE_LINK_LABEL_RE = re.compile(LINK_LABEL + r"\]")
# A "[" or "]" that is not backslash-escaped.  Because PREVENT_BACKSLASH
# consumes backslash pairs, the matched text may be longer than one character.
_INLINE_SQUARE_BRACKET_RE = re.compile(PREVENT_BACKSLASH + r"[\[\]]")
# A backslash followed by one punctuation character (captured as group 1).
_ESCAPE_CHAR_RE = re.compile(r"\\(" + PUNCTUATION + r")")
def unescape_char(text: str) -> str:
    """Return ``text`` with backslash escapes of punctuation removed.

    Every match of ``_ESCAPE_CHAR_RE`` (a backslash followed by one
    punctuation character) is replaced by its first capture group, i.e. the
    punctuation character without the leading backslash.
    """
    unescaped = _ESCAPE_CHAR_RE.sub(r"\1", text)
    return unescaped
def parse_link_text(src: str, pos: int) -> Union[Tuple[str, int], Tuple[None, None]]:
    """Find the ``]`` that closes a bracketed link text.

    Scanning starts at ``pos`` with one bracket already considered open (the
    nesting level starts at 1, so ``pos`` is presumably the index just after
    the opening ``[``).  Unescaped ``[`` raise the nesting level and
    unescaped ``]`` lower it; the text ends where the level returns to zero.

    :param src: the source string being parsed.
    :param pos: index at which scanning starts.
    :returns: ``(text, end)`` where ``text`` is the source between ``pos`` and
        the closing bracket (exclusive) and ``end`` is the index just past
        that bracket, or ``(None, None)`` when no balancing ``]`` exists.
    """
    level = 1
    found = False
    start_pos = pos

    while pos < len(src):
        m = _INLINE_SQUARE_BRACKET_RE.search(src, pos)
        if not m:
            break

        pos = m.end()
        # The pattern also consumes pairs of backslashes that precede the
        # bracket (escaped backslashes), so the matched text can be e.g.
        # ``\\]``.  Only its final character is the bracket itself; comparing
        # the whole match would misread such a closing bracket as an opener.
        marker = m.group(0)[-1]
        if marker == "]":
            level -= 1
            if level == 0:
                found = True
                break
        else:
            level += 1

    if found:
        # ``pos`` is just past the closing bracket; exclude the bracket.
        text = src[start_pos : pos - 1]
        return text, pos
    return None, None
def parse_link_label(src: str, start_pos: int) -> Union[Tuple[str, int], Tuple[None, None]]:
    """Match a link label and its closing ``]`` beginning at ``start_pos``.

    :param src: the source string being parsed.
    :param start_pos: index at which the label must start.
    :returns: ``(label, end)`` where ``label`` excludes the closing bracket
        and ``end`` is the index just past it, or ``(None, None)`` when
        ``_INLINE_LINK_LABEL_RE`` does not match at ``start_pos``.
    """
    matched = _INLINE_LINK_LABEL_RE.match(src, start_pos)
    if matched is None:
        return None, None

    # The pattern ends with the closing bracket; strip it from the label.
    whole = matched.group(0)
    return whole[:-1], matched.end()
def parse_link_href(src: str, start_pos: int, block: bool = False) -> Union[Tuple[str, int], Tuple[None, None]]:
    """Parse a link destination starting at ``start_pos``.

    Two forms are recognised: a destination wrapped in ``<...>`` and a bare
    run of non-whitespace characters.

    :param src: the source string being parsed.
    :param start_pos: index at which the destination (with optional leading
        whitespace) starts.
    :param block: when true, use ``LINK_HREF_BLOCK_RE`` (destination ends at
        whitespace or end of input); otherwise use ``LINK_HREF_INLINE_RE``
        (destination ends at whitespace or an unescaped ``)``).
    :returns: ``(href, pos)`` or ``(None, None)`` when nothing matches.
        For the ``<...>`` form ``pos`` is the index just past ``>``.  For a
        bare destination ``pos`` is the index of the terminating character
        (whitespace or ``)``), except in block mode when the match ran to the
        end of the input, where ``pos`` is the end of the match.
    """
    m = LINK_BRACKET_START.match(src, start_pos)
    if m:
        # Step back onto the "<" so LINK_BRACKET_RE can match it.
        start_pos = m.end() - 1
        m = LINK_BRACKET_RE.match(src, start_pos)
        if m:
            return m.group(1), m.end()
        return None, None

    if block:
        m = LINK_HREF_BLOCK_RE.match(src, start_pos)
        if not m:
            return None, None

        end_pos = m.end()
        href = m.group(1)
        # If the last matched character is the last character of the href,
        # the match ended at end-of-input and no terminator was consumed.
        if src[end_pos - 1] == href[-1]:
            return href, end_pos
        return href, end_pos - 1

    m = LINK_HREF_INLINE_RE.match(src, start_pos)
    if not m:
        return None, None

    # The terminator (one whitespace character or ")") is the last matched
    # character; report its index so the caller can re-match it.
    end_pos = m.end() - 1
    # Slice from the source instead of using group(1): the ")" alternative
    # also consumes pairs of backslashes preceding the paren, so for input
    # such as ``foo\\)`` group(1) would be ``foo`` and the escaped backslash
    # would be silently dropped from the destination.
    href = src[m.start(1) : end_pos]
    return href, end_pos
def parse_link_title(src: str, start_pos: int, max_pos: int) -> Union[Tuple[str, int], Tuple[None, None]]:
    """Parse a quoted link title located between ``start_pos`` and ``max_pos``.

    :param src: the source string being parsed.
    :param start_pos: index at which the whitespace preceding the title starts.
    :param max_pos: end position passed to the regex match as its search limit.
    :returns: ``(title, end)`` where ``title`` has its surrounding quotes
        removed and backslash escapes of punctuation resolved, and ``end`` is
        the index just past the closing quote; ``(None, None)`` when
        ``LINK_TITLE_RE`` does not match.
    """
    matched = LINK_TITLE_RE.match(src, start_pos, max_pos)
    if matched is None:
        return None, None

    quoted = matched.group(1)
    # Drop the opening and closing quote characters before unescaping.
    return unescape_char(quoted[1:-1]), matched.end()
def parse_link(src: str, pos: int) -> Union[Tuple[Dict[str, Any], int], Tuple[None, None]]:
    """Parse the destination, optional title and closing ``)`` of a link.

    :param src: the source string being parsed.
    :param pos: index at which the link destination starts.
    :returns: ``(attrs, end)`` where ``attrs`` holds ``"url"`` (the unescaped
        destination passed through ``escape_url``) and, when the title is
        non-empty, ``"title"``; ``end`` is the index just past the closing
        paren.  ``(None, None)`` when the destination is missing or no
        closing paren follows.
    """
    href, after_href = parse_link_href(src, pos)
    if href is None:
        return None, None
    assert after_href is not None

    title, after_title = parse_link_title(src, after_href, len(src))
    # Continue after the title when there is one, else after the destination.
    resume_at = after_title if after_title else after_href

    closing = PAREN_END_RE.match(src, resume_at)
    if closing is None:
        return None, None

    attrs = {"url": escape_url(unescape_char(href))}
    if title:
        attrs["title"] = title
    return attrs, closing.end()