Building Project Files
هذا الالتزام موجود في:
406
venv/lib/python3.12/site-packages/mistune/inline_parser.py
Normal file
406
venv/lib/python3.12/site-packages/mistune/inline_parser.py
Normal file
@@ -0,0 +1,406 @@
|
||||
import re
|
||||
from typing import (
|
||||
Any,
|
||||
Dict,
|
||||
List,
|
||||
Match,
|
||||
MutableMapping,
|
||||
Optional,
|
||||
)
|
||||
|
||||
from .core import InlineState, Parser
|
||||
from .helpers import (
|
||||
HTML_ATTRIBUTES,
|
||||
HTML_TAGNAME,
|
||||
PREVENT_BACKSLASH,
|
||||
PUNCTUATION,
|
||||
parse_link,
|
||||
parse_link_label,
|
||||
parse_link_text,
|
||||
unescape_char,
|
||||
)
|
||||
from .util import escape_url, unikey
|
||||
|
||||
# Optional whitespace followed by a closing parenthesis.
PAREN_END_RE = re.compile(r"\s*\)")

# Pattern source (not compiled) for an e-mail autolink wrapped in angle
# brackets, e.g. ``<user@example.com>``: a local part, ``@``, then one or more
# dot-separated labels of at most 63 characters that neither start nor end
# with a hyphen.
AUTO_EMAIL = (
    r"""<[a-zA-Z0-9.!#$%&'*+\/=?^_`{|}~-]+@[a-zA-Z0-9]"""
    r"(?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?"
    r"(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*>"
)
|
||||
|
||||
# Pattern source (not compiled) matching one complete inline HTML construct.
# Each alternative below covers one kind of construct.
INLINE_HTML = (
    r"<" + HTML_TAGNAME + HTML_ATTRIBUTES + r"\s*/?>|"  # open tag
    r"</" + HTML_TAGNAME + r"\s*>|"  # close tag
    r"<!--(?!>|->)(?:(?!--)[\s\S])+?(?<!-)-->|"  # comment
    r"<\?[\s\S]+?\?>|"  # script like <?php?>
    r"<![A-Z][\s\S]+?>|"  # doctype
    r"<!\[CDATA[\s\S]+?\]\]>"  # cdata
)

# Closing-delimiter patterns keyed by the opening emphasis marker.  Each one
# requires the closing run to be preceded by a non-space, non-marker character
# (or an escaped marker) and not to be followed by one more marker character.
# The underscore variants additionally require a word boundary after the run.
EMPHASIS_END_RE = {
    "*": re.compile(r"(?:" + PREVENT_BACKSLASH + r"\\\*|[^\s*])\*(?!\*)"),
    "_": re.compile(r"(?:" + PREVENT_BACKSLASH + r"\\_|[^\s_])_(?!_)\b"),
    "**": re.compile(r"(?:" + PREVENT_BACKSLASH + r"\\\*|[^\s*])\*\*(?!\*)"),
    "__": re.compile(r"(?:" + PREVENT_BACKSLASH + r"\\_|[^\s_])__(?!_)\b"),
    "***": re.compile(r"(?:" + PREVENT_BACKSLASH + r"\\\*|[^\s*])\*\*\*(?!\*)"),
    "___": re.compile(r"(?:" + PREVENT_BACKSLASH + r"\\_|[^\s_])___(?!_)\b"),
}
|
||||
|
||||
|
||||
class InlineParser(Parser[InlineState]):
    """Tokenize inline-level Markdown found in ``state.src``.

    ``SPECIFICATION`` maps rule names to the regex source that detects the
    *start* of each inline construct; the matching ``parse_<rule>`` method then
    consumes the construct, appends tokens to the state and returns the
    position at which scanning should resume.  A falsy return value means the
    construct was not recognised (see :meth:`parse`).

    NOTE(review): ``compile_sc``, ``parse_method``, ``self.rules`` and
    ``self.specification`` come from the ``Parser`` base class, which is not
    part of this file -- confirm their exact semantics there.
    """

    # NOTE(review): presumably the regex flags used by ``Parser.compile_sc``.
    sc_flag = 0
    state_cls = InlineState

    #: linebreak leaves two spaces at the end of line
    STD_LINEBREAK = r"(?:\\| {2,})\n\s*"

    #: every new line becomes <br>
    HARD_LINEBREAK = r" *\n\s*"

    # we only need to find the start pattern of an inline token
    SPECIFICATION = {
        # e.g. \`, \$
        "escape": r"(?:\\" + PUNCTUATION + ")+",
        # `code, ```code
        "codespan": r"`{1,}",
        # *w, **w, _w, __w
        "emphasis": r"\*{1,3}(?=[^\s*])|\b_{1,3}(?=[^\s_])",
        # [link], ![img]
        "link": r"!?\[",
        # <https://example.com>. regex copied from commonmark.js
        "auto_link": r"<[A-Za-z][A-Za-z0-9.+-]{1,31}:[^<>\x00-\x20]*>",
        "auto_email": AUTO_EMAIL,
        "inline_html": INLINE_HTML,
        "linebreak": STD_LINEBREAK,
        # A soft break uses the same "newline with surrounding whitespace"
        # pattern as a hard break; only the emitted token type differs.
        "softbreak": HARD_LINEBREAK,
        # "prec_*" rules only detect the *beginning* of a construct; they are
        # used by precedence_scan() to find candidates inside a span.
        "prec_auto_link": r"<[A-Za-z][A-Za-z\d.+-]{1,31}:",
        "prec_inline_html": r"</?" + HTML_TAGNAME + r"|<!|<\?",
    }
    DEFAULT_RULES = (
        "escape",
        "codespan",
        "emphasis",
        "link",
        "auto_link",
        "auto_email",
        "inline_html",
        "linebreak",
    )

    def __init__(self, hard_wrap: bool = False) -> None:
        """Create the parser.

        :param hard_wrap: when true, the ``linebreak`` rule uses
            ``HARD_LINEBREAK`` so every newline yields a ``linebreak`` token;
            otherwise the ``softbreak`` rule is appended to the active rules.
        """
        super().__init__()

        self.hard_wrap = hard_wrap
        # lazy add linebreak
        if hard_wrap:
            self.specification["linebreak"] = self.HARD_LINEBREAK
        else:
            self.rules.append("softbreak")

        # Rule name -> bound ``parse_<name>`` method, for every active rule.
        self._methods = {name: getattr(self, "parse_" + name) for name in self.rules}

    def parse_escape(self, m: Match[str], state: InlineState) -> int:
        """Emit the unescaped form of a backslash-escape run as a text token."""
        text = unescape_char(m.group(0))
        state.append_token(
            {
                "type": "text",
                "raw": text,
            }
        )
        return m.end()

    def parse_link(self, m: Match[str], state: InlineState) -> Optional[int]:
        """Parse a link or image starting at ``[`` / ``![``.

        Handles inline links ``[text](url "title")`` and reference links
        ``[text][label]`` / ``[label]`` resolved through
        ``state.env["ref_links"]``.

        :returns: the position after the consumed construct, or ``None`` when
            no link could be formed at this position.
        """
        pos = m.end()

        marker = m.group(0)
        is_image = marker[0] == "!"
        # Images inside images and links inside links are not allowed; the
        # marker is emitted as plain text instead.
        if is_image and state.in_image:
            state.append_token({"type": "text", "raw": marker})
            return pos
        elif not is_image and state.in_link:
            state.append_token({"type": "text", "raw": marker})
            return pos

        text = None
        label, end_pos = parse_link_label(state.src, pos)
        if label is None:
            text, end_pos = parse_link_text(state.src, pos)
            if text is None:
                return None

        assert end_pos is not None

        if text is None:
            text = label

        assert text is not None

        # Bracketed text at the very end of the source that is not a valid
        # label has nothing after it that could turn it into a link.
        if end_pos >= len(state.src) and label is None:
            return None

        rules = ["codespan", "prec_auto_link", "prec_inline_html"]
        prec_pos = self.precedence_scan(m, state, end_pos, rules)
        if prec_pos:
            return prec_pos

        if end_pos < len(state.src):
            c = state.src[end_pos]
            if c == "(":
                # standard link [text](<url> "title")
                # ``parse_link`` here is the module-level helper imported from
                # ``.helpers``, not this method.
                attrs, pos2 = parse_link(state.src, end_pos + 1)
                if pos2:
                    token = self.__parse_link_token(is_image, text, attrs, state)
                    state.append_token(token)
                    return pos2

            elif c == "[":
                # standard ref link [text][label]
                label2, pos2 = parse_link_label(state.src, end_pos + 1)
                if pos2:
                    end_pos = pos2
                    if label2:
                        label = label2

        if label is None:
            return None

        ref_links = state.env.get("ref_links")
        if not ref_links:
            return None

        key = unikey(label)
        env = ref_links.get(key)
        if env:
            attrs = {"url": env["url"], "title": env.get("title")}
            token = self.__parse_link_token(is_image, text, attrs, state)
            token["ref"] = key
            token["label"] = label
            state.append_token(token)
            return end_pos
        return None

    def __parse_link_token(
        self,
        is_image: bool,
        text: str,
        attrs: Optional[Dict[str, Any]],
        state: InlineState,
    ) -> Dict[str, Any]:
        """Build a ``link`` or ``image`` token whose children are parsed from ``text``.

        The text is rendered in a copy of ``state`` with ``in_image`` or
        ``in_link`` set, which is what blocks nested images/links in
        :meth:`parse_link`.
        """
        new_state = state.copy()
        new_state.src = text
        if is_image:
            new_state.in_image = True
            token_type = "image"
        else:
            new_state.in_link = True
            token_type = "link"
        return {
            "type": token_type,
            "children": self.render(new_state),
            "attrs": attrs,
        }

    def parse_auto_link(self, m: Match[str], state: InlineState) -> int:
        """Turn ``<scheme:...>`` into a link token (plain text inside a link)."""
        text = m.group(0)
        pos = m.end()
        if state.in_link:
            self.process_text(text, state)
            return pos

        # Drop the surrounding angle brackets.
        text = text[1:-1]
        self._add_auto_link(text, text, state)
        return pos

    def parse_auto_email(self, m: Match[str], state: InlineState) -> int:
        """Turn ``<user@host>`` into a ``mailto:`` link token (plain text inside a link)."""
        text = m.group(0)
        pos = m.end()
        if state.in_link:
            self.process_text(text, state)
            return pos

        # Drop the surrounding angle brackets.
        text = text[1:-1]
        url = "mailto:" + text
        self._add_auto_link(url, text, state)
        return pos

    def _add_auto_link(self, url: str, text: str, state: InlineState) -> None:
        """Append a link token pointing at ``url`` with ``text`` as its only child."""
        state.append_token(
            {
                "type": "link",
                "children": [{"type": "text", "raw": text}],
                "attrs": {"url": escape_url(url)},
            }
        )

    def parse_emphasis(self, m: Match[str], state: InlineState) -> int:
        """Parse ``*em*``, ``**strong**`` and ``***both***`` (or ``_`` variants).

        :returns: the position after the closing marker, or the position right
            after the opening marker when the marker is emitted as plain text.
        """
        pos = m.end()

        marker = m.group(0)
        mlen = len(marker)
        # No emphasis directly inside emphasis, no strong directly inside strong.
        if mlen == 1 and state.in_emphasis:
            state.append_token({"type": "text", "raw": marker})
            return pos
        elif mlen == 2 and state.in_strong:
            state.append_token({"type": "text", "raw": marker})
            return pos

        _end_re = EMPHASIS_END_RE[marker]
        m1 = _end_re.search(state.src, pos)
        if not m1:
            # No closing marker: the opening marker is literal text.
            state.append_token({"type": "text", "raw": marker})
            return pos

        end_pos = m1.end()
        text = state.src[pos : end_pos - mlen]

        prec_pos = self.precedence_scan(m, state, end_pos)
        if prec_pos:
            return prec_pos

        new_state = state.copy()
        new_state.src = text
        if mlen == 1:
            new_state.in_emphasis = True
            children = self.render(new_state)
            state.append_token({"type": "emphasis", "children": children})
        elif mlen == 2:
            new_state.in_strong = True
            children = self.render(new_state)
            state.append_token({"type": "strong", "children": children})
        else:
            # Three markers: strong nested inside emphasis.
            new_state.in_emphasis = True
            new_state.in_strong = True

            children = [{"type": "strong", "children": self.render(new_state)}]
            state.append_token(
                {
                    "type": "emphasis",
                    "children": children,
                }
            )
        return end_pos

    def parse_codespan(self, m: Match[str], state: InlineState) -> int:
        """Parse a backtick-delimited code span.

        If no closing backtick run of the same length exists, the opening run
        is emitted as plain text.
        """
        marker = m.group(0)
        # require same marker with same length at end

        pattern = re.compile(r"(.*?[^`])" + marker + r"(?!`)", re.S)

        pos = m.end()
        m2 = pattern.match(state.src, pos)
        if m2:
            end_pos = m2.end()
            code = m2.group(1)
            # Line endings are treated like spaces
            code = code.replace("\n", " ")
            # Strip one space from each side, but only when the content is
            # not made up entirely of spaces.
            if code.strip() and code.startswith(" ") and code.endswith(" "):
                code = code[1:-1]
            state.append_token({"type": "codespan", "raw": code})
            return end_pos
        else:
            state.append_token({"type": "text", "raw": marker})
            return pos

    def parse_linebreak(self, m: Match[str], state: InlineState) -> int:
        """Append a ``linebreak`` token."""
        state.append_token({"type": "linebreak"})
        return m.end()

    def parse_softbreak(self, m: Match[str], state: InlineState) -> int:
        """Append a ``softbreak`` token."""
        state.append_token({"type": "softbreak"})
        return m.end()

    def parse_inline_html(self, m: Match[str], state: InlineState) -> int:
        """Append raw inline HTML and track whether we are inside an ``<a>`` element."""
        end_pos = m.end()
        html = m.group(0)
        state.append_token({"type": "inline_html", "raw": html})
        # Raw anchor tags toggle ``in_link`` so Markdown links are not nested
        # inside an HTML link.
        if html.startswith(("<a ", "<a>", "<A ", "<A>")):
            state.in_link = True
        elif html.startswith(("</a ", "</a>", "</A ", "</A>")):
            state.in_link = False
        return end_pos

    def process_text(self, text: str, state: InlineState) -> None:
        """Append ``text`` as a plain text token."""
        state.append_token({"type": "text", "raw": text})

    def parse(self, state: InlineState) -> List[Dict[str, Any]]:
        """Scan ``state.src`` and fill ``state.tokens``.

        Text between rule matches is emitted through :meth:`process_text`.
        When a rule method returns a falsy position, the single character at
        the match start is emitted as text and scanning resumes right after it.
        """
        pos = 0
        sc = self.compile_sc()
        while pos < len(state.src):
            m = sc.search(state.src, pos)
            if not m:
                break

            end_pos = m.start()
            if end_pos > pos:
                hole = state.src[pos:end_pos]
                self.process_text(hole, state)

            new_pos = self.parse_method(m, state)
            if not new_pos:
                # move cursor 1 character forward
                pos = end_pos + 1
                hole = state.src[end_pos:pos]
                self.process_text(hole, state)
            else:
                pos = new_pos

        if pos == 0:
            # special case, just pure text
            self.process_text(state.src, state)
        elif pos < len(state.src):
            self.process_text(state.src[pos:], state)
        return state.tokens

    def precedence_scan(
        self,
        m: Match[str],
        state: InlineState,
        end_pos: int,
        rules: Optional[List[str]] = None,
    ) -> Optional[int]:
        """Check whether a higher-precedence construct straddles ``end_pos``.

        Searches between the end of ``m`` and ``end_pos`` for the start of one
        of ``rules``.  If that construct parses and ends at or beyond
        ``end_pos``, the source from the start of ``m`` up to the construct is
        emitted as text, followed by the construct's tokens.

        :returns: the position after the inner construct, or ``None`` when the
            caller should go on with its own parsing.
        """
        if rules is None:
            rules = ["codespan", "link", "prec_auto_link", "prec_inline_html"]

        mark_pos = m.end()
        sc = self.compile_sc(rules)
        m1 = sc.search(state.src, mark_pos, end_pos)
        if not m1:
            return None

        lastgroup = m1.lastgroup
        if not lastgroup:
            return None
        # A "prec_*" rule only detects a start; re-match with the full rule.
        rule_name = lastgroup.replace("prec_", "")
        sc = self.compile_sc([rule_name])
        m2 = sc.match(state.src, m1.start())
        if not m2:
            return None

        func = self._methods[rule_name]
        # Parse into a scratch state so nothing is committed unless the inner
        # construct really extends to or past ``end_pos``.
        new_state = state.copy()
        new_state.src = state.src
        m2_pos = func(m2, new_state)
        if not m2_pos or m2_pos < end_pos:
            return None

        raw_text = state.src[m.start() : m2.start()]
        state.append_token({"type": "text", "raw": raw_text})
        for token in new_state.tokens:
            state.append_token(token)
        return m2_pos

    def render(self, state: InlineState) -> List[Dict[str, Any]]:
        """Parse ``state`` and return its token list."""
        self.parse(state)
        return state.tokens

    def __call__(self, s: str, env: MutableMapping[str, Any]) -> List[Dict[str, Any]]:
        """Parse the inline source ``s`` in a fresh state built from ``env``."""
        state = self.state_cls(env)
        state.src = s
        return self.render(state)
|
المرجع في مشكلة جديدة
حظر مستخدم