Building Project Files

This commit is contained in:
ahmedgamalyousef
2025-09-27 23:34:33 +03:00
commit bb279b263f
1831 changed files with 330295 additions and 0 deletions


@@ -0,0 +1,406 @@
import re
from typing import (
    Any,
    Dict,
    List,
    Match,
    MutableMapping,
    Optional,
)

from .core import InlineState, Parser
from .helpers import (
    HTML_ATTRIBUTES,
    HTML_TAGNAME,
    PREVENT_BACKSLASH,
    PUNCTUATION,
    parse_link,
    parse_link_label,
    parse_link_text,
    unescape_char,
)
from .util import escape_url, unikey
PAREN_END_RE = re.compile(r"\s*\)")

AUTO_EMAIL = (
    r"""<[a-zA-Z0-9.!#$%&'*+\/=?^_`{|}~-]+@[a-zA-Z0-9]"""
    r"(?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?"
    r"(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*>"
)

INLINE_HTML = (
    r"<" + HTML_TAGNAME + HTML_ATTRIBUTES + r"\s*/?>|"  # open tag
    r"</" + HTML_TAGNAME + r"\s*>|"  # close tag
    r"<!--(?!>|->)(?:(?!--)[\s\S])+?(?<!-)-->|"  # comment
    r"<\?[\s\S]+?\?>|"  # script like <?php?>
    r"<![A-Z][\s\S]+?>|"  # doctype
    r"<!\[CDATA[\s\S]+?\]\]>"  # cdata
)

EMPHASIS_END_RE = {
    "*": re.compile(r"(?:" + PREVENT_BACKSLASH + r"\\\*|[^\s*])\*(?!\*)"),
    "_": re.compile(r"(?:" + PREVENT_BACKSLASH + r"\\_|[^\s_])_(?!_)\b"),
    "**": re.compile(r"(?:" + PREVENT_BACKSLASH + r"\\\*|[^\s*])\*\*(?!\*)"),
    "__": re.compile(r"(?:" + PREVENT_BACKSLASH + r"\\_|[^\s_])__(?!_)\b"),
    "***": re.compile(r"(?:" + PREVENT_BACKSLASH + r"\\\*|[^\s*])\*\*\*(?!\*)"),
    "___": re.compile(r"(?:" + PREVENT_BACKSLASH + r"\\_|[^\s_])___(?!_)\b"),
}

class InlineParser(Parser[InlineState]):
    sc_flag = 0
    state_cls = InlineState

    #: linebreak leaves two spaces at the end of line
    STD_LINEBREAK = r"(?:\\| {2,})\n\s*"
    #: every new line becomes <br>
    HARD_LINEBREAK = r" *\n\s*"

    # we only need to find the start pattern of an inline token
    SPECIFICATION = {
        # e.g. \`, \$
        "escape": r"(?:\\" + PUNCTUATION + ")+",
        # `code, ```code
        "codespan": r"`{1,}",
        # *w, **w, _w, __w
        "emphasis": r"\*{1,3}(?=[^\s*])|\b_{1,3}(?=[^\s_])",
        # [link], ![img]
        "link": r"!?\[",
        # <https://example.com>. regex copied from commonmark.js
        "auto_link": r"<[A-Za-z][A-Za-z0-9.+-]{1,31}:[^<>\x00-\x20]*>",
        "auto_email": AUTO_EMAIL,
        "inline_html": INLINE_HTML,
        "linebreak": STD_LINEBREAK,
        "softbreak": HARD_LINEBREAK,
        "prec_auto_link": r"<[A-Za-z][A-Za-z\d.+-]{1,31}:",
        "prec_inline_html": r"</?" + HTML_TAGNAME + r"|<!|<\?",
    }

    DEFAULT_RULES = (
        "escape",
        "codespan",
        "emphasis",
        "link",
        "auto_link",
        "auto_email",
        "inline_html",
        "linebreak",
    )

    def __init__(self, hard_wrap: bool = False) -> None:
        super(InlineParser, self).__init__()
        self.hard_wrap = hard_wrap
        # lazy add linebreak
        if hard_wrap:
            self.specification["linebreak"] = self.HARD_LINEBREAK
        else:
            self.rules.append("softbreak")
        self._methods = {name: getattr(self, "parse_" + name) for name in self.rules}

    def parse_escape(self, m: Match[str], state: InlineState) -> int:
        text = m.group(0)
        text = unescape_char(text)
        state.append_token(
            {
                "type": "text",
                "raw": text,
            }
        )
        return m.end()

    def parse_link(self, m: Match[str], state: InlineState) -> Optional[int]:
        pos = m.end()

        marker = m.group(0)
        is_image = marker[0] == "!"
        if is_image and state.in_image:
            state.append_token({"type": "text", "raw": marker})
            return pos
        elif not is_image and state.in_link:
            state.append_token({"type": "text", "raw": marker})
            return pos

        text = None
        label, end_pos = parse_link_label(state.src, pos)
        if label is None:
            text, end_pos = parse_link_text(state.src, pos)
            if text is None:
                return None

        assert end_pos is not None

        if text is None:
            text = label

        assert text is not None

        if end_pos >= len(state.src) and label is None:
            return None

        rules = ["codespan", "prec_auto_link", "prec_inline_html"]
        prec_pos = self.precedence_scan(m, state, end_pos, rules)
        if prec_pos:
            return prec_pos

        if end_pos < len(state.src):
            c = state.src[end_pos]
            if c == "(":
                # standard link [text](<url> "title")
                attrs, pos2 = parse_link(state.src, end_pos + 1)
                if pos2:
                    token = self.__parse_link_token(is_image, text, attrs, state)
                    state.append_token(token)
                    return pos2
            elif c == "[":
                # standard ref link [text][label]
                label2, pos2 = parse_link_label(state.src, end_pos + 1)
                if pos2:
                    end_pos = pos2
                    if label2:
                        label = label2

        if label is None:
            return None

        ref_links = state.env.get("ref_links")
        if not ref_links:
            return None

        key = unikey(label)
        env = ref_links.get(key)
        if env:
            attrs = {"url": env["url"], "title": env.get("title")}
            token = self.__parse_link_token(is_image, text, attrs, state)
            token["ref"] = key
            token["label"] = label
            state.append_token(token)
            return end_pos
        return None

    def __parse_link_token(
        self,
        is_image: bool,
        text: str,
        attrs: Optional[Dict[str, Any]],
        state: InlineState,
    ) -> Dict[str, Any]:
        new_state = state.copy()
        new_state.src = text
        if is_image:
            new_state.in_image = True
            token = {
                "type": "image",
                "children": self.render(new_state),
                "attrs": attrs,
            }
        else:
            new_state.in_link = True
            token = {
                "type": "link",
                "children": self.render(new_state),
                "attrs": attrs,
            }
        return token

    def parse_auto_link(self, m: Match[str], state: InlineState) -> int:
        text = m.group(0)
        pos = m.end()
        if state.in_link:
            self.process_text(text, state)
            return pos

        text = text[1:-1]
        self._add_auto_link(text, text, state)
        return pos

    def parse_auto_email(self, m: Match[str], state: InlineState) -> int:
        text = m.group(0)
        pos = m.end()
        if state.in_link:
            self.process_text(text, state)
            return pos

        text = text[1:-1]
        url = "mailto:" + text
        self._add_auto_link(url, text, state)
        return pos

    def _add_auto_link(self, url: str, text: str, state: InlineState) -> None:
        state.append_token(
            {
                "type": "link",
                "children": [{"type": "text", "raw": text}],
                "attrs": {"url": escape_url(url)},
            }
        )

    def parse_emphasis(self, m: Match[str], state: InlineState) -> int:
        pos = m.end()

        marker = m.group(0)
        mlen = len(marker)
        if mlen == 1 and state.in_emphasis:
            state.append_token({"type": "text", "raw": marker})
            return pos
        elif mlen == 2 and state.in_strong:
            state.append_token({"type": "text", "raw": marker})
            return pos

        _end_re = EMPHASIS_END_RE[marker]
        m1 = _end_re.search(state.src, pos)
        if not m1:
            state.append_token({"type": "text", "raw": marker})
            return pos

        end_pos = m1.end()
        text = state.src[pos : end_pos - mlen]

        prec_pos = self.precedence_scan(m, state, end_pos)
        if prec_pos:
            return prec_pos

        new_state = state.copy()
        new_state.src = text
        if mlen == 1:
            new_state.in_emphasis = True
            children = self.render(new_state)
            state.append_token({"type": "emphasis", "children": children})
        elif mlen == 2:
            new_state.in_strong = True
            children = self.render(new_state)
            state.append_token({"type": "strong", "children": children})
        else:
            new_state.in_emphasis = True
            new_state.in_strong = True
            children = [{"type": "strong", "children": self.render(new_state)}]
            state.append_token(
                {
                    "type": "emphasis",
                    "children": children,
                }
            )
        return end_pos

    def parse_codespan(self, m: Match[str], state: InlineState) -> int:
        marker = m.group(0)
        # require same marker with same length at end
        pattern = re.compile(r"(.*?[^`])" + marker + r"(?!`)", re.S)

        pos = m.end()
        m2 = pattern.match(state.src, pos)
        if m2:
            end_pos = m2.end()
            code = m2.group(1)
            # Line endings are treated like spaces
            code = code.replace("\n", " ")
            if len(code.strip()):
                if code.startswith(" ") and code.endswith(" "):
                    code = code[1:-1]
            state.append_token({"type": "codespan", "raw": code})
            return end_pos
        else:
            state.append_token({"type": "text", "raw": marker})
            return pos

    def parse_linebreak(self, m: Match[str], state: InlineState) -> int:
        state.append_token({"type": "linebreak"})
        return m.end()

    def parse_softbreak(self, m: Match[str], state: InlineState) -> int:
        state.append_token({"type": "softbreak"})
        return m.end()

    def parse_inline_html(self, m: Match[str], state: InlineState) -> int:
        end_pos = m.end()
        html = m.group(0)
        state.append_token({"type": "inline_html", "raw": html})
        if html.startswith(("<a ", "<a>", "<A ", "<A>")):
            state.in_link = True
        elif html.startswith(("</a ", "</a>", "</A ", "</A>")):
            state.in_link = False
        return end_pos

    def process_text(self, text: str, state: InlineState) -> None:
        state.append_token({"type": "text", "raw": text})

    def parse(self, state: InlineState) -> List[Dict[str, Any]]:
        pos = 0
        sc = self.compile_sc()
        while pos < len(state.src):
            m = sc.search(state.src, pos)
            if not m:
                break

            end_pos = m.start()
            if end_pos > pos:
                hole = state.src[pos:end_pos]
                self.process_text(hole, state)

            new_pos = self.parse_method(m, state)
            if not new_pos:
                # move cursor 1 character forward
                pos = end_pos + 1
                hole = state.src[end_pos:pos]
                self.process_text(hole, state)
            else:
                pos = new_pos

        if pos == 0:
            # special case, just pure text
            self.process_text(state.src, state)
        elif pos < len(state.src):
            self.process_text(state.src[pos:], state)
        return state.tokens

    def precedence_scan(
        self,
        m: Match[str],
        state: InlineState,
        end_pos: int,
        rules: Optional[List[str]] = None,
    ) -> Optional[int]:
        if rules is None:
            rules = ["codespan", "link", "prec_auto_link", "prec_inline_html"]

        mark_pos = m.end()
        sc = self.compile_sc(rules)
        m1 = sc.search(state.src, mark_pos, end_pos)
        if not m1:
            return None

        lastgroup = m1.lastgroup
        if not lastgroup:
            return None
        rule_name = lastgroup.replace("prec_", "")
        sc = self.compile_sc([rule_name])
        m2 = sc.match(state.src, m1.start())
        if not m2:
            return None

        func = self._methods[rule_name]
        new_state = state.copy()
        new_state.src = state.src
        m2_pos = func(m2, new_state)
        if not m2_pos or m2_pos < end_pos:
            return None

        raw_text = state.src[m.start() : m2.start()]
        state.append_token({"type": "text", "raw": raw_text})
        for token in new_state.tokens:
            state.append_token(token)
        return m2_pos

    def render(self, state: InlineState) -> List[Dict[str, Any]]:
        self.parse(state)
        return state.tokens

    def __call__(self, s: str, env: MutableMapping[str, Any]) -> List[Dict[str, Any]]:
        state = self.state_cls(env)
        state.src = s
        return self.render(state)
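
A minimal usage sketch for the parser added in this commit (illustrative, not part of the committed file): the __call__ method at the end of the class takes the raw inline source and a mutable env mapping, and returns the token list built by the parse_* methods above. The sample input and the empty env dict are assumptions for demonstration; the module's import path is not shown in this diff.

# Assumes the InlineParser class shown above is importable from its package.
inline = InlineParser()

# __call__(s, env) builds an InlineState, sets state.src = s,
# and returns the tokens collected by parse()/render().
tokens = inline("**bold**, *italic*, `code` and <https://example.com>", {})
for tok in tokens:
    # e.g. "strong", "text", "emphasis", "codespan", "link"
    print(tok["type"])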