From 269276003d599c3d3f5b1b1ed7eafe694d2ea964 Mon Sep 17 00:00:00 2001 From: "Thomas G. Lopes" Date: Fri, 13 Mar 2026 12:46:08 +0000 Subject: [PATCH] preserve markdown formatting in scraped changelog --- .gitea/scripts/update-appimage-nix.sh | 157 +++++++++++++++++++++----- 1 file changed, 127 insertions(+), 30 deletions(-) diff --git a/.gitea/scripts/update-appimage-nix.sh b/.gitea/scripts/update-appimage-nix.sh index 8a78795..affc347 100644 --- a/.gitea/scripts/update-appimage-nix.sh +++ b/.gitea/scripts/update-appimage-nix.sh @@ -22,7 +22,7 @@ release_tag_template="${release_tag_template//$'\r'/}" current_version=$($PYTHON_BIN - <<'PY' import re import os -p=os.environ['FILE'] +p=os.environ["FILE"] s=open(p).read() m=re.search(r'version\s*=\s*"([^"]+)";', s) print(m.group(1) if m else "") @@ -53,11 +53,11 @@ export NEW_HASH="$new_hash" "$PYTHON_BIN" - <<'PY' import os import re -p=os.environ['FILE'] +p=os.environ["FILE"] s=open(p).read() s=re.sub(r'version\s*=\s*"[^"]+"', f'version = "{os.environ["LATEST_VERSION"]}"', s, count=1) s=re.sub(r'hash\s*=\s*"[^"]+"', f'hash = "{os.environ["NEW_HASH"]}"', s, count=1) -open(p,'w').write(s) +open(p,"w").write(s) PY echo "updated=true" >> "$GITHUB_OUTPUT" @@ -76,7 +76,8 @@ release_html=$(curl -fsSL "$release_url" || true) release_notes="" if [ -n "$release_html" ]; then release_notes=$(printf '%s' "$release_html" | "$PYTHON_BIN" -c ' -import re, sys +import re +import sys from html.parser import HTMLParser html = sys.stdin.read() @@ -88,46 +89,142 @@ if not m: fragment = m.group(1) -class Extractor(HTMLParser): +class MdExtractor(HTMLParser): def __init__(self): super().__init__() self.out = [] - self.in_code = False + self.list_depth = 0 + self.in_pre = False + self.in_code_inline = False + self.link_stack = [] + + def _append(self, text): + self.out.append(text) + + def _ensure_newline(self): + if not self.out: + return + if not self.out[-1].endswith("\n"): + self.out.append("\n") + + def _ensure_blank_line(self): + if not self.out: + return + joined = "".join(self.out) + if joined.endswith("\n\n"): + return + if joined.endswith("\n"): + self.out.append("\n") + else: + self.out.append("\n\n") def handle_starttag(self, tag, attrs): - if tag in ("br",): - self.out.append("\n") - elif tag in ("p", "div", "h1", "h2", "h3", "h4", "h5", "h6", "li"): - if self.out and not self.out[-1].endswith("\n"): - self.out.append("\n") - elif tag in ("pre",): - if self.out and not self.out[-1].endswith("\n"): - self.out.append("\n") - self.out.append("```\n") - self.in_code = True - elif tag in ("code",) and not self.in_code: - self.out.append("`") + attrs_d = dict(attrs) + + if tag in ("h1", "h2", "h3", "h4", "h5", "h6"): + self._ensure_blank_line() + level = int(tag[1]) + self._append("#" * level + " ") + return + + if tag in ("p", "div"): + self._ensure_blank_line() + return + + if tag in ("ul", "ol"): + self._ensure_blank_line() + self.list_depth += 1 + return + + if tag == "li": + self._ensure_newline() + indent = " " * max(self.list_depth - 1, 0) + self._append(f"{indent}- ") + return + + if tag == "br": + self._append("\n") + return + + if tag == "pre": + self._ensure_blank_line() + self._append("```\n") + self.in_pre = True + return + + if tag == "code": + if not self.in_pre: + self._append("`") + self.in_code_inline = True + return + + if tag == "a": + href = attrs_d.get("href", "") + self.link_stack.append(href) + self._append("[") + return + + if tag in ("strong", "b"): + self._append("**") + return + + if tag in ("em", "i"): + self._append("*") + return def handle_endtag(self, tag): - if tag in ("p", "div", "li", "h1", "h2", "h3", "h4", "h5", "h6"): - if not self.out or not self.out[-1].endswith("\n"): - self.out.append("\n") - elif tag in ("pre",): - if not self.out or not self.out[-1].endswith("\n"): - self.out.append("\n") - self.out.append("```\n") - self.in_code = False - elif tag in ("code",) and not self.in_code: - self.out.append("`") + if tag in ("h1", "h2", "h3", "h4", "h5", "h6", "p", "div"): + self._ensure_blank_line() + return + + if tag in ("ul", "ol"): + self.list_depth = max(self.list_depth - 1, 0) + self._ensure_blank_line() + return + + if tag == "li": + self._ensure_newline() + return + + if tag == "pre": + self._ensure_newline() + self._append("```\n\n") + self.in_pre = False + return + + if tag == "code": + if not self.in_pre and self.in_code_inline: + self._append("`") + self.in_code_inline = False + return + + if tag == "a": + href = self.link_stack.pop() if self.link_stack else "" + if href: + self._append("](" + + href + + ")") + else: + self._append("]") + return + + if tag in ("strong", "b"): + self._append("**") + return + + if tag in ("em", "i"): + self._append("*") + return def handle_data(self, data): if data: - self.out.append(data) + self._append(data) -parser = Extractor() +parser = MdExtractor() parser.feed(fragment) text = "".join(parser.out) text = re.sub(r"\n{3,}", "\n\n", text) +text = re.sub(r"[ \t]+\n", "\n", text) print(text.strip()) ' || true) else