scrape github release page for changelog
This commit is contained in:
@@ -70,29 +70,72 @@ release_tag="${release_tag%\}}"
|
|||||||
release_tag="${release_tag#\'}"
|
release_tag="${release_tag#\'}"
|
||||||
release_tag="${release_tag%\'}"
|
release_tag="${release_tag%\'}"
|
||||||
|
|
||||||
api_url="https://api.github.com/repos/${RELEASE_API_REPO}/releases/tags/${release_tag}"
|
release_url="${LATEST_RELEASE_URL%/latest}/tag/${release_tag}"
|
||||||
|
release_html=$(curl -fsSL "$release_url" || true)
|
||||||
curl_headers=(
|
|
||||||
-H "Accept: application/vnd.github+json"
|
|
||||||
-H "X-GitHub-Api-Version: 2022-11-28"
|
|
||||||
)
|
|
||||||
if [ -n "${GITHUB_TOKEN:-}" ]; then
|
|
||||||
curl_headers+=( -H "Authorization: Bearer ${GITHUB_TOKEN}" )
|
|
||||||
fi
|
|
||||||
|
|
||||||
api_response=$(curl -sS -w '\n%{http_code}' "${curl_headers[@]}" "$api_url" || true)
|
|
||||||
api_body=$(printf '%s\n' "$api_response" | sed '$d')
|
|
||||||
api_code=$(printf '%s\n' "$api_response" | tail -n1)
|
|
||||||
|
|
||||||
release_notes=""
|
release_notes=""
|
||||||
if [ "$api_code" = "200" ]; then
|
if [ -n "$release_html" ]; then
|
||||||
release_notes=$(printf '%s' "$api_body" | "$PYTHON_BIN" -c 'import json,sys; d=json.load(sys.stdin); print((d.get("body") or "").strip())' || true)
|
release_notes=$(printf '%s' "$release_html" | "$PYTHON_BIN" -c '
|
||||||
|
import re
import sys
from html.parser import HTMLParser

# NOTE: this script is passed to the interpreter inside a single-quoted shell
# string, so it must not contain any apostrophe characters.

# Opening tag of the element that wraps the rendered release notes.
BODY_OPEN_RE = re.compile(
    r"<div[^>]*data-test-selector=\"body-content\"[^>]*class=\"[^\"]*markdown-body[^\"]*\"[^>]*>"
)
# Any opening or closing div tag; group 1 is "/" for a closing tag.
DIV_TAG_RE = re.compile(r"<(/?)div(?=[\s/>])", re.I)

# Tags that must start and end on their own output line.
BLOCK_TAGS = ("p", "div", "h1", "h2", "h3", "h4", "h5", "h6", "li")


def find_body_fragment(html):
    """Return the inner HTML of the release-notes container, or None.

    The container is the first div matched by BODY_OPEN_RE.  Its end is found
    by counting nested div tags, so a div inside the notes (for example the
    wrapper around a code block) no longer cuts the notes short.
    """
    opening = BODY_OPEN_RE.search(html)
    if opening is None:
        return None

    start = opening.end()
    depth = 1
    first_close = None
    for tag in DIV_TAG_RE.finditer(html, start):
        if tag.group(1):
            if first_close is None:
                first_close = tag.start()
            depth -= 1
            if depth == 0:
                return html[start:tag.start()]
        else:
            depth += 1

    # Unbalanced markup: fall back to the text before the first closing div,
    # which is what a non-greedy single-regex match would have captured.
    if first_close is None:
        return None
    return html[start:first_close]


class Extractor(HTMLParser):
    """Flatten an HTML fragment into text, keeping code as Markdown markers.

    Block-level tags become line breaks, pre blocks are wrapped in triple
    backtick fences and inline code in single backticks.  The pieces are
    collected in ``out`` and joined by the caller.
    """

    def __init__(self):
        super().__init__()
        self.out = []
        # True between <pre> and </pre>, where nested <code> adds no backticks.
        self.in_code = False

    def _ends_with_newline(self):
        """Return True when the collected output ends with a line break."""
        return bool(self.out) and self.out[-1].endswith("\n")

    def handle_starttag(self, tag, attrs):
        if tag == "br":
            self.out.append("\n")
        elif tag in BLOCK_TAGS:
            if self.out and not self._ends_with_newline():
                self.out.append("\n")
        elif tag == "pre":
            if self.out and not self._ends_with_newline():
                self.out.append("\n")
            self.out.append("```\n")
            self.in_code = True
        elif tag == "code" and not self.in_code:
            self.out.append("`")

    def handle_endtag(self, tag):
        if tag in BLOCK_TAGS:
            if not self._ends_with_newline():
                self.out.append("\n")
        elif tag == "pre":
            if not self._ends_with_newline():
                self.out.append("\n")
            self.out.append("```\n")
            self.in_code = False
        elif tag == "code" and not self.in_code:
            self.out.append("`")

    def handle_data(self, data):
        if data:
            self.out.append(data)


def extract_release_notes(html):
    """Return the release notes contained in *html* as text ("" if absent)."""
    fragment = find_body_fragment(html)
    if fragment is None:
        return ""

    parser = Extractor()
    parser.feed(fragment)
    # Flush text the parser is still holding back (for example a trailing
    # ampersand that might have been the start of a character reference).
    parser.close()

    text = re.sub(r"\n{3,}", "\n\n", "".join(parser.out))
    return text.strip()


# Code run through the -c option executes as the __main__ module, so this
# still reads the page from stdin and prints the notes when run by the shell.
if __name__ == "__main__":
    print(extract_release_notes(sys.stdin.read()))
|
||||||
|
' || true)
|
||||||
else
|
else
|
||||||
echo "warning: failed to fetch release notes from GitHub API (status=$api_code, url=$api_url)"
|
echo "warning: failed to fetch release page ${release_url}"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if [ -z "$release_notes" ]; then
|
if [ -z "$release_notes" ]; then
|
||||||
release_notes="_No changelog found in upstream release notes. Check ${LATEST_RELEASE_URL%/latest}/tag/${release_tag}._"
|
release_notes="_No changelog found on upstream release page. Check ${release_url}._"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
delimiter="CHANGELOG_$(date +%s%N)"
|
delimiter="CHANGELOG_$(date +%s%N)"
|
||||||
|
|||||||
Reference in New Issue
Block a user