Add --check CODE option to report only links with selected HTTP status codes

This commit is contained in:
2026-04-16 18:55:57 +02:00
parent b3006978b3
commit c8d4128b79
4 changed files with 73 additions and 12 deletions

1
.gitignore vendored
View File

@@ -41,3 +41,4 @@ Thumbs.db
# Local test artifacts
test_preview.md
blog/

View File

@@ -38,6 +38,8 @@ mdlink .
- `--timeout FLOAT`
Per-request timeout in seconds (default: `10.0`).
- `--check CODE`
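Report only links whose HTTP status code matches `CODE`. Repeat the option to select multiple codes (for example, `--check 404` or `--check 301 --check 404`). For redirected links, the status code of the first response (for example `301`) is the one matched, not the status of the final destination.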
## Interactive Redirect Rewrite

View File

@@ -1,6 +1,6 @@
from __future__ import annotations
from typing import Iterable
from typing import Callable, Iterable, Optional
import httpx
@@ -29,11 +29,12 @@ class LinkChecker:
def check(self, url: str) -> LinkCheckResult:
try:
response = self._client.get(url)
original_status = response.history[0].status_code if response.history else response.status_code
final_url = str(response.url)
redirected = final_url != url
return LinkCheckResult(
original_url=url,
status_code=response.status_code,
status_code=original_status,
final_url=final_url,
redirected=redirected,
error=None,
@@ -47,8 +48,16 @@ class LinkChecker:
error=str(exc),
)
def check_many(self, urls: Iterable[str]) -> dict[str, LinkCheckResult]:
def check_many(
self,
urls: Iterable[str],
progress_callback: Optional[Callable[[int, int, str], None]] = None,
) -> dict[str, LinkCheckResult]:
results: dict[str, LinkCheckResult] = {}
for url in unique_preserve_order(urls):
unique_urls = unique_preserve_order(urls)
total = len(unique_urls)
for index, url in enumerate(unique_urls, start=1):
results[url] = self.check(url)
if progress_callback is not None:
progress_callback(index, total, url)
return results

View File

@@ -19,10 +19,28 @@ def parse_args() -> argparse.Namespace:
parser = argparse.ArgumentParser(prog="mdlink", description="Scan Markdown files and validate links.")
parser.add_argument("path", type=Path, help="Directory or Markdown file to scan")
parser.add_argument("--timeout", type=float, default=10.0, help="Request timeout in seconds")
parser.add_argument(
"--check",
dest="check_codes",
type=int,
action="append",
metavar="CODE",
help="Report only selected HTTP status codes. Repeat for multiple values (e.g. --check 404 --check 301).",
)
return parser.parse_args()
def _build_report_table(records: list[LinkRecord], checks: dict[str, LinkCheckResult]) -> Table:
def _is_listed_result(result: LinkCheckResult, check_codes: Optional[set[int]]) -> bool:
if check_codes:
return result.status_code is not None and result.status_code in check_codes
return result.should_report
def _build_report_table(
records: list[LinkRecord],
checks: dict[str, LinkCheckResult],
check_codes: Optional[set[int]] = None,
) -> Table:
table = Table(title="Non-200 Links")
table.add_column("file")
table.add_column("line", justify="right")
@@ -32,7 +50,7 @@ def _build_report_table(records: list[LinkRecord], checks: dict[str, LinkCheckRe
for record in records:
result = checks[record.url]
if not result.should_report:
if not _is_listed_result(result=result, check_codes=check_codes):
continue
status_value = str(result.status_code) if result.status_code is not None else f"ERR: {result.error}"
table.add_row(
@@ -45,10 +63,16 @@ def _build_report_table(records: list[LinkRecord], checks: dict[str, LinkCheckRe
return table
def _collect_redirects(records: list[LinkRecord], checks: dict[str, LinkCheckResult]) -> list[tuple[LinkRecord, LinkCheckResult]]:
def _collect_redirects(
records: list[LinkRecord],
checks: dict[str, LinkCheckResult],
check_codes: Optional[set[int]] = None,
) -> list[tuple[LinkRecord, LinkCheckResult]]:
redirects: list[tuple[LinkRecord, LinkCheckResult]] = []
for record in records:
result = checks[record.url]
if not _is_listed_result(result=result, check_codes=check_codes):
continue
if not result.redirected:
continue
if not result.final_url:
@@ -80,6 +104,7 @@ def _cached_check(
def _collect_https_candidates(
records: list[LinkRecord],
checks: dict[str, LinkCheckResult],
check_codes: Optional[set[int]] = None,
) -> list[tuple[LinkRecord, str]]:
candidates: list[tuple[LinkRecord, str]] = []
seen: set[tuple[Path, str]] = set()
@@ -89,6 +114,10 @@ def _collect_https_candidates(
if not _is_http_url(record.url):
continue
original_check = checks.get(record.url)
if original_check is None:
continue
if not _is_listed_result(result=original_check, check_codes=check_codes):
continue
if original_check and original_check.redirected:
continue
key = (record.file_path, record.url)
@@ -102,6 +131,7 @@ def _collect_https_candidates(
def _handle_rewrites(
records: list[LinkRecord],
checks: dict[str, LinkCheckResult],
check_codes: Optional[set[int]],
redirects: list[tuple[LinkRecord, LinkCheckResult]],
checker: LinkChecker,
editor: ASTMarkdownEditor,
@@ -139,7 +169,7 @@ def _handle_rewrites(
continue
replacements_by_file[record.file_path][record.url] = final_url
https_candidates = _collect_https_candidates(records=records, checks=checks)
https_candidates = _collect_https_candidates(records=records, checks=checks, check_codes=check_codes)
if https_candidates:
console.print("\n[bold]HTTPS upgrade candidates[/bold]")
@@ -183,6 +213,7 @@ def _handle_rewrites(
def main() -> None:
args = parse_args()
console = Console()
check_codes = set(args.check_codes) if args.check_codes else None
scanner = MarkdownScanner()
records = scanner.scan_path(args.path)
@@ -192,18 +223,36 @@ def main() -> None:
urls = unique_preserve_order(record.url for record in records)
with LinkChecker(timeout=args.timeout) as checker:
checks = checker.check_many(urls)
table = _build_report_table(records, checks)
total_urls = len(urls)
console.print(f"Checking {total_urls} unique URLs...")
def progress(current: int, total: int, url: str) -> None:
_ = url
if current == 1 or current % 25 == 0 or current == total:
console.print(f"[dim]Progress: {current}/{total}[/dim]")
try:
checks = checker.check_many(urls, progress_callback=progress)
except KeyboardInterrupt:
console.print("\n[yellow]Aborted by user during link checks.[/yellow]")
return
table = _build_report_table(records, checks, check_codes=check_codes)
if table.row_count:
console.print(table)
else:
console.print("No non-200 links found.")
if check_codes:
sorted_codes = ", ".join(str(code) for code in sorted(check_codes))
console.print(f"No links found with status code(s): {sorted_codes}.")
else:
console.print("No non-200 links found.")
redirects = _collect_redirects(records, checks)
redirects = _collect_redirects(records, checks, check_codes=check_codes)
editor = ASTMarkdownEditor()
_handle_rewrites(
records=records,
checks=checks,
check_codes=check_codes,
redirects=redirects,
checker=checker,
editor=editor,