Feature --check HTTPCODE added
This commit is contained in:
1
.gitignore
vendored
1
.gitignore
vendored
@@ -41,3 +41,4 @@ Thumbs.db
|
||||
|
||||
# Local test artifacts
|
||||
test_preview.md
|
||||
blog/
|
||||
|
||||
@@ -38,6 +38,8 @@ mdlink .
|
||||
|
||||
- `--timeout FLOAT`
|
||||
Per-request timeout in seconds (default: `10.0`).
|
||||
- `--check CODE`
|
||||
Report only links whose HTTP status code is one of the selected codes. Repeat the option to select multiple codes (for example `--check 404` or `--check 301 --check 404`).
|
||||
|
||||
## Interactive Redirect Rewrite
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Iterable
|
||||
from typing import Callable, Iterable, Optional
|
||||
|
||||
import httpx
|
||||
|
||||
@@ -29,11 +29,12 @@ class LinkChecker:
|
||||
def check(self, url: str) -> LinkCheckResult:
|
||||
try:
|
||||
response = self._client.get(url)
|
||||
original_status = response.history[0].status_code if response.history else response.status_code
|
||||
final_url = str(response.url)
|
||||
redirected = final_url != url
|
||||
return LinkCheckResult(
|
||||
original_url=url,
|
||||
status_code=response.status_code,
|
||||
status_code=original_status,
|
||||
final_url=final_url,
|
||||
redirected=redirected,
|
||||
error=None,
|
||||
@@ -47,8 +48,16 @@ class LinkChecker:
|
||||
error=str(exc),
|
||||
)
|
||||
|
||||
def check_many(self, urls: Iterable[str]) -> dict[str, LinkCheckResult]:
|
||||
def check_many(
    self,
    urls: Iterable[str],
    progress_callback: Optional[Callable[[int, int, str], None]] = None,
) -> dict[str, LinkCheckResult]:
    """Check every distinct URL once and collect the results by URL.

    Args:
        urls: URLs to check. They are passed through
            ``unique_preserve_order`` first, so each distinct URL is
            checked a single time.
        progress_callback: Optional hook called after each URL has been
            checked, as ``progress_callback(index, total, url)`` where
            ``index`` is 1-based and ``total`` is the number of distinct
            URLs.

    Returns:
        A dict mapping each distinct URL to the result of ``self.check``.
    """
    unique_urls = unique_preserve_order(urls)
    total = len(unique_urls)
    results: dict[str, LinkCheckResult] = {}
    for index, url in enumerate(unique_urls, start=1):
        results[url] = self.check(url)
        # Notify only after the result is stored, so index counts finished checks.
        if progress_callback is not None:
            progress_callback(index, total, url)
    return results
|
||||
|
||||
@@ -19,10 +19,28 @@ def parse_args() -> argparse.Namespace:
|
||||
parser = argparse.ArgumentParser(prog="mdlink", description="Scan Markdown files and validate links.")
|
||||
parser.add_argument("path", type=Path, help="Directory or Markdown file to scan")
|
||||
parser.add_argument("--timeout", type=float, default=10.0, help="Request timeout in seconds")
|
||||
parser.add_argument(
|
||||
"--check",
|
||||
dest="check_codes",
|
||||
type=int,
|
||||
action="append",
|
||||
metavar="CODE",
|
||||
help="Report only selected HTTP status codes. Repeat for multiple values (e.g. --check 404 --check 301).",
|
||||
)
|
||||
return parser.parse_args()
|
||||
|
||||
|
||||
def _build_report_table(records: list[LinkRecord], checks: dict[str, LinkCheckResult]) -> Table:
|
||||
def _is_listed_result(result: LinkCheckResult, check_codes: Optional[set[int]]) -> bool:
    """Decide whether a check result belongs in the report.

    Args:
        result: Outcome of checking one URL; only ``status_code`` and
            ``should_report`` are read.
        check_codes: Status codes to filter on. ``None`` or an empty set
            means no filter is active.

    Returns:
        With an active filter, ``True`` only when the result has a status
        code and that code is one of ``check_codes``. Without a filter, the
        value of ``result.should_report``.
    """
    if not check_codes:
        # No explicit filter: defer to the result's own reporting rule.
        return result.should_report
    # A result without a status code (status_code is None) never matches a filter.
    return result.status_code is not None and result.status_code in check_codes
|
||||
|
||||
|
||||
def _build_report_table(
|
||||
records: list[LinkRecord],
|
||||
checks: dict[str, LinkCheckResult],
|
||||
check_codes: Optional[set[int]] = None,
|
||||
) -> Table:
|
||||
table = Table(title="Non-200 Links")
|
||||
table.add_column("file")
|
||||
table.add_column("line", justify="right")
|
||||
@@ -32,7 +50,7 @@ def _build_report_table(records: list[LinkRecord], checks: dict[str, LinkCheckRe
|
||||
|
||||
for record in records:
|
||||
result = checks[record.url]
|
||||
if not result.should_report:
|
||||
if not _is_listed_result(result=result, check_codes=check_codes):
|
||||
continue
|
||||
status_value = str(result.status_code) if result.status_code is not None else f"ERR: {result.error}"
|
||||
table.add_row(
|
||||
@@ -45,10 +63,16 @@ def _build_report_table(records: list[LinkRecord], checks: dict[str, LinkCheckRe
|
||||
return table
|
||||
|
||||
|
||||
def _collect_redirects(records: list[LinkRecord], checks: dict[str, LinkCheckResult]) -> list[tuple[LinkRecord, LinkCheckResult]]:
|
||||
def _collect_redirects(
|
||||
records: list[LinkRecord],
|
||||
checks: dict[str, LinkCheckResult],
|
||||
check_codes: Optional[set[int]] = None,
|
||||
) -> list[tuple[LinkRecord, LinkCheckResult]]:
|
||||
redirects: list[tuple[LinkRecord, LinkCheckResult]] = []
|
||||
for record in records:
|
||||
result = checks[record.url]
|
||||
if not _is_listed_result(result=result, check_codes=check_codes):
|
||||
continue
|
||||
if not result.redirected:
|
||||
continue
|
||||
if not result.final_url:
|
||||
@@ -80,6 +104,7 @@ def _cached_check(
|
||||
def _collect_https_candidates(
|
||||
records: list[LinkRecord],
|
||||
checks: dict[str, LinkCheckResult],
|
||||
check_codes: Optional[set[int]] = None,
|
||||
) -> list[tuple[LinkRecord, str]]:
|
||||
candidates: list[tuple[LinkRecord, str]] = []
|
||||
seen: set[tuple[Path, str]] = set()
|
||||
@@ -89,6 +114,10 @@ def _collect_https_candidates(
|
||||
if not _is_http_url(record.url):
|
||||
continue
|
||||
original_check = checks.get(record.url)
|
||||
if original_check is None:
|
||||
continue
|
||||
if not _is_listed_result(result=original_check, check_codes=check_codes):
|
||||
continue
|
||||
if original_check and original_check.redirected:
|
||||
continue
|
||||
key = (record.file_path, record.url)
|
||||
@@ -102,6 +131,7 @@ def _collect_https_candidates(
|
||||
def _handle_rewrites(
|
||||
records: list[LinkRecord],
|
||||
checks: dict[str, LinkCheckResult],
|
||||
check_codes: Optional[set[int]],
|
||||
redirects: list[tuple[LinkRecord, LinkCheckResult]],
|
||||
checker: LinkChecker,
|
||||
editor: ASTMarkdownEditor,
|
||||
@@ -139,7 +169,7 @@ def _handle_rewrites(
|
||||
continue
|
||||
replacements_by_file[record.file_path][record.url] = final_url
|
||||
|
||||
https_candidates = _collect_https_candidates(records=records, checks=checks)
|
||||
https_candidates = _collect_https_candidates(records=records, checks=checks, check_codes=check_codes)
|
||||
if https_candidates:
|
||||
console.print("\n[bold]HTTPS upgrade candidates[/bold]")
|
||||
|
||||
@@ -183,6 +213,7 @@ def _handle_rewrites(
|
||||
def main() -> None:
|
||||
args = parse_args()
|
||||
console = Console()
|
||||
check_codes = set(args.check_codes) if args.check_codes else None
|
||||
|
||||
scanner = MarkdownScanner()
|
||||
records = scanner.scan_path(args.path)
|
||||
@@ -192,18 +223,36 @@ def main() -> None:
|
||||
|
||||
urls = unique_preserve_order(record.url for record in records)
|
||||
with LinkChecker(timeout=args.timeout) as checker:
|
||||
checks = checker.check_many(urls)
|
||||
table = _build_report_table(records, checks)
|
||||
total_urls = len(urls)
|
||||
console.print(f"Checking {total_urls} unique URLs...")
|
||||
|
||||
def progress(current: int, total: int, url: str) -> None:
|
||||
_ = url
|
||||
if current == 1 or current % 25 == 0 or current == total:
|
||||
console.print(f"[dim]Progress: {current}/{total}[/dim]")
|
||||
|
||||
try:
|
||||
checks = checker.check_many(urls, progress_callback=progress)
|
||||
except KeyboardInterrupt:
|
||||
console.print("\n[yellow]Aborted by user during link checks.[/yellow]")
|
||||
return
|
||||
|
||||
table = _build_report_table(records, checks, check_codes=check_codes)
|
||||
if table.row_count:
|
||||
console.print(table)
|
||||
else:
|
||||
if check_codes:
|
||||
sorted_codes = ", ".join(str(code) for code in sorted(check_codes))
|
||||
console.print(f"No links found with status code(s): {sorted_codes}.")
|
||||
else:
|
||||
console.print("No non-200 links found.")
|
||||
|
||||
redirects = _collect_redirects(records, checks)
|
||||
redirects = _collect_redirects(records, checks, check_codes=check_codes)
|
||||
editor = ASTMarkdownEditor()
|
||||
_handle_rewrites(
|
||||
records=records,
|
||||
checks=checks,
|
||||
check_codes=check_codes,
|
||||
redirects=redirects,
|
||||
checker=checker,
|
||||
editor=editor,
|
||||
|
||||
Reference in New Issue
Block a user