Feature --check HTTPCODE added
This commit is contained in:
1
.gitignore
vendored
1
.gitignore
vendored
@@ -41,3 +41,4 @@ Thumbs.db
|
|||||||
|
|
||||||
# Local test artifacts
|
# Local test artifacts
|
||||||
test_preview.md
|
test_preview.md
|
||||||
|
blog/
|
||||||
|
|||||||
@@ -38,6 +38,8 @@ mdlink .
|
|||||||
|
|
||||||
- `--timeout FLOAT`
|
- `--timeout FLOAT`
|
||||||
Per-request timeout in seconds (default: `10.0`).
|
Per-request timeout in seconds (default: `10.0`).
|
||||||
|
- `--check CODE`
|
||||||
|
Report only selected HTTP status codes. Repeat the option for multiple codes (for example `--check 404` or `--check 301 --check 404`).
|
||||||
|
|
||||||
## Interactive Redirect Rewrite
|
## Interactive Redirect Rewrite
|
||||||
|
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
from typing import Iterable
|
from typing import Callable, Iterable, Optional
|
||||||
|
|
||||||
import httpx
|
import httpx
|
||||||
|
|
||||||
@@ -29,11 +29,12 @@ class LinkChecker:
|
|||||||
def check(self, url: str) -> LinkCheckResult:
|
def check(self, url: str) -> LinkCheckResult:
|
||||||
try:
|
try:
|
||||||
response = self._client.get(url)
|
response = self._client.get(url)
|
||||||
|
original_status = response.history[0].status_code if response.history else response.status_code
|
||||||
final_url = str(response.url)
|
final_url = str(response.url)
|
||||||
redirected = final_url != url
|
redirected = final_url != url
|
||||||
return LinkCheckResult(
|
return LinkCheckResult(
|
||||||
original_url=url,
|
original_url=url,
|
||||||
status_code=response.status_code,
|
status_code=original_status,
|
||||||
final_url=final_url,
|
final_url=final_url,
|
||||||
redirected=redirected,
|
redirected=redirected,
|
||||||
error=None,
|
error=None,
|
||||||
@@ -47,8 +48,16 @@ class LinkChecker:
|
|||||||
error=str(exc),
|
error=str(exc),
|
||||||
)
|
)
|
||||||
|
|
||||||
def check_many(self, urls: Iterable[str]) -> dict[str, LinkCheckResult]:
|
def check_many(
|
||||||
|
self,
|
||||||
|
urls: Iterable[str],
|
||||||
|
progress_callback: Optional[Callable[[int, int, str], None]] = None,
|
||||||
|
) -> dict[str, LinkCheckResult]:
|
||||||
results: dict[str, LinkCheckResult] = {}
|
results: dict[str, LinkCheckResult] = {}
|
||||||
for url in unique_preserve_order(urls):
|
unique_urls = unique_preserve_order(urls)
|
||||||
|
total = len(unique_urls)
|
||||||
|
for index, url in enumerate(unique_urls, start=1):
|
||||||
results[url] = self.check(url)
|
results[url] = self.check(url)
|
||||||
|
if progress_callback is not None:
|
||||||
|
progress_callback(index, total, url)
|
||||||
return results
|
return results
|
||||||
|
|||||||
@@ -19,10 +19,28 @@ def parse_args() -> argparse.Namespace:
|
|||||||
parser = argparse.ArgumentParser(prog="mdlink", description="Scan Markdown files and validate links.")
|
parser = argparse.ArgumentParser(prog="mdlink", description="Scan Markdown files and validate links.")
|
||||||
parser.add_argument("path", type=Path, help="Directory or Markdown file to scan")
|
parser.add_argument("path", type=Path, help="Directory or Markdown file to scan")
|
||||||
parser.add_argument("--timeout", type=float, default=10.0, help="Request timeout in seconds")
|
parser.add_argument("--timeout", type=float, default=10.0, help="Request timeout in seconds")
|
||||||
|
parser.add_argument(
|
||||||
|
"--check",
|
||||||
|
dest="check_codes",
|
||||||
|
type=int,
|
||||||
|
action="append",
|
||||||
|
metavar="CODE",
|
||||||
|
help="Report only selected HTTP status codes. Repeat for multiple values (e.g. --check 404 --check 301).",
|
||||||
|
)
|
||||||
return parser.parse_args()
|
return parser.parse_args()
|
||||||
|
|
||||||
|
|
||||||
def _build_report_table(records: list[LinkRecord], checks: dict[str, LinkCheckResult]) -> Table:
|
def _is_listed_result(result: LinkCheckResult, check_codes: Optional[set[int]]) -> bool:
|
||||||
|
if check_codes:
|
||||||
|
return result.status_code is not None and result.status_code in check_codes
|
||||||
|
return result.should_report
|
||||||
|
|
||||||
|
|
||||||
|
def _build_report_table(
|
||||||
|
records: list[LinkRecord],
|
||||||
|
checks: dict[str, LinkCheckResult],
|
||||||
|
check_codes: Optional[set[int]] = None,
|
||||||
|
) -> Table:
|
||||||
table = Table(title="Non-200 Links")
|
table = Table(title="Non-200 Links")
|
||||||
table.add_column("file")
|
table.add_column("file")
|
||||||
table.add_column("line", justify="right")
|
table.add_column("line", justify="right")
|
||||||
@@ -32,7 +50,7 @@ def _build_report_table(records: list[LinkRecord], checks: dict[str, LinkCheckRe
|
|||||||
|
|
||||||
for record in records:
|
for record in records:
|
||||||
result = checks[record.url]
|
result = checks[record.url]
|
||||||
if not result.should_report:
|
if not _is_listed_result(result=result, check_codes=check_codes):
|
||||||
continue
|
continue
|
||||||
status_value = str(result.status_code) if result.status_code is not None else f"ERR: {result.error}"
|
status_value = str(result.status_code) if result.status_code is not None else f"ERR: {result.error}"
|
||||||
table.add_row(
|
table.add_row(
|
||||||
@@ -45,10 +63,16 @@ def _build_report_table(records: list[LinkRecord], checks: dict[str, LinkCheckRe
|
|||||||
return table
|
return table
|
||||||
|
|
||||||
|
|
||||||
def _collect_redirects(records: list[LinkRecord], checks: dict[str, LinkCheckResult]) -> list[tuple[LinkRecord, LinkCheckResult]]:
|
def _collect_redirects(
|
||||||
|
records: list[LinkRecord],
|
||||||
|
checks: dict[str, LinkCheckResult],
|
||||||
|
check_codes: Optional[set[int]] = None,
|
||||||
|
) -> list[tuple[LinkRecord, LinkCheckResult]]:
|
||||||
redirects: list[tuple[LinkRecord, LinkCheckResult]] = []
|
redirects: list[tuple[LinkRecord, LinkCheckResult]] = []
|
||||||
for record in records:
|
for record in records:
|
||||||
result = checks[record.url]
|
result = checks[record.url]
|
||||||
|
if not _is_listed_result(result=result, check_codes=check_codes):
|
||||||
|
continue
|
||||||
if not result.redirected:
|
if not result.redirected:
|
||||||
continue
|
continue
|
||||||
if not result.final_url:
|
if not result.final_url:
|
||||||
@@ -80,6 +104,7 @@ def _cached_check(
|
|||||||
def _collect_https_candidates(
|
def _collect_https_candidates(
|
||||||
records: list[LinkRecord],
|
records: list[LinkRecord],
|
||||||
checks: dict[str, LinkCheckResult],
|
checks: dict[str, LinkCheckResult],
|
||||||
|
check_codes: Optional[set[int]] = None,
|
||||||
) -> list[tuple[LinkRecord, str]]:
|
) -> list[tuple[LinkRecord, str]]:
|
||||||
candidates: list[tuple[LinkRecord, str]] = []
|
candidates: list[tuple[LinkRecord, str]] = []
|
||||||
seen: set[tuple[Path, str]] = set()
|
seen: set[tuple[Path, str]] = set()
|
||||||
@@ -89,6 +114,10 @@ def _collect_https_candidates(
|
|||||||
if not _is_http_url(record.url):
|
if not _is_http_url(record.url):
|
||||||
continue
|
continue
|
||||||
original_check = checks.get(record.url)
|
original_check = checks.get(record.url)
|
||||||
|
if original_check is None:
|
||||||
|
continue
|
||||||
|
if not _is_listed_result(result=original_check, check_codes=check_codes):
|
||||||
|
continue
|
||||||
if original_check and original_check.redirected:
|
if original_check and original_check.redirected:
|
||||||
continue
|
continue
|
||||||
key = (record.file_path, record.url)
|
key = (record.file_path, record.url)
|
||||||
@@ -102,6 +131,7 @@ def _collect_https_candidates(
|
|||||||
def _handle_rewrites(
|
def _handle_rewrites(
|
||||||
records: list[LinkRecord],
|
records: list[LinkRecord],
|
||||||
checks: dict[str, LinkCheckResult],
|
checks: dict[str, LinkCheckResult],
|
||||||
|
check_codes: Optional[set[int]],
|
||||||
redirects: list[tuple[LinkRecord, LinkCheckResult]],
|
redirects: list[tuple[LinkRecord, LinkCheckResult]],
|
||||||
checker: LinkChecker,
|
checker: LinkChecker,
|
||||||
editor: ASTMarkdownEditor,
|
editor: ASTMarkdownEditor,
|
||||||
@@ -139,7 +169,7 @@ def _handle_rewrites(
|
|||||||
continue
|
continue
|
||||||
replacements_by_file[record.file_path][record.url] = final_url
|
replacements_by_file[record.file_path][record.url] = final_url
|
||||||
|
|
||||||
https_candidates = _collect_https_candidates(records=records, checks=checks)
|
https_candidates = _collect_https_candidates(records=records, checks=checks, check_codes=check_codes)
|
||||||
if https_candidates:
|
if https_candidates:
|
||||||
console.print("\n[bold]HTTPS upgrade candidates[/bold]")
|
console.print("\n[bold]HTTPS upgrade candidates[/bold]")
|
||||||
|
|
||||||
@@ -183,6 +213,7 @@ def _handle_rewrites(
|
|||||||
def main() -> None:
|
def main() -> None:
|
||||||
args = parse_args()
|
args = parse_args()
|
||||||
console = Console()
|
console = Console()
|
||||||
|
check_codes = set(args.check_codes) if args.check_codes else None
|
||||||
|
|
||||||
scanner = MarkdownScanner()
|
scanner = MarkdownScanner()
|
||||||
records = scanner.scan_path(args.path)
|
records = scanner.scan_path(args.path)
|
||||||
@@ -192,18 +223,36 @@ def main() -> None:
|
|||||||
|
|
||||||
urls = unique_preserve_order(record.url for record in records)
|
urls = unique_preserve_order(record.url for record in records)
|
||||||
with LinkChecker(timeout=args.timeout) as checker:
|
with LinkChecker(timeout=args.timeout) as checker:
|
||||||
checks = checker.check_many(urls)
|
total_urls = len(urls)
|
||||||
table = _build_report_table(records, checks)
|
console.print(f"Checking {total_urls} unique URLs...")
|
||||||
|
|
||||||
|
def progress(current: int, total: int, url: str) -> None:
|
||||||
|
_ = url
|
||||||
|
if current == 1 or current % 25 == 0 or current == total:
|
||||||
|
console.print(f"[dim]Progress: {current}/{total}[/dim]")
|
||||||
|
|
||||||
|
try:
|
||||||
|
checks = checker.check_many(urls, progress_callback=progress)
|
||||||
|
except KeyboardInterrupt:
|
||||||
|
console.print("\n[yellow]Aborted by user during link checks.[/yellow]")
|
||||||
|
return
|
||||||
|
|
||||||
|
table = _build_report_table(records, checks, check_codes=check_codes)
|
||||||
if table.row_count:
|
if table.row_count:
|
||||||
console.print(table)
|
console.print(table)
|
||||||
else:
|
else:
|
||||||
console.print("No non-200 links found.")
|
if check_codes:
|
||||||
|
sorted_codes = ", ".join(str(code) for code in sorted(check_codes))
|
||||||
|
console.print(f"No links found with status code(s): {sorted_codes}.")
|
||||||
|
else:
|
||||||
|
console.print("No non-200 links found.")
|
||||||
|
|
||||||
redirects = _collect_redirects(records, checks)
|
redirects = _collect_redirects(records, checks, check_codes=check_codes)
|
||||||
editor = ASTMarkdownEditor()
|
editor = ASTMarkdownEditor()
|
||||||
_handle_rewrites(
|
_handle_rewrites(
|
||||||
records=records,
|
records=records,
|
||||||
checks=checks,
|
checks=checks,
|
||||||
|
check_codes=check_codes,
|
||||||
redirects=redirects,
|
redirects=redirects,
|
||||||
checker=checker,
|
checker=checker,
|
||||||
editor=editor,
|
editor=editor,
|
||||||
|
|||||||
Reference in New Issue
Block a user