#!/usr/bin/env bash
#
# Smoke-test driver for the `mdlink` CLI and its state file.
#
# Usage: <this-script> [--generate-only]
#
#   (no argument)     Regenerate the markdown fixtures, then run the five
#                     state-handling scans and print the filtered output.
#   --generate-only   Only (re)write the markdown fixtures and exit.
#
# Side effects (all relative to the current working directory):
#   * overwrites testdata/{one,two,three}.markdown
#   * deletes and later overwrites .mdlink-state.json
#
# The scans are meant for eyeballing: the script prints the interesting
# output lines but does not assert on them.

set -euo pipefail

readonly STATE_FILE=".mdlink-state.json"
readonly TEST_DIR="testdata"
readonly TEST_FILE="${TEST_DIR}/one.markdown"

# Prefer ./venv, fall back to ./.venv. Whether the fallback actually exists is
# verified in run_state_checks, so --generate-only works without any binary.
BIN="./venv/bin/mdlink"
if [[ ! -x "${BIN}" ]]; then
  BIN="./.venv/bin/mdlink"
fi

# The filter command is kept as an array so that "grep -E" stays two separate
# words without relying on unquoted word splitting.
if command -v rg >/dev/null 2>&1; then
  FILTER_CMD=(rg)
else
  FILTER_CMD=(grep -E)
fi

#######################################
# Print an error message to stderr and abort the script.
# Arguments: $* - message text
# Returns:   never; exits with status 1
#######################################
die() {
  printf 'error: %s\n' "$*" >&2
  exit 1
}

#######################################
# Write the three markdown fixture files into TEST_DIR.
# Globals:   TEST_DIR (read)
# Outputs:   one "Generated <path>" line per file on stdout
#######################################
generate_testdata() {
  mkdir -p "${TEST_DIR}"

  cat > "${TEST_DIR}/one.markdown" <<'EOF'
# Test Data One

- [ok](https://httpbin.org/status/200)
- [redirect](http://github.com)
- [broken](https://httpbin.org/status/404)
- Naked: https://httpbin.org/status/500
- ![img](https://httpbin.org/image/png)
EOF

  # NOTE(review): the "autolink angle brackets" line carries no <URL> in this
  # copy of the script; it may have been stripped somewhere along the way.
  # Confirm against the intended fixture.
  cat > "${TEST_DIR}/two.markdown" <<'EOF'
# Test Data Two

- [redirect with title](http://github.com "GitHub redirect")
- [duplicate redirect](http://github.com)
- [ftp should be ignored](ftp://speedtest.tele2.net)
- [query and fragment](https://example.com/docs?lang=de#intro)
- [inline code URL should not be a markdown link](`https://example.org/code`)
- autolink angle brackets:
EOF

  cat > "${TEST_DIR}/three.markdown" <<'EOF'
# Test Data Three

- [parentheses in URL](https://en.wikipedia.org/wiki/Function_(mathematics))
- [trailing punctuation in sentence] See https://example.org/docs, for details.
- [image in text should be ignored] text before ![logo](https://example.com/logo.png) text after
- [mailto should be ignored](mailto:team@example.org)
- bare www should be ignored: www.example.org
EOF

  echo "Generated ${TEST_DIR}/one.markdown"
  echo "Generated ${TEST_DIR}/two.markdown"
  echo "Generated ${TEST_DIR}/three.markdown"
}

#######################################
# Run mdlink with the shared "--timeout 0.5 --check 404" flags and print only
# the output lines that match a pattern.
# Globals:   BIN, FILTER_CMD (read)
# Arguments: $1  - extended regex handed to the filter command
#            $2+ - target path and extra mdlink flags, placed before the
#                  shared flags
# Returns:   always 0 (see the comment on the pipeline)
#######################################
scan_and_filter() {
  local -r pattern=$1
  shift

  # Deliberately best-effort: grep/rg exit non-zero when no line matches, and
  # with pipefail a non-zero mdlink status would also abort the run. This
  # script only displays output, so neither should stop the remaining steps.
  "${BIN}" "$@" --timeout 0.5 --check 404 \
    | "${FILTER_CMD[@]}" "${pattern}" || true
}

#######################################
# Run the five state-handling scans against the generated fixtures.
# Globals:   BIN, FILTER_CMD, STATE_FILE, TEST_DIR, TEST_FILE (read)
# Outputs:   step headings and filtered mdlink output on stdout
# Returns:   exits with status 1 if the mdlink binary is not executable
#######################################
run_state_checks() {
  # Without this guard every scan would fail behind "|| true" and the script
  # would still finish with "Done." and exit status 0.
  [[ -x "${BIN}" ]] \
    || die "mdlink not found or not executable (tried ./venv/bin/mdlink and ./.venv/bin/mdlink)"

  echo "Using binary: ${BIN}"
  echo "Using filter: ${FILTER_CMD[*]}"
  echo "Using data dir: ${TEST_DIR}"
  echo

  echo "1) First directory scan"
  rm -f "${STATE_FILE}"
  scan_and_filter "^Files total|^Checking " "${TEST_DIR}"
  echo

  echo "2) Second directory scan (should skip files via scan_index)"
  scan_and_filter "^Files total|No new files" "${TEST_DIR}"
  echo

  echo "3) Single-file scan ignores state (should still scan file)"
  scan_and_filter "^Files total|^Checking " "${TEST_FILE}"
  echo

  echo "4) url_policy ignore reduces checked URLs"
  # Unquoted delimiter on purpose: the url_policy keys are derived from
  # TEST_DIR so they always name the generated fixtures. The JSON contains no
  # other characters that the shell would expand.
  cat > "${STATE_FILE}" <<EOF
{
  "version": 1,
  "scan_index": {},
  "url_policy": {
    "${TEST_DIR}/one.markdown": [
      {
        "action": "ignore",
        "source": "https://httpbin.org/status/404",
        "target": "https://httpbin.org/status/404",
        "seen_at": "2026-04-17T12:00:00Z"
      }
    ],
    "${TEST_DIR}/two.markdown": [
      {
        "action": "ignore",
        "source": "http://github.com",
        "target": "https://github.com/",
        "seen_at": "2026-04-17T12:00:01Z"
      }
    ]
  }
}
EOF
  scan_and_filter "^Files total|^Checking " "${TEST_DIR}" --rescan
  echo

  echo "5) --reset-url-policy brings ignored URL back into checks"
  scan_and_filter "^Files total|^Checking " \
    "${TEST_DIR}" --rescan --reset-url-policy
  echo

  echo "Done."
}

#######################################
# Entry point: always regenerate the fixtures, then run the scans unless the
# first argument is --generate-only.
# Arguments: $1 - optional "--generate-only"
#######################################
main() {
  local generate_only=false
  if [[ "${1:-}" == "--generate-only" ]]; then
    generate_only=true
  fi

  generate_testdata

  if [[ "${generate_only}" == false ]]; then
    run_state_checks
  fi
}

main "$@"