feat: add persistent scan_index/url_policy state with --rescan and --reset-url-policy
This commit is contained in:
@@ -1,31 +1,119 @@
|
||||
#!/usr/bin/env bash
#
# Generates markdown fixtures and runs a series of mdlink scans that exercise
# its persistent state file (scan_index / url_policy).
#
# Usage: <script> [OUTPUT_FILE | --generate-only]
set -euo pipefail

# $1 doubles as the --generate-only switch (checked near the end of the
# script), so only treat it as the output path when it is not that flag.
# An empty or missing $1 falls back to the default name.
OUTPUT_FILE="test.md"
if [[ -n "${1:-}" && "${1}" != "--generate-only" ]]; then
  OUTPUT_FILE="$1"
fi

STATE_FILE=".mdlink-state.json"
TEST_DIR="testdata"
TEST_FILE="${TEST_DIR}/one.markdown"

# Look for the binary in ./venv first, then fall back to ./.venv.
BIN="./venv/bin/mdlink"
if [[ ! -x "${BIN}" ]]; then
  BIN="./.venv/bin/mdlink"
fi

# Seed the output file with its heading. The heredoc is terminated right
# here so it cannot swallow the script text that follows.
# NOTE(review): only the heading line is visible for this file; confirm
# whether more content is expected.
cat > "${OUTPUT_FILE}" <<'EOF'
# mdlink Test
EOF

# Prefer ripgrep when installed; otherwise use extended-regex grep.
# Kept as a plain string because the scan pipelines expand it unquoted so
# that "grep -E" splits into command and option.
FILTER_CMD="grep -E"
if command -v rg >/dev/null 2>&1; then
  FILTER_CMD="rg"
fi
|
||||
|
||||
#######################################
# Writes the three markdown fixture files used by the scans and reports
# each generated path on stdout.
# Globals:   TEST_DIR (read), OUTPUT_FILE (read, only for the report line)
# Arguments: none
# Outputs:   one "Generated <path>" line per file on stdout
#######################################
generate_testdata() {
  mkdir -p -- "${TEST_DIR}"

  # Heredoc bodies stay at column 0 and use a quoted delimiter so the
  # markdown is written literally, with no shell expansion.
  # NOTE(review): the bare "- " item below, and the double space in
  # "text before  text after" in the other fixtures, look like image links
  # that were lost before this text reached review - confirm the intended
  # markdown.
  cat > "${TEST_DIR}/one.markdown" <<'EOF'
# Test Data One

- [ok](https://httpbin.org/status/200)
- [redirect](http://github.com)
- [broken](https://httpbin.org/status/404)
- Naked: https://httpbin.org/status/500
- 
EOF

  ## Additional Cases
  cat > "${TEST_DIR}/two.markdown" <<'EOF'
# Test Data Two

- [redirect with title](http://github.com "GitHub redirect")
- [query and fragment](https://example.com/docs?lang=de#intro)
- [duplicate redirect](http://github.com)
- [duplicate redirect again](http://github.com)
- [non-http scheme should be ignored](mailto:team@example.org)
- [ftp should be ignored](ftp://speedtest.tele2.net)
- [query and fragment](https://example.com/docs?lang=de#intro)
- [inline code URL should not be a markdown link](`https://example.org/code`)
- [image in text should be ignored] text before  text after
- autolink angle brackets: <https://example.org/autolink>
EOF

  cat > "${TEST_DIR}/three.markdown" <<'EOF'
# Test Data Three

- [parentheses in URL](https://en.wikipedia.org/wiki/Function_(mathematics))
- [trailing punctuation in sentence] See https://example.org/docs, for details.
- autolink angle brackets: <https://example.org/autolink>
- [image in text should be ignored] text before  text after
- [mailto should be ignored](mailto:team@example.org)
- bare www should be ignored: www.example.org
EOF

  # This function does not write OUTPUT_FILE itself; the line below only
  # reports the path.
  echo "Generated ${OUTPUT_FILE}"
  echo "Generated ${TEST_DIR}/one.markdown"
  echo "Generated ${TEST_DIR}/two.markdown"
  echo "Generated ${TEST_DIR}/three.markdown"
}
|
||||
|
||||
#######################################
# Runs mdlink with the shared "--timeout 0.5 --check 404" options and
# prints only the output lines matching a pattern.
# Globals:   BIN (read), FILTER_CMD (read)
# Arguments: $1 - regex passed to the filter command
#            $2.. - target path and extra mdlink flags, placed before the
#                   shared options
# Returns:   always 0
#######################################
_run_mdlink_filtered() {
  local pattern="$1"
  shift

  # FILTER_CMD is a plain string such as "grep -E"; split it into an array
  # so the command and its option are passed as separate words.
  local -a filter_cmd
  IFS=' ' read -r -a filter_cmd <<< "${FILTER_CMD}"

  # "|| true" is deliberate: under "set -e"/pipefail the pipeline would
  # otherwise abort the script when the filter matches nothing or when
  # mdlink exits non-zero.
  "${BIN}" "$@" --timeout 0.5 --check 404 \
    | "${filter_cmd[@]}" "${pattern}" || true
}

#######################################
# Runs five numbered mdlink scans against the fixtures: first scan, repeat
# scan, single-file scan, scan with a url_policy "ignore" state file, and
# scan with --reset-url-policy.
# Globals:   BIN, FILTER_CMD, TEST_DIR, TEST_FILE (read);
#            STATE_FILE (removed, then rewritten)
# Arguments: none
# Outputs:   progress headings and filtered mdlink output on stdout
# Returns:   1 if BIN is not executable, 0 otherwise
#######################################
run_state_checks() {
  # Fail early: the "|| true" on each scan would otherwise hide a missing
  # binary and still print "Done.".
  if [[ ! -x "${BIN}" ]]; then
    printf 'mdlink binary not found or not executable: %s\n' "${BIN}" >&2
    return 1
  fi

  echo "Using binary: ${BIN}"
  echo "Using filter: ${FILTER_CMD}"
  echo "Using data dir: ${TEST_DIR}"
  echo

  echo "1) First directory scan"
  # Start without any stored state.
  rm -f -- "${STATE_FILE}"
  _run_mdlink_filtered "^Files total|^Checking " "${TEST_DIR}"
  echo

  echo "2) Second directory scan (should skip files via scan_index)"
  _run_mdlink_filtered "^Files total|No new files" "${TEST_DIR}"
  echo

  echo "3) Single-file scan ignores state (should still scan file)"
  _run_mdlink_filtered "^Files total|^Checking " "${TEST_FILE}"
  echo

  echo "4) url_policy ignore reduces checked URLs"
  # The quoted delimiter keeps the JSON literal, so the "testdata/..." keys
  # are not derived from TEST_DIR.
  # NOTE(review): these keys presumably have to match the paths mdlink
  # records for the scanned files - confirm if TEST_DIR ever changes.
  cat > "${STATE_FILE}" <<'EOF'
{
  "version": 1,
  "scan_index": {},
  "url_policy": {
    "testdata/one.markdown": [
      {
        "action": "ignore",
        "source": "https://httpbin.org/status/404",
        "target": "https://httpbin.org/status/404",
        "seen_at": "2026-04-17T12:00:00Z"
      }
    ],
    "testdata/two.markdown": [
      {
        "action": "ignore",
        "source": "http://github.com",
        "target": "https://github.com/",
        "seen_at": "2026-04-17T12:00:01Z"
      }
    ]
  }
}
EOF
  _run_mdlink_filtered "^Files total|^Checking " "${TEST_DIR}" --rescan
  echo

  echo "5) --reset-url-policy brings ignored URL back into checks"
  _run_mdlink_filtered "^Files total|^Checking " \
    "${TEST_DIR}" --rescan --reset-url-policy
  echo

  echo "Done."
}
|
||||
|
||||
# Entry point: always (re)generate the fixtures; run the state checks too
# unless the first argument is exactly --generate-only.
generate_only=false
if [[ "${1:-}" == "--generate-only" ]]; then
  generate_only=true
fi

generate_testdata
if [[ "${generate_only}" == false ]]; then
  run_state_checks
fi
|
||||
|
||||
Reference in New Issue
Block a user