Files
mdlink/test.sh
T

120 lines
3.3 KiB
Bash
Executable File

#!/usr/bin/env bash
#
# Test driver for mdlink: writes markdown fixtures into TEST_DIR and, unless
# called with --generate-only, runs a series of scans against them.
#
# Usage: test.sh [--generate-only]
set -euo pipefail

# All paths below are relative to the current working directory.
STATE_FILE=".mdlink-state.json"
TEST_DIR="testdata"
TEST_FILE="${TEST_DIR}/one.markdown"

# Use ./venv/bin/mdlink when it is executable; otherwise fall back to
# ./.venv/bin/mdlink (the fallback itself is not checked here).
BIN="./.venv/bin/mdlink"
if [[ -x "./venv/bin/mdlink" ]]; then
  BIN="./venv/bin/mdlink"
fi

# Output filter: rg when such a command is available, else grep -E.
# Kept as a single string; consumers split it into words.
FILTER_CMD="grep -E"
if command -v rg >/dev/null 2>&1; then
  FILTER_CMD="rg"
fi
#######################################
# Writes the three markdown fixture files (one, two, three) into TEST_DIR,
# creating the directory if needed and overwriting existing fixtures.
# Globals:   TEST_DIR (read)
# Outputs:   one "Generated <path>" line per fixture on stdout
#######################################
generate_testdata() {
  local fixture

  mkdir -p "${TEST_DIR}"

  # The quoted 'EOF' delimiter keeps every fixture line literal
  # (no expansion of backticks or other shell syntax).
  cat <<'EOF' > "${TEST_DIR}/one.markdown"
# Test Data One
- [ok](https://httpbin.org/status/200)
- [redirect](http://github.com)
- [broken](https://httpbin.org/status/404)
- Naked: https://httpbin.org/status/500
- ![img](https://httpbin.org/image/png)
EOF

  cat <<'EOF' > "${TEST_DIR}/two.markdown"
# Test Data Two
- [redirect with title](http://github.com "GitHub redirect")
- [duplicate redirect](http://github.com)
- [ftp should be ignored](ftp://speedtest.tele2.net)
- [query and fragment](https://example.com/docs?lang=de#intro)
- [inline code URL should not be a markdown link](`https://example.org/code`)
- autolink angle brackets: <https://example.org/autolink>
EOF

  cat <<'EOF' > "${TEST_DIR}/three.markdown"
# Test Data Three
- [parentheses in URL](https://en.wikipedia.org/wiki/Function_(mathematics))
- [trailing punctuation in sentence] See https://example.org/docs, for details.
- [image in text should be ignored] text before ![logo](https://example.com/logo.png) text after
- [mailto should be ignored](mailto:team@example.org)
- bare www should be ignored: www.example.org
EOF

  # Report the files in the order they were written.
  for fixture in one two three; do
    echo "Generated ${TEST_DIR}/${fixture}.markdown"
  done
}
#######################################
# Runs "${BIN}" with the given arguments followed by the shared
# "--timeout 0.5 --check 404" options and prints only matching output lines.
# Globals:   BIN, FILTER_CMD (read)
# Arguments: $1   - pattern handed to the filter command
#            $2.. - scan target plus any extra mdlink flags
# Returns:   always 0 (see the note on the pipeline below)
#######################################
_mdlink_filtered() {
  local pattern=$1
  shift
  local -a filter_argv
  # FILTER_CMD is a plain string such as "grep -E"; split it on whitespace
  # explicitly (read does not glob) instead of via an unquoted expansion.
  read -r -a filter_argv <<< "${FILTER_CMD}"
  # Best-effort on purpose: grep/rg exit 1 when no line matches, and mdlink
  # presumably exits non-zero when it reports broken links (not verified here).
  "${BIN}" "$@" --timeout 0.5 --check 404 \
    | "${filter_argv[@]}" "${pattern}" || true
}

#######################################
# Runs five mdlink scans against the fixtures and prints the filtered output
# of each one under a numbered heading, for manual inspection.
# Globals:   BIN, FILTER_CMD, TEST_DIR, TEST_FILE (read)
#            STATE_FILE (read; the file is deleted, then rewritten in step 4)
# Outputs:   headings and filtered mdlink lines on stdout
# Returns:   1 if BIN is not an executable file, 0 otherwise
#######################################
run_state_checks() {
  # Fail fast: the per-scan "|| true" would otherwise hide a missing binary
  # and the run would still end with "Done.".
  if [[ ! -x "${BIN}" ]]; then
    echo "error: mdlink binary not found or not executable: ${BIN}" >&2
    return 1
  fi

  echo "Using binary: ${BIN}"
  echo "Using filter: ${FILTER_CMD}"
  echo "Using data dir: ${TEST_DIR}"

  echo
  echo "1) First directory scan"
  rm -f "${STATE_FILE}"
  _mdlink_filtered "^Files total|^Checking " "${TEST_DIR}"

  echo
  echo "2) Second directory scan (should skip files via scan_index)"
  _mdlink_filtered "^Files total|No new files" "${TEST_DIR}"

  echo
  echo "3) Single-file scan ignores state (should still scan file)"
  _mdlink_filtered "^Files total|^Checking " "${TEST_FILE}"

  echo
  echo "4) url_policy ignore reduces checked URLs"
  # Unquoted delimiter so the policy keys follow TEST_DIR instead of a
  # hard-coded "testdata/" prefix. TEST_DIR is inserted verbatim, so it must
  # not contain characters that need JSON escaping.
  cat > "${STATE_FILE}" <<EOF
{
  "version": 1,
  "scan_index": {},
  "url_policy": {
    "${TEST_DIR}/one.markdown": [
      {
        "action": "ignore",
        "source": "https://httpbin.org/status/404",
        "target": "https://httpbin.org/status/404",
        "seen_at": "2026-04-17T12:00:00Z"
      }
    ],
    "${TEST_DIR}/two.markdown": [
      {
        "action": "ignore",
        "source": "http://github.com",
        "target": "https://github.com/",
        "seen_at": "2026-04-17T12:00:01Z"
      }
    ]
  }
}
EOF
  _mdlink_filtered "^Files total|^Checking " "${TEST_DIR}" --rescan

  echo
  echo "5) --reset-url-policy brings ignored URL back into checks"
  _mdlink_filtered "^Files total|^Checking " \
    "${TEST_DIR}" --rescan --reset-url-policy

  echo
  echo "Done."
}
#######################################
# Entry point: always regenerates the fixtures, then runs the state checks
# unless the first argument is exactly "--generate-only".
# Globals:   generate_only (written: "true" or "false")
# Arguments: $1 - optional "--generate-only"; anything else is ignored
#######################################
main() {
  case "${1:-}" in
    --generate-only) generate_only=true ;;
    *) generate_only=false ;;
  esac

  generate_testdata

  if [[ "${generate_only}" != true ]]; then
    run_state_checks
  fi
}

main "$@"