#!/bin/bash
#
# Audits a Claude Code session: asks a second, headless `claude -p` run to
# read the session transcript and decide whether real on-device/simulator
# testing happened before work ended.
# (Presumably installed as a Claude Code hook that fires when the agent tries
# to stop -- confirm against the hook configuration.)
#
# Usage:   <this-script> <transcript_path>
# Output:  exactly one JSON verdict on stdout:
#            {"continue": false}                   -> work may end
#            {"continue": true, "reason": "..."}   -> keep working
# Policy:  fail open. Any problem (missing transcript, claude/jq failure,
#          malformed reply) yields {"continue": false}.
# Requires: claude, jq, tail.

# Verdict emitted whenever the audit cannot be completed or trusted.
readonly FAIL_OPEN_VERDICT='{"continue": false}'

#######################################
# Render the auditor prompt.
# Arguments:
#   $1 - last lines of the transcript, embedded verbatim
#   $2 - transcript path, so the auditor can Read the full file
# Outputs:
#   The prompt text on stdout.
#######################################
build_audit_prompt() {
  local recent=$1
  local transcript_path=$2

  # Unquoted delimiter on purpose: $recent and $transcript_path are expanded
  # once; their contents are not re-expanded.
  cat <<EOF
If Claude Code was not given a significant coding task, then you can let it finish working.

If Claude Code was given a significant coding task (e.g. adding a new feature, or fixing a bug), then it MUST test before ending work.

CRITICAL RULES:
- It is NOT ok to end work just after writing code and claiming it's done
- It is NOT ok to ask the user to test - Claude MUST test itself
- It is NOT ok to say 'please test' or 'please verify' - that's a cop-out
- Claude must actually run the app, interact with it, and verify the fix works

Your job is to audit Claude Code: if it did not actually test ON A DEVICE/SIMULATOR, then do NOT let it stop working.

Acceptable tests (in order of preference): physical Android device (connected via USB adb), physical iOS device (wirelessly), iOS simulator, Android emulator.

Look for evidence that Claude actually:
1. Ran/reloaded the app
2. Navigated to the relevant screen
3. Interacted with the UI to trigger the fixed behavior
4. Took screenshots or observed the result
5. Confirmed the fix works

If Claude just wrote code and said 'test this' or 'please verify', that is NOT acceptable - return continue:true.

RECENT TRANSCRIPT (last 30 lines):
$recent

FULL TRANSCRIPT: $transcript_path
(Read this file if you need more context)

Reply ONLY with JSON:
- Complete: {"continue": false}
- Incomplete: {"continue": true, "reason": ""}
EOF
}

#######################################
# Print the auditor's verdict if it is well-formed, else the fail-open one.
# Arguments:
#   $1 - raw text returned by the auditor
# Outputs:
#   One JSON verdict on stdout.
#######################################
emit_verdict() {
  local verdict=$1

  # Check the *shape* (exactly one JSON object with a boolean "continue")
  # instead of the truthiness of .continue: `jq -e '.continue'` exits 1 for
  # a perfectly valid `false` and accepts non-boolean values such as "yes".
  # A missing jq binary also lands in the else branch (fail open).
  if jq -e -s \
    'length == 1 and (.[0] | type == "object" and (.continue | type == "boolean"))' \
    <<<"$verdict" >/dev/null 2>&1; then
    printf '%s\n' "$verdict"
  else
    printf '%s\n' "$FAIL_OPEN_VERDICT"
  fi
}

#######################################
# Entry point: run the audit for one transcript.
# Arguments:
#   $1 - path to the session transcript file
# Outputs:
#   One JSON verdict on stdout; diagnostics on stderr.
# Returns:
#   0 always (failures are reported through the fail-open verdict).
#######################################
main() {
  local transcript_path=${1:-}
  local recent audit_prompt result

  # Without a readable transcript there is nothing to audit; skip the
  # auditor call rather than asking it to judge an empty transcript.
  if [[ ! -r "$transcript_path" || -d "$transcript_path" ]]; then
    printf 'audit: transcript not readable: %s\n' \
      "${transcript_path:-<none>}" >&2
    printf '%s\n' "$FAIL_OPEN_VERDICT"
    return 0
  fi

  recent=$(tail -n 30 -- "$transcript_path")
  audit_prompt=$(build_audit_prompt "$recent" "$transcript_path")

  # The auditor's stderr is discarded on purpose so that only the verdict
  # reaches this script's output. `.result // empty` turns a missing/null
  # result into an empty string, which emit_verdict treats as malformed.
  # NOTE(review): the prompt travels as a single argv entry; very long
  # transcript lines could exceed the OS argument-size limit, in which case
  # the call fails and the script fails open -- confirm whether that matters.
  result=$(claude -p "$audit_prompt" --allowedTools "Read" \
    --output-format json 2>/dev/null \
    | jq -r '.result // empty')

  emit_verdict "$result"
}

main "$@"