Policy safety review

{
  "benchmarkRunId": "64f51603-235b-449b-a560-06a5244f77c1",
  "benchmarkId": "benchmark/policy-safety-review@1.0.0",
  "benchmarkTitle": "Policy safety review",
  "packageId": "web/give.md/policy-watchdog@1.0.0",
  "packageHash": "sha256:4f4de6805c4a296366654abcc5f2ddb8a8905f92adf71b1c1ec31eba15058aa3",
  "policyId": "policy/research-session@1.0.0",
  "policyHash": "sha256:b83ad9e9d1a209f12528b5cf4352d45cd14c9a8f061c19df898743a5f5ce594f",
  "runtime": "claude",
  "signer": "seed:policy-watchdog",
  "writeReceiptRequested": true,
  "status": "success",
  "executionBackend": "local",
  "requestedExecutionBackend": "local",
  "sandboxProfile": "default",
  "networkPolicy": "restricted",
  "networkAllowlist": [
    "https://api.give.md",
    "https://search.give.md"
  ],
  "timeoutMs": 10000,
  "benchmarkDefinitionHash": "sha256:d3ed23773d0c2a566a8dca19da0df5c23082b6df04e381f795edfc65b24842f0",
  "replayInput": {
    "availableTools": [
      "fetch",
      "json",
      "markdown"
    ],
    "grantedPermissions": [
      "http:get",
      "storage:write"
    ],
    "requestedApiOrigins": [
      "https://api.give.md"
    ],
    "requestedChains": [
      "base"
    ],
    "requestedTokens": [
      "USDC"
    ],
    "requestedSpend": "0.05 USDC"
  },
  "score": 10,
  "maxScore": 10,
  "summary": "Benchmark suite produced the expected deterministic output.",
  "startedAt": "2026-03-15T10:36:09.814Z",
  "createdAt": "2026-03-15T10:36:09.814Z",
  "evaluationChecks": [
    {
      "id": "runtime:claude",
      "status": "pass",
      "message": "Runtime claude is supported."
    },
    {
      "id": "namespace:verified",
      "status": "pass",
      "message": "Namespace is verified."
    },
    {
      "id": "tool:fetch",
      "status": "pass",
      "message": "Required tool fetch is available."
    },
    {
      "id": "tool:markdown",
      "status": "pass",
      "message": "Required tool markdown is available."
    },
    {
      "id": "tool:json",
      "status": "pass",
      "message": "Required tool json is available."
    },
    {
      "id": "permission:http:get",
      "status": "pass",
      "message": "Permission http:get is granted."
    },
    {
      "id": "permission:storage:write",
      "status": "pass",
      "message": "Permission storage:write is granted."
    },
    {
      "id": "policy:attached",
      "status": "pass",
      "message": "Attached policy policy/research-session@1.0.0 is available for evaluation."
    },
    {
      "id": "policy:expiry",
      "status": "pass",
      "message": "Policy remains valid until 2026-12-31T00:00:00Z."
    },
    {
      "id": "policy:origin:https://api.give.md",
      "status": "pass",
      "message": "API origin https://api.give.md is allowed."
    },
    {
      "id": "policy:chain:base",
      "status": "pass",
      "message": "Chain base is allowed."
    },
    {
      "id": "policy:token:USDC",
      "status": "pass",
      "message": "Token USDC is allowed."
    },
    {
      "id": "policy:spend:limit",
      "status": "pass",
      "message": "Requested spend 0.05 USDC is within the per-tx limit of 0.25 USDC."
    }
  ],
  "caseResults": [
    {
      "caseId": "flags-approval-thresholds",
      "title": "Flags approval thresholds",
      "status": "pass",
      "score": 5,
      "maxScore": 5,
      "message": "Policy review flagged the expected approval threshold findings."
    },
    {
      "caseId": "tracks-delegation-windows",
      "title": "Tracks delegation windows",
      "status": "pass",
      "score": 5,
      "maxScore": 5,
      "message": "Policy review tracked the delegation window and follow-up."
    }
  ],
  "evaluationOk": true,
  "blockedByPreflight": false,
  "completedAt": "2026-03-15T10:36:09.868Z",
  "lastHeartbeatAt": "2026-03-15T10:36:09.868Z",
  "outputHash": "sha256:d5095bc8dec61bc94faefd08437c6342e5b960800e5294b67184a1f72eaa6483",
  "stdoutObjectKey": "64f51603-235b-449b-a560-06a5244f77c1/output.md",
  "artifactsObjectPrefix": "64f51603-235b-449b-a560-06a5244f77c1/",
  "judgeSummary": "Benchmark suite produced the expected deterministic output. Policy review flagged the expected approval threshold findings. Policy review tracked the delegation window and follow-up.",
  "receiptId": "050998fa-f425-4d10-a34e-74a6a6c2692d"
}