{
  "attention_microbenchmark_count": 20,
  "created_at": "2026-05-26T03:01:00.402027+00:00",
  "failure_count": 0,
  "highest_throughput_configuration": {
    "batch_size": 32,
    "exact_accuracy": 1.0,
    "examples_per_second": 62.31686647096926,
    "experiment": "throughput_latency_batch_scaling",
    "first_run_seconds": 0.6442325829993933,
    "generated_tokens_per_second": 1184.020462948416,
    "implementation_name": "kv_cache",
    "mean_latency_per_batch": 0.5135046386664422,
    "number_correct": 12,
    "number_total": 12,
    "p95_latency_per_batch": 0.5287551789049758,
    "per_example_latency": 0.01604701995832632,
    "timestamp": "2026-05-26T03:01:00.392486+00:00"
  },
  "interactive_latency_configuration": {
    "batch_size": 1,
    "exact_accuracy": 1.0,
    "examples_per_second": 6.769303080632282,
    "experiment": "throughput_latency_batch_scaling",
    "first_run_seconds": 0.12676262500463054,
    "generated_tokens_per_second": 128.61675853201336,
    "implementation_name": "kv_cache",
    "mean_latency_per_batch": 0.1477256946673151,
    "number_correct": 1,
    "number_total": 1,
    "p95_latency_per_batch": 0.1580386581030325,
    "per_example_latency": 0.1477256946673151,
    "timestamp": "2026-05-26T02:59:26.440332+00:00"
  },
  "lowest_batch_latency_configuration": {
    "batch_size": 1,
    "exact_accuracy": 1.0,
    "examples_per_second": 6.769303080632282,
    "experiment": "throughput_latency_batch_scaling",
    "first_run_seconds": 0.12676262500463054,
    "generated_tokens_per_second": 128.61675853201336,
    "implementation_name": "kv_cache",
    "mean_latency_per_batch": 0.1477256946673151,
    "number_correct": 1,
    "number_total": 1,
    "p95_latency_per_batch": 0.1580386581030325,
    "per_example_latency": 0.1477256946673151,
    "timestamp": "2026-05-26T02:59:26.440332+00:00"
  },
  "lowest_per_example_latency_configuration": {
    "batch_size": 32,
    "exact_accuracy": 1.0,
    "examples_per_second": 62.31686647096926,
    "experiment": "throughput_latency_batch_scaling",
    "first_run_seconds": 0.6442325829993933,
    "generated_tokens_per_second": 1184.020462948416,
    "implementation_name": "kv_cache",
    "mean_latency_per_batch": 0.5135046386664422,
    "number_correct": 12,
    "number_total": 12,
    "p95_latency_per_batch": 0.5287551789049758,
    "per_example_latency": 0.01604701995832632,
    "timestamp": "2026-05-26T03:01:00.392486+00:00"
  },
  "main_caveats": [
    "Several diagnostics are microbenchmarks or simulations and are labeled as such in their output files.",
    "CPU backend measurements do not expose kernel-launch timing separately.",
    "Process RSS is a coarse memory signal and does not isolate individual JAX computations.",
    "Sequence-length scaling beyond 28 tokens is synthetic because the trained model has learned position embeddings for max_seq_len=28."
  ],
  "memory_behavior_count": 9
}
