<?xml version="1.0" encoding="UTF-8"?>
<!--
  RSS 2.0 feed for the "Writing" section.
  The channel carries one <item> per post, listed newest first by <pubDate>.
  Every item has a title, a link, a guid, a pubDate and a description.

  NOTE(review): all <link> values are site-relative (/writing...). Feed readers
  generally expect absolute URLs here; confirm whether the publishing step
  resolves them against the site origin before the feed is served.
-->
<rss version="2.0">
  <channel>
    <title>Abhijeet Gupta, Writing</title>
    <link>/writing</link>
    <description>Notes &amp; essays by Abhijeet Gupta.</description>

    <!-- Each guid is the post's slug, marked isPermaLink="false" so it is not read as a URL. -->
    <item>
      <title><![CDATA[How to correctly report LLM-as-a-Judge evaluations]]></title>
      <link>/writing#llm-as-a-judge-evaluation</link>
      <guid isPermaLink="false">llm-as-a-judge-evaluation</guid>
      <pubDate>Fri, 12 Jun 2026 00:00:00 GMT</pubDate>
      <description><![CDATA[A practical guide to running, calibrating, and reporting LLM-as-a-Judge results — covering judge selection, position bias, pairwise vs scoring setups, and the statistics that actually belong in the paper.]]></description>
    </item>

    <item>
      <title><![CDATA[10 must-read machine learning research papers for ML engineers]]></title>
      <link>/writing#must-read-ml-research-papers</link>
      <guid isPermaLink="false">must-read-ml-research-papers</guid>
      <pubDate>Wed, 10 Jun 2026 00:00:00 GMT</pubDate>
      <description><![CDATA[An annotated bibliography of foundational and recent work in LLM evaluation and reinforcement learning, with notes on why each paper matters in practice.]]></description>
    </item>

    <item>
      <title><![CDATA[Notes on evaluating reasoning models across families]]></title>
      <link>/writing#evaluating-reasoning-models</link>
      <guid isPermaLink="false">evaluating-reasoning-models</guid>
      <pubDate>Mon, 20 Apr 2026 00:00:00 GMT</pubDate>
      <description><![CDATA[Observations from disentangling reasoning length effects from forced re-entry across Llama and Qwen distilled models.]]></description>
    </item>

    <item>
      <title><![CDATA[Reproducibility on a shared Slurm cluster]]></title>
      <link>/writing#reproducibility-on-slurm</link>
      <guid isPermaLink="false">reproducibility-on-slurm</guid>
      <pubDate>Sun, 02 Nov 2025 00:00:00 GMT</pubDate>
      <description><![CDATA[Small operational habits that yield significant returns when several collaborators share the same GPU resources.]]></description>
    </item>
  </channel>
</rss>