<?xml version="1.0" encoding="UTF-8"?><rss xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:atom="http://www.w3.org/2005/Atom" version="2.0" xmlns:itunes="http://www.itunes.com/dtds/podcast-1.0.dtd" xmlns:googleplay="http://www.google.com/schemas/play-podcasts/1.0"><channel><title><![CDATA[Designing with AI]]></title><description><![CDATA[Organized thoughts on building/designing apps that integrate state of the art AI agents.]]></description><link>https://newsletter.victordibia.com</link><image><url>https://substackcdn.com/image/fetch/$s_!1FgP!,w_256,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fff160652-8bbb-475e-80a7-ba4eb5f80dcb_504x504.png</url><title>Designing with AI</title><link>https://newsletter.victordibia.com</link></image><generator>Substack</generator><lastBuildDate>Sat, 04 Apr 2026 08:22:52 GMT</lastBuildDate><atom:link href="https://newsletter.victordibia.com/feed" rel="self" type="application/rss+xml"/><language><![CDATA[en]]></language><webMaster><![CDATA[victordibia@substack.com]]></webMaster><itunes:owner><itunes:email><![CDATA[victordibia@substack.com]]></itunes:email><itunes:name><![CDATA[Victor Dibia, PhD]]></itunes:name></itunes:owner><itunes:author><![CDATA[Victor Dibia, PhD]]></itunes:author><googleplay:owner><![CDATA[victordibia@substack.com]]></googleplay:owner><googleplay:email><![CDATA[victordibia@substack.com]]></googleplay:email><googleplay:author><![CDATA[Victor Dibia, PhD]]></googleplay:author><itunes:block><![CDATA[Yes]]></itunes:block><item><title><![CDATA[Building Your Own Claude Code from Scratch]]></title><description><![CDATA[Three Extensions That Turn your For Loop into an Agent like Claude Code]]></description><link>https://newsletter.victordibia.com/p/building-a-claude-code-like-agent</link><guid 
isPermaLink="false">https://newsletter.victordibia.com/p/building-a-claude-code-like-agent</guid><dc:creator><![CDATA[Victor Dibia, PhD]]></dc:creator><pubDate>Wed, 01 Apr 2026 13:15:08 GMT</pubDate><enclosure url="https://substackcdn.com/image/fetch/$s_!NcCQ!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F62137b8a-dce6-4edd-bf4f-3759580b8ca3_1678x1140.png" length="0" type="image/jpeg"/><content:encoded><![CDATA[<div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!NcCQ!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F62137b8a-dce6-4edd-bf4f-3759580b8ca3_1678x1140.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!NcCQ!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F62137b8a-dce6-4edd-bf4f-3759580b8ca3_1678x1140.png 424w, https://substackcdn.com/image/fetch/$s_!NcCQ!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F62137b8a-dce6-4edd-bf4f-3759580b8ca3_1678x1140.png 848w, https://substackcdn.com/image/fetch/$s_!NcCQ!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F62137b8a-dce6-4edd-bf4f-3759580b8ca3_1678x1140.png 1272w, https://substackcdn.com/image/fetch/$s_!NcCQ!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F62137b8a-dce6-4edd-bf4f-3759580b8ca3_1678x1140.png 1456w" sizes="100vw"><img 
src="https://substackcdn.com/image/fetch/$s_!NcCQ!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F62137b8a-dce6-4edd-bf4f-3759580b8ca3_1678x1140.png" width="1456" height="989" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/62137b8a-dce6-4edd-bf4f-3759580b8ca3_1678x1140.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:989,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:1446996,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:false,&quot;topImage&quot;:true,&quot;internalRedirect&quot;:&quot;https://newsletter.victordibia.com/i/173104368?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F62137b8a-dce6-4edd-bf4f-3759580b8ca3_1678x1140.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!NcCQ!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F62137b8a-dce6-4edd-bf4f-3759580b8ca3_1678x1140.png 424w, https://substackcdn.com/image/fetch/$s_!NcCQ!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F62137b8a-dce6-4edd-bf4f-3759580b8ca3_1678x1140.png 848w, https://substackcdn.com/image/fetch/$s_!NcCQ!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F62137b8a-dce6-4edd-bf4f-3759580b8ca3_1678x1140.png 1272w, https://substackcdn.com/image/fetch/$s_!NcCQ!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F62137b8a-dce6-4edd-bf4f-3759580b8ca3_1678x1140.png 
1456w" sizes="100vw" fetchpriority="high"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>In a <a href="https://newsletter.victordibia.com/p/the-agent-execution-loop-how-to-build">previous post</a>, I walked through the basic agent execution loop - the while loop that calls a model, executes tools, and iterates. That loop is the foundation. 
But there&#8217;s a gap between a basic loop and agents like <a href="https://www.anthropic.com/claude-code">Claude Code</a>, <a href="https://github.blog/ai-and-ml/github-copilot/agent-mode-101-all-about-github-copilots-powerful-mode/">GitHub Copilot CLI</a>, or <a href="https://github.com/google-gemini/gemini-cli">Gemini CLI</a> that handle complex, multi-step tasks over extended runs.</p><p>Consider what happens when you ask Claude Code to &#8220;refactor this authentication system to use JWT tokens.&#8221; It needs to find all relevant files across the codebase, read and understand the current implementation, plan the changes, edit multiple files, run tests to verify nothing broke, and iterate if tests fail. That&#8217;s 20-40 iterations. Or more. Each iteration adds tool calls and results to the context. A basic loop can&#8217;t handle this - <a href="https://newsletter.victordibia.com/p/context-engineering-101-how-agents">context explodes</a>, the agent quits early, and you have no visibility into what&#8217;s happening.</p><p>What bridges the gap? 
Three extensions:</p><ol><li><p><strong>Tools</strong> - Not just any tools, but the specific set that enables code exploration and modification at scale</p></li><li><p><strong>Middlewares and hooks</strong> - Per-call interception for observability and safety, plus loop-level control for persistence</p></li><li><p><strong>Context management</strong> - Strategies to handle the inevitable context explosion</p></li></ol><p>This post walks through building an updated agent (beyond the for loop) using <a href="https://github.com/victordibia/designing-multiagent-systems">PicoAgents</a> - the companion multi-agent framework built from scratch as part of the <a href="https://multiagentbook.com/">Designing Multi-Agent Systems</a> book project.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!YT4d!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff702ca2f-e73b-41da-ae5e-255280947b32_2752x1536.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!YT4d!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff702ca2f-e73b-41da-ae5e-255280947b32_2752x1536.png 424w, https://substackcdn.com/image/fetch/$s_!YT4d!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff702ca2f-e73b-41da-ae5e-255280947b32_2752x1536.png 848w, https://substackcdn.com/image/fetch/$s_!YT4d!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff702ca2f-e73b-41da-ae5e-255280947b32_2752x1536.png 1272w, 
https://substackcdn.com/image/fetch/$s_!YT4d!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff702ca2f-e73b-41da-ae5e-255280947b32_2752x1536.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!YT4d!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff702ca2f-e73b-41da-ae5e-255280947b32_2752x1536.png" width="1456" height="813" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/f702ca2f-e73b-41da-ae5e-255280947b32_2752x1536.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:813,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:4494885,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:false,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://newsletter.victordibia.com/i/173104368?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff702ca2f-e73b-41da-ae5e-255280947b32_2752x1536.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!YT4d!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff702ca2f-e73b-41da-ae5e-255280947b32_2752x1536.png 424w, https://substackcdn.com/image/fetch/$s_!YT4d!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff702ca2f-e73b-41da-ae5e-255280947b32_2752x1536.png 848w, 
https://substackcdn.com/image/fetch/$s_!YT4d!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff702ca2f-e73b-41da-ae5e-255280947b32_2752x1536.png 1272w, https://substackcdn.com/image/fetch/$s_!YT4d!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff702ca2f-e73b-41da-ae5e-255280947b32_2752x1536.png 1456w" sizes="100vw"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a><figcaption class="image-caption">Designing Multi-agent Systems - <a href="https://buy.multiagentbook.com/">PDF</a> | <a 
href="https://www.amazon.com/dp/B0G2BCQQJY">Amazon</a></figcaption></figure></div><p>All code below comes from <a href="https://github.com/victordibia/designing-multiagent-systems">PicoAgents</a> - a working framework you can install and run, not pseudocode. Snippets are simplified; comments note which source file each comes from.</p><div><hr></div><h2>Extension 1: The Right Tools</h2><p>The first piece of the puzzle is assembling the right set of tools. Reviewing what Claude Code ships with is instructive - the tools cluster into four groups:</p><p><strong>Exploration</strong> - finding and reading code</p><ul><li><p><code>Read</code> - file contents with line numbers (supports images, PDFs, notebooks)</p></li><li><p><code>Glob</code> - fast file pattern matching (<code>**/*.py</code>, <code>src/**/*.ts</code>)</p></li><li><p><code>Grep</code> - content search using ripgrep with regex support</p></li><li><p><code>LS</code> - list directories</p></li></ul><p><strong>Modification</strong> - changing code and running commands</p><ul><li><p><code>Write</code> - create or overwrite files</p></li><li><p><code>Edit</code> / <code>MultiEdit</code> - exact string replacement (requires unique match)</p></li><li><p><code>Bash</code> - execute shell commands with timeout and background support</p></li><li><p><code>NotebookEdit</code> - edit Jupyter notebook cells</p></li></ul><p><strong>Coordination</strong> - managing complex tasks</p><ul><li><p><code>Task</code> - launch sub-agents for complex tasks (Explore, general-purpose)</p></li><li><p><code>TodoWrite</code> - manage structured task lists with status tracking</p></li><li><p><code>ExitPlanMode</code> - exit planning mode after creating implementation plan</p></li></ul><p><strong>Interaction</strong> - communicating with user and web</p><ul><li><p><code>AskUserQuestion</code> - ask structured multiple-choice questions</p></li><li><p><code>WebFetch</code> / <code>WebSearch</code> - fetch and search web 
content</p></li><li><p><code>Skill</code> - execute user-defined skills</p></li><li><p>MCP tools - dynamic tools from MCP servers</p></li></ul><p>PicoAgents implements nearly all of these. The tool names differ (<code>ReadFileTool</code>, <code>EditFileTool</code>, <code>BashTool</code>, etc.) but the coverage maps one-to-one. PicoAgents also adds a <code>ThinkTool</code> for explicit reasoning and a <code>SkillsTool</code> for on-demand context loading via <a href="https://newsletter.victordibia.com/p/implementing-claude-code-skills-from">SKILL.md files</a>.</p><h3>Tool Implementation and Design</h3><p>I covered how to implement tools (the <code>@tool</code> decorator, JSON Schema generation, and the tool execution pattern) in my <a href="https://newsletter.victordibia.com/p/the-agent-execution-loop-how-to-build">agent execution loop post</a>, and the broader arc of how tool calling has evolved in <a href="https://newsletter.victordibia.com/p/the-arc-of-agent-action">the arc of tool calling</a>. PicoAgents&#8217; tool implementations are in <code>picoagents/tools/</code>.</p><p>What&#8217;s worth highlighting here are the design patterns specific to <em>coding</em> agent tools:</p><p><strong>Output truncation matters more than you&#8217;d think.</strong> A naive <code>bash("ls -la")</code> on a large directory dumps thousands of lines into context. Every token of that output gets carried forward to every subsequent LLM call. PicoAgents&#8217; <code>BashTool</code> caps output and includes exit codes; <code>ReadFileTool</code> takes <code>offset</code> and <code>limit</code> parameters so the agent can read files in chunks.</p><p><strong>Structured errors over stack traces.</strong> <code>Error: File not found: config.py</code> is actionable. A Python traceback wastes tokens and confuses the model. 
Every tool should return a clear, concise error message that tells the model what to do differently.</p><p><strong>Edit uniqueness prevents wrong-location edits.</strong> The <code>EditFileTool</code> requires the target string to be unique in the file. If the string appears multiple times, the tool returns an error asking for more context. This is the same pattern Claude Code uses - it prevents the common failure of editing the wrong occurrence of a repeated string.</p><p><strong>The security model is local trust.</strong> These tools are powerful - <code>bash</code> can execute arbitrary commands, <code>write_file</code> can overwrite anything. This is intentional. Meta&#8217;s <a href="https://ai.meta.com/blog/practical-ai-agent-security/">Rule of Two</a> framework captures why this works: agents should satisfy no more than two of (A) process untrusted inputs, (B) access sensitive systems, (C) change state. A local coding agent satisfies B and C, but not A - the input is from you, the trusted user.</p><h3>Learning from Claude Code&#8217;s System Prompt</h3><p>Tools are only half the equation. The other half is <em>how you instruct the model to use them</em>. 
<a href="https://github.com/anthropics/claude-code/blob/main/SYSTEM_PROMPT.md">Claude Code&#8217;s system prompt has been published</a> and reveals patterns worth adopting:</p><ul><li><p><strong>Token minimization</strong> - &#8220;Responses under 4 lines unless detail requested&#8221; - saves context</p></li><li><p><strong>Batch tool calls</strong> - &#8220;Call multiple tools in a single response&#8221; - fewer iterations</p></li><li><p><strong>Read before edit</strong> - &#8220;NEVER edit a file you haven&#8217;t read&#8221; - prevents blind edits</p></li><li><p><strong>Follow patterns</strong> - &#8220;Examine neighboring files before writing&#8221; - consistency</p></li><li><p><strong>Verify deps</strong> - &#8220;Never assume libraries exist&#8221; - prevents import errors</p></li><li><p><strong>No proactive docs</strong> - &#8220;NEVER create documentation unless requested&#8221; - stays focused</p></li></ul><p>PicoAgents&#8217; <code>_instructions.py</code> incorporates a good number of these patterns.</p><div><hr></div><h2>Extension 2: Middlewares and Hooks</h2><p>As agents run longer, you need two kinds of control: visibility into individual operations (what tool just ran? how many tokens did that call use?) and control over the loop itself (is the task actually done? should the agent keep going?).</p><p>PicoAgents separates these into two distinct mechanisms, following the pattern I described in my <a href="https://newsletter.victordibia.com/p/agent-middleware-adding-control-and">middleware post</a>:</p><p><strong>Middlewares</strong> intercept individual tool/model calls. They fire around each operation and can log, block, or modify calls.</p><p><strong>Hooks</strong> intercept the agent loop itself. They fire before the first LLM call or when the agent tries to stop, and can inject instructions or check completion.</p><h3>Middlewares: Per-Call Interception</h3><p>Middlewares wrap individual tool and model calls. 
PicoAgents uses a <code>BaseMiddleware</code> class with two override points: <code>on_model_call</code> (fires around each LLM request) and <code>on_tool_call</code> (fires around each tool execution). Override either to log, modify, or block the operation:</p><pre><code><code>from picoagents._middleware import BaseMiddleware

class TokenTrackingMiddleware(BaseMiddleware):
    """Reports token usage for each model call and refuses bash commands containing rm -rf."""

    async def on_model_call(self, messages, tools, context, next_fn):
        # Let the wrapped model call run first, then report its usage.
        response = await next_fn(messages, tools)
        usage = response.usage
        print(f"Tokens: {usage.tokens_input} in, {usage.tokens_output} out")
        return response

    async def on_tool_call(self, tool_name, parameters, context, next_fn):
        # Guard clause: everything except a dangerous bash command passes through.
        is_bash = tool_name == "bash"
        if not (is_bash and "rm -rf" in parameters.get("command", "")):
            return await next_fn(tool_name, parameters)
        # The blocked result is handed back instead of executing the tool.
        return ToolResult(success=False, result="Blocked: dangerous command")
</code></code></pre><p>The <code>next_fn</code> pattern chains middlewares - each one decides whether to pass through, modify, or block the call. The blocked result goes back to the model as a tool response, so it can adjust its approach.</p><h3>Hooks: Loop-Level Control</h3><p>Hooks operate at a different level - they fire at two points in the agent loop: before the first LLM call (start hooks) and when the agent would stop because it returned no tool calls (end hooks). They don&#8217;t see individual operations.</p><p><strong>Start hooks</strong> run once before the first LLM call. They inject instructions:</p><pre><code><code>from picoagents._hooks import BaseStartHook, BaseEndHook

class PlanningHook(BaseStartHook):
    async def on_start(self, context: LoopContext) -&gt; str | None:
        return (
            "Before starting, break the task into steps using todo_write. "
            "Mark each step as you complete it."
        )
</code></code></pre><p><strong>End hooks</strong> run when the agent tries to stop (returns no tool calls). They decide whether to let it:</p><pre><code><code>class LLMCompletionCheckHook(BaseEndHook):
    def __init__(self, max_restarts=2):
        """Set up the restart limit that on_end consults before judging completion.

        Args:
            max_restarts: Value passed to MaxRestartsTermination (default 2).
        """
        self.termination = MaxRestartsTermination(max_restarts)

    async def on_end(self, context: LoopContext) -&gt; str | None:
        if self.termination.should_terminate(context):
            return None  # Hit restart limit, allow stop

        # Summarize what the agent actually did (tool calls, not claims)
        summary = self._build_activity_log(context.llm_messages)

        # Ask a judge model: is this complete?
        result = await context.model_client.create([
            SystemMessage("Judge task completion based on tool calls and "
                          "results, not the agent's claims. "
                          "Reply COMPLETE or INCOMPLETE."),
            UserMessage(f"Task: {context.llm_messages[1].content}\n\n"
                        f"Activity:\n{summary}"),
        ])

        if result.message.content.startswith("COMPLETE"):
            return None  # Done, allow stop

        return f"You are not done yet. Continue working."
</code></code></pre><p>Return a string: the agent gets that message injected and the loop continues. Return <code>None</code>: the agent stops. The <code>MaxRestartsTermination</code> safety valve prevents infinite loops - after N restarts, the hook allows the agent to stop regardless.</p><p>The two mechanisms are complementary. A middleware that blocks <code>rm -rf</code> fires on every tool call, but it can&#8217;t prevent early stopping. An end hook that checks completion can keep the agent going, but it can&#8217;t block a dangerous command. Together, they give agents <strong>cognitive durability</strong> - the ability to persist through long tasks instead of quitting at the first natural pause. How completion hooks interact with context management is where things get interesting.</p><div><hr></div><h2>Extension 3: Context Management</h2><p>Without context management, every tool result accumulates. A 20-iteration task with file reads, grep results, and bash outputs will overflow any context window. I covered context engineering strategies in depth in a <a href="https://newsletter.victordibia.com/p/context-engineering-101-how-agents">previous post</a> - including benchmarks comparing strategies, the sawtooth pattern of healthy compaction, and the thrashing failure mode when budgets are too tight. Here I&#8217;ll focus on two things that post didn&#8217;t cover well: the pluggable strategy pattern and the critical implementation detail that most frameworks get wrong.</p><h3>Compaction as a Pluggable Strategy</h3><p>PicoAgents defines a <code>CompactionStrategy</code> protocol with a single method: <code>compact(messages) -&gt; messages</code>. Any class that implements this method can be plugged into an agent. PicoAgents ships with three built-in strategies:</p><ul><li><p><code>HeadTailCompaction</code> - Keeps the head (system prompt, task) and tail (recent work), drops the middle. Zero extra cost. 
Works with any provider.</p></li><li><p><code>SlidingWindowCompaction</code> - Keeps the last N messages. Simpler but loses the original task context.</p></li><li><p><code>NoCompaction</code> - Baseline that returns messages unchanged, for benchmarking.</p></li></ul><p>But these are just starting points. The protocol is the point - you can implement your own. An LLM-based summarization strategy (like what Claude Code uses via Anthropic&#8217;s <a href="https://platform.claude.com/docs/en/build-with-claude/compaction">Compaction API</a>) preserves semantic meaning but costs an extra LLM call and is provider-specific. A hybrid strategy could summarize the middle while preserving head and tail verbatim. The agent doesn&#8217;t care which strategy it gets - it calls <code>compact()</code> and gets back messages.</p><p>One implementation detail matters: many agent frameworks apply compaction <em>after</em> adding new messages but use the original list for the next iteration. The compacted list doesn&#8217;t persist.</p><p>PicoAgents applies compaction <em>before</em> each LLM call, and the compacted list continues forward:</p><pre><code><code># From picoagents/agents/_agent.py (simplified)
# NOTE: simplified excerpt - the update of `iteration` is not shown here.
while iteration &lt; self.max_iterations:
    # CRITICAL: Apply context strategy BEFORE each call
    if self.compaction:
        # Reassign, so the compacted list replaces the accumulated one
        llm_messages = self.compaction.compact(llm_messages)

    # The compacted list is used for both this call AND continues forward
    response = await model_client.create(llm_messages, tools=tools)

    # Add response and tool results to llm_messages
    # ... (these accumulate until next compaction)
</code></code></pre><p>This seems obvious, but it&#8217;s a common bug. The context strategy must be in the loop with reassignment, not applied as a side effect after it.</p><h3>Context Isolation via Sub-Agents</h3><p>Compaction manages context within a single agent. Isolation prevents context from accumulating in the first place by running sub-tasks in separate contexts.</p><p>PicoAgents supports this via <code>as_tool()</code> - any agent can be wrapped as a callable tool for another agent. The sub-agent gets its own context window, tools, and compaction strategy. It runs, does its work (potentially reading dozens of files and accumulating 50k+ tokens), and returns a summary. Only that summary enters the parent&#8217;s context.</p><pre><code><code>sub_agent = Agent(
    name="code_reviewer",
    description="Reviews code in a directory",
    tools=[ReadFileTool(), ListDirectoryTool()],
    compaction=HeadTailCompaction(token_budget=50_000),
    ...
)

# The coordinator never sees the sub-agent's internal context
coordinator = Agent(
    tools=[sub_agent.as_tool()],  # Agent becomes a callable tool
    ...
)
</code></code></pre><p>This is the same pattern Claude Code uses with its <code>Task</code> tool - spawn a sub-agent for bounded work, discard its context, keep only the result. The parent&#8217;s context stays bounded regardless of how much work the sub-agents do.</p><p>For benchmarks comparing compaction and isolation strategies - including the budget sizing rule and how hooks interact with compaction - see the <a href="https://newsletter.victordibia.com/p/context-engineering-101-how-agents">Context Engineering post</a>.</p><div><hr></div><h2>Putting It Together</h2><p>Here&#8217;s how PicoAgents&#8217; <code>Agent</code> class wires all three extensions together. The constructor takes tools, compaction, middlewares, and hooks as separate concerns:</p><pre><code><code>from picoagents import Agent, LLMCompletionCheckHook
from picoagents.compaction import HeadTailCompaction
from picoagents._hooks import PlanningHook
from picoagents._middleware import BaseMiddleware
from picoagents.llm import AzureOpenAIChatCompletionClient
from picoagents.tools import create_coding_tools, create_context_engineering_tools

import os  # required for os.environ below; missing from the import list above

# Model client shared by the agent; the endpoint is read from the environment
# and raises KeyError if AZURE_OPENAI_ENDPOINT is not set.
model_client = AzureOpenAIChatCompletionClient(
    model="gpt-4.1-mini",
    azure_endpoint=os.environ["AZURE_OPENAI_ENDPOINT"],
)

# Wire the three extensions into a single agent.
agent = Agent(
    name="coding_agent",
    instructions="You are a senior software engineer...",
    model_client=model_client,
    # Extension 1: coding tools plus the todo/skills tools
    tools=create_coding_tools() + create_context_engineering_tools(),
    # Extension 3: compaction strategy applied to the message list
    compaction=HeadTailCompaction(token_budget=100_000, head_ratio=0.2),
    # Extension 2: per-call middleware and loop-level start/end hooks
    middlewares=[TokenTrackingMiddleware()],
    start_hooks=[PlanningHook()],
    end_hooks=[LLMCompletionCheckHook(max_restarts=5)],
    max_iterations=50,
)

# NOTE: top-level await - run this inside an async function or a notebook cell.
response = await agent.run("Find all Python files that import pandas and list them")
# response.usage has token counts, LLM calls, tool calls
# response.context has the full conversation for multi-turn follow-ups
</code></code></pre><p>The <code>create_coding_tools()</code> preset includes: ReadFileTool, WriteFileTool, EditFileTool, GlobFilesTool, GrepTool, ListDirectoryTool, BashTool. Add <code>create_context_engineering_tools()</code> for TodoWriteTool, TodoReadTool, and SkillsTool. Wrap any agent with <code>as_tool()</code> for sub-agent isolation.</p><p>Multi-turn conversations pass the context forward:</p><pre><code><code>r1 = await agent.run("Find the bug in auth.py")
r2 = await agent.run("Now fix it", context=r1.context)  # Continues the conversation</code></code></pre><p>To dig into the implementation, here are the key source files:</p><ul><li><p><strong><a href="https://github.com/victordibia/designing-multiagent-systems/blob/main/picoagents/src/picoagents/agents/_agent.py">_agent.py</a></strong> - Agent class with tool loop, compaction, and hook integration</p></li><li><p><strong><a href="https://github.com/victordibia/designing-multiagent-systems/blob/main/picoagents/src/picoagents/compaction.py">compaction.py</a></strong> - HeadTail and SlidingWindow compaction strategies</p></li><li><p><strong><a href="https://github.com/victordibia/designing-multiagent-systems/blob/main/picoagents/src/picoagents/_hooks.py">_hooks.py</a></strong> - Completion checks, planning hooks, termination conditions</p></li><li><p><strong><a href="https://github.com/victordibia/designing-multiagent-systems/blob/main/picoagents/src/picoagents/_middleware.py">_middleware.py</a></strong> - BaseMiddleware for per-call interception</p></li><li><p><strong><a href="https://github.com/victordibia/designing-multiagent-systems/tree/main/picoagents/src/picoagents/tools">tools/</a></strong> - Tool implementations (coding, context engineering, research)</p></li><li><p><strong><a href="https://github.com/victordibia/designing-multiagent-systems/blob/main/examples/contextengineering/compaction.ipynb">compaction.ipynb</a></strong> - Benchmark comparing compaction strategies</p></li></ul><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!lHNb!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc120efdf-31b8-4c53-80e1-552845272ed9_1678x1061.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" 
srcset="https://substackcdn.com/image/fetch/$s_!lHNb!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc120efdf-31b8-4c53-80e1-552845272ed9_1678x1061.png 424w, https://substackcdn.com/image/fetch/$s_!lHNb!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc120efdf-31b8-4c53-80e1-552845272ed9_1678x1061.png 848w, https://substackcdn.com/image/fetch/$s_!lHNb!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc120efdf-31b8-4c53-80e1-552845272ed9_1678x1061.png 1272w, https://substackcdn.com/image/fetch/$s_!lHNb!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc120efdf-31b8-4c53-80e1-552845272ed9_1678x1061.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!lHNb!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc120efdf-31b8-4c53-80e1-552845272ed9_1678x1061.png" width="1456" height="921" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/c120efdf-31b8-4c53-80e1-552845272ed9_1678x1061.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:921,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:245943,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://newsletter.victordibia.com/i/173104368?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc120efdf-31b8-4c53-80e1-552845272ed9_1678x1061.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" 
class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!lHNb!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc120efdf-31b8-4c53-80e1-552845272ed9_1678x1061.png 424w, https://substackcdn.com/image/fetch/$s_!lHNb!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc120efdf-31b8-4c53-80e1-552845272ed9_1678x1061.png 848w, https://substackcdn.com/image/fetch/$s_!lHNb!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc120efdf-31b8-4c53-80e1-552845272ed9_1678x1061.png 1272w, https://substackcdn.com/image/fetch/$s_!lHNb!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc120efdf-31b8-4c53-80e1-552845272ed9_1678x1061.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" 
stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a><figcaption class="image-caption">For a software engineering task (review a repo and write a quality report), without compaction we see uniform growth in tokens. With HeadTail compaction at an 8k/15k budget we see input tokens capped at the token budget (with similar performance to no compaction).</figcaption></figure></div><p></p><div><hr></div><h2>Choosing Your Strategy</h2><p>The three extensions interact. Here&#8217;s when to apply what:</p><ul><li><p><strong>Short tasks (&lt; 5 tool calls):</strong> No compaction, hooks, or sub-agents needed.</p></li><li><p><strong>Medium tasks (5-20 calls):</strong> Compaction at 2-3x working set. Token tracking middleware. Sub-agents optional.</p></li><li><p><strong>Long tasks (20+ calls):</strong> Compaction + completion hook. Token tracking + tool blocking middlewares. Sub-agents for bounded subtasks.</p></li><li><p><strong>Multi-directory operations:</strong> Compaction on each agent. Completion check on coordinator. One sub-agent per directory.</p></li></ul><p>The general principle: compaction manages memory within an agent, hooks manage persistence across the loop, and sub-agents manage scope across tasks. Start simple, add complexity only when you see the failure mode it addresses (context overflow, early stopping, or unbounded growth).</p><div><hr></div><h2>Same Patterns, Different Frameworks</h2><p>The three extensions are architectural patterns, not framework features - PicoAgents is just one implementation. The implementation details differ, but the architecture is the same. 
Here&#8217;s how each maps to other frameworks:</p><p><strong>Tools</strong>: Every framework has tool registration. The specific tools (read, edit, glob, grep, bash) are what matter for coding agents - the registration mechanism is interchangeable.</p><ul><li><p><strong><a href="https://github.com/langchain-ai/langgraph">LangGraph</a></strong> - <code>@tool</code> decorator, tools passed to <code>create_react_agent()</code></p></li><li><p><strong><a href="https://github.com/microsoft/agent-framework">Microsoft Agent Framework</a></strong> - <code>@kernel_function</code> decorator or <code>ai_function</code> registration</p></li><li><p><strong><a href="https://github.com/google/adk-python">Google ADK</a></strong> - <code>FunctionTool</code> wrapper, tools passed to <code>Agent()</code></p></li><li><p><strong><a href="https://github.com/anthropics/claude-agent-sdk-python">Claude Agent SDK</a></strong> - Tools defined in agent config</p></li></ul><p><strong>Middlewares and hooks</strong>: The naming varies, but the concept is universal - intercept operations at call level or loop level.</p><ul><li><p><strong>PicoAgents</strong> - Per-call: <a href="https://github.com/victordibia/designing-multiagent-systems/blob/main/picoagents/src/picoagents/_middleware.py">BaseMiddleware</a> (on_tool_call, on_model_call). Loop-level: <a href="https://github.com/victordibia/designing-multiagent-systems/blob/main/picoagents/src/picoagents/_hooks.py">BaseStartHook, BaseEndHook</a></p></li><li><p><strong>Deep Agents</strong> - Per-call: <a href="https://github.com/langchain-ai/deepagents/blob/main/libs/deepagents/deepagents/middleware/">AgentMiddleware</a> with <code>wrap_model_call()</code>. Loop-level: state-based via <code>IsLastStep</code></p></li><li><p><strong>Agent Framework</strong> - Per-call: <a href="https://github.com/microsoft/agent-framework/tree/main/python/samples/getting_started/middleware/">middleware classes</a> (decorator or class-based). 
Loop-level: event handlers</p></li><li><p><strong>Google ADK</strong> - Per-call: <a href="https://github.com/google/adk-python/blob/main/contributing/samples/callbacks/agent.py">before_tool_callback, after_tool_callback</a>. Loop-level: <code>before_agent_callback</code>, <code>after_agent_callback</code></p></li><li><p><strong>Claude Agent SDK</strong> - Per-call: <a href="https://github.com/anthropics/claude-agent-sdk-python/blob/main/examples/hooks.py">PreToolUse, PostToolUse hooks</a>. Loop-level: session-level hooks</p></li></ul><p><strong>Compaction</strong>: The strategies differ but the need is universal. <a href="https://github.com/langchain-ai/deepagents">Deep Agents</a> uses <code>SummarizationMiddleware</code> that triggers at 85% context fill, summarizes old messages with a smaller model, and offloads full history to a backend file. LangGraph&#8217;s core provides <code>RemoveMessage</code> primitives and custom state reducers - you build your own trimming logic. PicoAgents uses <a href="https://github.com/victordibia/designing-multiagent-systems/blob/main/picoagents/src/picoagents/compaction.py">HeadTail trimming</a> with no extra LLM calls. Different trade-offs, same architectural slot.</p><p>The <a href="https://github.com/victordibia/designing-multiagent-systems">PicoAgents implementation</a> is one reference - adapt the patterns to whatever you&#8217;re using.</p><div><hr></div><h2>Key Takeaways</h2><ol><li><p><strong>Tools matter, but not quantity</strong> - A focused set covering exploration (read, glob, grep), modification (write, edit, bash), and coordination (task, todo_write) handles most coding tasks.</p></li><li><p><strong>Middlewares and hooks solve different problems</strong> - Middlewares intercept individual calls (observability, safety). Hooks control the loop itself (prevent early stopping, inject planning etc). 
You need both for long-running agents.</p></li><li><p><strong>Right-size your compaction budget</strong> - Compaction cuts total tokens (56-70% in benchmarks), but too-tight budgets backfire. At 8k budget, agents scored 4.0/10 vs 6.0/10 without compaction - they lost memory of recent work and wasted 55% of reads re-reading files they&#8217;d already seen. A healthy budget produces a sawtooth context curve (grow, trim, grow). A flat line at the budget means thrashing. If your duplicate read ratio exceeds 15%, increase the budget. Size it at 2-3x the agent&#8217;s typical working set.</p></li><li><p><strong>Hooks and compaction interact</strong> - Completion hooks can amplify thrashing with tight budgets. Right-size the budget first, then add hooks.</p></li><li><p><strong>These patterns are universal</strong> - <a href="https://github.com/langchain-ai/deepagents">Deep Agents</a> (built on LangGraph) implements all three: <code>SummarizationMiddleware</code> for compaction, <code>AgentMiddleware</code> for per-call interception, and <code>SubAgentMiddleware</code> for context isolation - with the same coding tools (read, edit, glob, grep, bash). LangGraph&#8217;s core provides <code>RemoveMessage</code> for trimming and <code>IsLastStep</code> for loop control. The patterns are the same regardless of framework.</p></li></ol><div><hr></div><p><em>The full PicoAgents implementation is available in the companion repo for <a href="https://github.com/victordibia/designing-multiagent-systems">Designing Multi-Agent Systems</a>. The book covers these patterns in depth, including evaluation frameworks and the 10 common failure modes in agent systems.</em></p><p><strong>Up next:</strong> The patterns above are sufficient for building a strong initial agent. But production agents extend these ideas in ways that are hard to anticipate until you&#8217;ve seen them. 
On March 31, 2026, the full Claude Code source was inadvertently published via its npm registry - giving us a rare look at a production agent harness used by hundreds of thousands of developers. In the next post, I tear down that source code and show exactly how the same three extensions - tools, hooks, and compaction - evolve when they need to handle 50+ iteration sessions, recover from API failures, and manage context across sub-agents.</p><p><strong>Related posts:</strong></p><ul><li><p><a href="https://newsletter.victordibia.com/p/the-agent-execution-loop-how-to-build">The Agent Execution Loop</a> - The foundation this post builds on</p></li><li><p><a href="https://newsletter.victordibia.com/p/agent-middleware-adding-control-and">Agent Middleware</a> - Control and observability patterns</p></li><li><p><a href="https://newsletter.victordibia.com/p/context-engineering-101-how-agents">Context Engineering 101</a> - Full benchmark comparing compaction strategies</p></li><li><p><a href="https://newsletter.victordibia.com/p/implementing-claude-code-skills-from">Implementing Claude Code Skills from Scratch</a> - On-demand context loading</p></li><li><p><a href="https://newsletter.victordibia.com/p/the-arc-of-agent-action">The Arc of Tool Calling</a> - From code to tools and back</p></li></ul><div><hr></div><h2>References</h2><ul><li><p>Anthropic. &#8220;<a href="https://www.anthropic.com/claude-code">Claude Code</a>.&#8221;</p></li><li><p>Anthropic. &#8220;<a href="https://platform.claude.com/docs/en/build-with-claude/compaction">Compaction API Documentation</a>.&#8221; 2026.</p></li><li><p>Anthropic. &#8220;<a href="https://www.anthropic.com/engineering/effective-context-engineering-for-ai-agents">Effective Context Engineering for AI Agents</a>.&#8221;</p></li><li><p>Thariq. &#8220;<a href="https://x.com/trq212/status/2024574133011673516">Lessons from Building Claude Code: Prompt Caching Is Everything</a>.&#8221; Feb 2026.</p></li><li><p>Rigby. 
&#8220;<a href="https://dev.to/rigby_/what-actually-happens-when-you-run-compact-in-claude-code-3kl9">What Actually Happens When You Run /compact in Claude Code</a>.&#8221; 2026.</p></li><li><p>Anthropic. &#8220;<a href="https://github.com/anthropics/claude-code/blob/main/SYSTEM_PROMPT.md">Claude Code System Prompt</a>.&#8221; Published system prompt.</p></li><li><p>Piebald-AI. &#8220;<a href="https://github.com/Piebald-AI/claude-code-system-prompts">Claude Code System Prompts</a>.&#8221; Extracted prompts from Claude Code releases.</p></li><li><p>Meta AI. &#8220;<a href="https://ai.meta.com/blog/practical-ai-agent-security/">Agents Rule of Two: A Practical Approach to AI Agent Security</a>.&#8221;</p></li><li><p>Liu et al. &#8220;<a href="https://arxiv.org/abs/2307.03172">Lost in the Middle: How Language Models Use Long Contexts</a>.&#8221; 2023.</p></li></ul>]]></content:encoded></item><item><title><![CDATA[Agentic Noise: How AI Agents Can Break the Delicate Balance of Human Attention]]></title><description><![CDATA[Issue #60 | When AI Agents Flood One Side of a Platform, Everyone Loses]]></description><link>https://newsletter.victordibia.com/p/agentic-noise-how-ai-agents-can-break</link><guid isPermaLink="false">https://newsletter.victordibia.com/p/agentic-noise-how-ai-agents-can-break</guid><dc:creator><![CDATA[Victor Dibia, PhD]]></dc:creator><pubDate>Mon, 30 Mar 2026 12:32:20 GMT</pubDate><enclosure url="https://substackcdn.com/image/fetch/$s_!Q3lO!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa6ebbb33-3c7a-4c09-acf5-8f857c847865_1678x1061.png" length="0" type="image/jpeg"/><content:encoded><![CDATA[<div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" 
href="https://substackcdn.com/image/fetch/$s_!Q3lO!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa6ebbb33-3c7a-4c09-acf5-8f857c847865_1678x1061.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!Q3lO!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa6ebbb33-3c7a-4c09-acf5-8f857c847865_1678x1061.png 424w, https://substackcdn.com/image/fetch/$s_!Q3lO!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa6ebbb33-3c7a-4c09-acf5-8f857c847865_1678x1061.png 848w, https://substackcdn.com/image/fetch/$s_!Q3lO!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa6ebbb33-3c7a-4c09-acf5-8f857c847865_1678x1061.png 1272w, https://substackcdn.com/image/fetch/$s_!Q3lO!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa6ebbb33-3c7a-4c09-acf5-8f857c847865_1678x1061.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!Q3lO!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa6ebbb33-3c7a-4c09-acf5-8f857c847865_1678x1061.png" width="1456" height="921" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/a6ebbb33-3c7a-4c09-acf5-8f857c847865_1678x1061.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:921,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:799248,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:false,&quot;topImage&quot;:true,&quot;internalRedirect&quot;:&quot;https://newsletter.victordibia.com/i/173682265?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa6ebbb33-3c7a-4c09-acf5-8f857c847865_1678x1061.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!Q3lO!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa6ebbb33-3c7a-4c09-acf5-8f857c847865_1678x1061.png 424w, https://substackcdn.com/image/fetch/$s_!Q3lO!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa6ebbb33-3c7a-4c09-acf5-8f857c847865_1678x1061.png 848w, https://substackcdn.com/image/fetch/$s_!Q3lO!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa6ebbb33-3c7a-4c09-acf5-8f857c847865_1678x1061.png 1272w, https://substackcdn.com/image/fetch/$s_!Q3lO!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa6ebbb33-3c7a-4c09-acf5-8f857c847865_1678x1061.png 1456w" sizes="100vw" fetchpriority="high"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" 
width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Most digital platforms <em><strong>today operate as two-sided markets</strong></em>. YouTube has creators and viewers. LinkedIn has posters and readers. Academic publishing has authors and reviewers/readers. Job boards have applicants and recruiters. Social media has people sharing life updates and friends (or strangers) consuming them.</p><p>These platforms evolved equilibrium mechanisms - rate limits, ranking algorithms, capacity constraints - all calibrated around a basic assumption: <em><strong>humans on both sides</strong></em>. Humans produce content at human speed, and humans consume it at human speed. 
There is an <em>implicit contract in there that makes everything work</em>.</p><p>As AI models become more capable (AI agents that can act with some autonomy), there is a real chance that <em>poorly considered AI agent (asymmetric) deployments</em> break this balance. In <a href="https://buy.multiagentbook.com/">Designing Multi-Agent Systems</a>, I introduce the concept of <strong>agentic noise</strong> to capture this phenomenon. </p><blockquote><p><strong>Agentic Noise</strong>:  When AI agents accelerate one side of a platform while the other remains human-paced, breaking the assumptions underlying the system&#8217;s design. </p></blockquote><p>As we all deal with how our <a href="https://newsletter.victordibia.com/p/will-generative-ai-replace-software">respective domains</a> will <a href="https://newsletter.victordibia.com/p/how-will-ai-impact-academic-research">change with AI</a>, this post discusses some dimensions of the relevant issues:</p><ul><li><p>What are the current examples of asymmetric AI acceleration? (job markets, academia, social media, publishing, children&#8217;s content)</p></li><li><p>What happens when platforms respond with AI on the consumption side too? (the arms race trap - symmetric acceleration without welfare gains)</p></li><li><p>What needs to change? (platform-level coordination mechanisms, builder responsibility for platform balance)</p></li></ul><p></p><div><hr></div><p><em>This post draws on ideas from Chapter 13 of <a href="https://buy.multiagentbook.com/">Designing Multi-Agent Systems</a>, which covers agentic noise, platform imbalance, distributed responsibility, emergent risks, and practical security frameworks for autonomous agents. 
Grab the <a href="https://www.amazon.com/dp/B0G2BCQQJY">book on Amazon</a> or get the <a href="https://buy.multiagentbook.com/">digital PDF</a>.</em></p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!lmj-!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fea1d5eb6-e4cb-4e25-ba8f-5f3fde86e157_850x474.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!lmj-!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fea1d5eb6-e4cb-4e25-ba8f-5f3fde86e157_850x474.png 424w, https://substackcdn.com/image/fetch/$s_!lmj-!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fea1d5eb6-e4cb-4e25-ba8f-5f3fde86e157_850x474.png 848w, https://substackcdn.com/image/fetch/$s_!lmj-!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fea1d5eb6-e4cb-4e25-ba8f-5f3fde86e157_850x474.png 1272w, https://substackcdn.com/image/fetch/$s_!lmj-!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fea1d5eb6-e4cb-4e25-ba8f-5f3fde86e157_850x474.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!lmj-!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fea1d5eb6-e4cb-4e25-ba8f-5f3fde86e157_850x474.png" width="850" height="474" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/ea1d5eb6-e4cb-4e25-ba8f-5f3fde86e157_850x474.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:474,&quot;width&quot;:850,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:69353,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://newsletter.victordibia.com/i/173682265?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fea1d5eb6-e4cb-4e25-ba8f-5f3fde86e157_850x474.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!lmj-!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fea1d5eb6-e4cb-4e25-ba8f-5f3fde86e157_850x474.png 424w, https://substackcdn.com/image/fetch/$s_!lmj-!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fea1d5eb6-e4cb-4e25-ba8f-5f3fde86e157_850x474.png 848w, https://substackcdn.com/image/fetch/$s_!lmj-!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fea1d5eb6-e4cb-4e25-ba8f-5f3fde86e157_850x474.png 1272w, https://substackcdn.com/image/fetch/$s_!lmj-!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fea1d5eb6-e4cb-4e25-ba8f-5f3fde86e157_850x474.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" 
viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><div><hr></div><h2>Examples of Producer / Consumer Imbalance</h2><p>Each example below follows the same pattern: agents accelerate one side, the other side <em>scrambles to respond</em>, and the resulting <em>race</em> inadvertently <em>degrades</em> the platform.</p><p><strong>Job markets.</strong> AI application tools let candidates submit hundreds of tailored resumes per day. Recruiters responded with AI screening - and <a href="https://www.shrm.org/topics-tools/news/hr-trends/recruitment-is-broken">19% report it has filtered out qualified applicants</a>. 
The arms race feeds itself: applicants use AI to tailor resumes to job descriptions, so employers receive resumes that are <a href="https://talentinsights.hirewell.com/blog/ai-hiring-arms-race-workslop-resume-fraud-2026">increasingly similar</a>,  disrupting matching tools designed to differentiate them. The result is a trust/signal breakdown on both sides, with a <a href="https://sanfordheisler.com/blog/2025/12/ai-bias-in-hiring-algorithmic-recruiting-and-your-rights/">federal class-action lawsuit</a> (Mobley v. Workday) now testing whether AI screening tools can be considered legal &#8220;agents&#8221; of the employer. Some employers are <a href="https://www.linkedin.com/news/story/ai-slop-renders-resumes-less-essential-employers-say-7062868/">eliminating resume requirements altogether</a>, shifting to skills-based hiring because AI has rendered polished documents indistinguishable (low signal).</p><p></p><p><strong>Academic research.</strong> I <a href="https://newsletter.victordibia.com/p/how-will-ai-impact-academic-research">wrote about this in detail</a> - the numbers tell the story. NeurIPS submissions jumped from ~9,500 in 2020 to over 23,000 by 2025. Systems like <a href="https://arxiv.org/abs/2504.08066">AI Scientist-v2</a> have already produced papers that passed peer review at top conference workshops. And now the review side is AI-accelerated too: at ICLR 2026, <a href="https://howaiworks.ai/blog/iclr-2026-ai-generated-peer-reviews-controversy">21% of peer reviews were found to be fully AI-generated</a>, with over half showing some AI use. Both sides are now agent-driven: agents writing papers, agents reviewing them. 
Meanwhile, organized &#8220;paper mills&#8221; use AI to generate <a href="https://www.chemistryworld.com/features/ai-tools-tackle-paper-mill-fraud-overwhelming-peer-review/4022253.article">fraudulent publications at industrial scale</a>, and AI detection tools struggle to keep pace.</p><p><strong>Social media and content platforms.</strong> LinkedIn feeds fill with AI-polished posts. YouTube surfaces AI-generated videos optimized for engagement but <em>empty of substance</em>. Platforms have responded with AI moderation - and YouTube&#8217;s version has become a cautionary tale. In 2025, YouTube terminated <a href="https://www.creatorhandbook.net/youtube-addresses-ai-moderation-concerns-after-reporting-12-million-channel-terminations-in-2025/">12 million channels</a> via automated enforcement. Legitimate creators get <a href="https://itsfoss.com/news/youtubes-ai-mod-enshittification/">caught in the dragnet</a> - tech tutorials flagged for using words like &#8220;bypass,&#8221; entire channels terminated with template responses - while sophisticated bad actors game the system.</p><p><strong>Book publishing.</strong> As the author of <a href="https://buy.multiagentbook.com/">Designing Multi-Agent Systems</a>, this one has been rather interesting! Several bestselling books in my category on Amazon have the veneer of legitimacy - hundreds of reviews pushing them to the top of rankings. However, a cursory review of the 1-2 star reviews reveals a pattern: multiple independent readers calling out the same substantive problems. Not cosmetic complaints (&#8220;packaging was damaged&#8221;) but correlated indictments of actual content quality, often labelled as AI slop. <strong>These books sit at #1 while readers call them garbage. </strong></p><blockquote><p>Note: I think elegant writing and technical content can be authored with AI. 
But asking ChatGPT to generate pages of disconnected random content bundled together as a book is what readers are increasingly concerned about. I have done some deeper research on this topic and will share more soon.</p></blockquote><p><strong>Children&#8217;s content.</strong> I have a six-year-old son and he does get some screen time. Even on YouTube Kids - supposedly age-appropriate - there are hours of AI-generated videos. Knock-off Pokemon content with no storyline, no lesson, no careful design. Just visually addictive spinoffs engineered to capture attention rather than deliver entertainment or learning. Researchers have dubbed it <a href="https://www.tubefilter.com/2025/05/02/ai-elsagate-videos-minion-go-cat-child-safety-content/">&#8220;Elsagate 2.0&#8221;</a> - AI-generated content targeting children at scale. Over <a href="https://www.mensjournal.com/news/expert-warning-youtube-ai-slop-rewire-child-brain">20% of videos recommended to new YouTube users are AI slop</a>, with slop channels collectively accumulating over 63 billion views. Early-childhood experts warn that <a href="https://www.bloomberg.com/news/articles/2025-12-03/ai-slop-youtube-videos-for-kids-pretend-to-be-educational">AI slop distorts children&#8217;s developing understanding of truth and reality</a>. YouTube CEO Neal Mohan has acknowledged this as a <a href="https://www.digit.in/features/general/youtube-ceo-reducing-ai-slop-videos-enhancing-kids-and-teen-content-key-focus-in-2026.html">2026 priority</a> - but many families have already moved to subscription services like Disney+ to avoid the recommendation engine entirely. </p><p><strong>GitHub / OSS software</strong>. Most OSS repos report hundreds of drive-by commits, prompting proposals like <a href="https://github.com/mitchellh/vouch">vouch</a> - a community trust management system to ensure only individuals with some level of established trust can contribute to an OSS project. It&#8217;s a bit ironic. 
OSS helped jump start many a career - everyone has that first contributor story. With eroding trust, that on-ramp is at risk of closing.</p><div><hr></div><h2>The Broken Signals Problem</h2><p>In many of the examples above, there is the feeling that platforms should step up and address the issue. A key challenge here is that the <em><strong>metrics</strong></em> we relied on to navigate the internet are now broken.</p><p>Views can be bot-driven. Comments can be AI-generated. Reviews can be manufactured at scale. The signaling infrastructure that humans built to navigate content can now be gamed at machine speed.</p><p>This also creates an <em><strong>asymmetry in awareness</strong></em>. As someone who works in AI, I can often spot AI-generated responses to my LinkedIn posts. I can identify AI-generated YouTube videos and choose to save my attention. This is not necessarily the case for the majority of netizens today (kids included). Their attention is consumed without their knowledge or consent. They engage with content that looks real, that has the social proof of engagement, but that was <em>never created with their interests in mind.</em></p><p>The platforms themselves face a fundamental measurement problem. Their ranking algorithms optimize for engagement metrics that agents can now manufacture. The internal signals that determine what gets recommended to humans are being corrupted at the source.</p><div><hr></div><h2>Symmetric Acceleration Doesn&#8217;t Fix It</h2><p>Even when symmetric acceleration <em>works</em>, it doesn&#8217;t necessarily improve human welfare. If AI agents write papers and AI agents review them, does that produce better research - or just <a href="https://newsletter.victordibia.com/p/how-will-ai-impact-academic-research">higher-volume churn</a>? When the credential was designed to signal human expertise, what does it mean when neither expertise nor effort is required to obtain it?</p><p>Software engineering is the latest example. 
AI coding agents generate pull requests at scale, making human code review a bottleneck. Anthropic&#8217;s response: <a href="https://techcrunch.com/2026/03/09/anthropic-launches-code-review-tool-to-check-flood-of-ai-generated-code/">Claude Code Review</a>, a multi-agent system that reviews AI-generated code for $15-25 per PR. As <a href="https://www.pcgamer.com/software/ai/anthropic-introduces-claude-code-review-so-you-dont-even-need-to-check-all-of-your-own-ai-slop/">PC Gamer put it</a>: &#8220;so you don&#8217;t even need to check all of your own AI slop.&#8221; AI writes the code, AI reviews the code, and the human pays $25 to be removed from the loop.</p><p>At some point - AI generating content, AI generating comments, AI recommending it to other AI - all human value is lost. The question isn&#8217;t whether we <em>can</em> automate both sides, but whether we <em>should</em>.</p><p></p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!kO-l!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8b9e7d9c-e668-4194-88ad-33a06db65c8a_1678x1061.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!kO-l!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8b9e7d9c-e668-4194-88ad-33a06db65c8a_1678x1061.png 424w, https://substackcdn.com/image/fetch/$s_!kO-l!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8b9e7d9c-e668-4194-88ad-33a06db65c8a_1678x1061.png 848w, 
https://substackcdn.com/image/fetch/$s_!kO-l!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8b9e7d9c-e668-4194-88ad-33a06db65c8a_1678x1061.png 1272w, https://substackcdn.com/image/fetch/$s_!kO-l!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8b9e7d9c-e668-4194-88ad-33a06db65c8a_1678x1061.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!kO-l!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8b9e7d9c-e668-4194-88ad-33a06db65c8a_1678x1061.png" width="1456" height="921" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/8b9e7d9c-e668-4194-88ad-33a06db65c8a_1678x1061.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:921,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:1090746,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://newsletter.victordibia.com/i/173682265?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8b9e7d9c-e668-4194-88ad-33a06db65c8a_1678x1061.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!kO-l!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8b9e7d9c-e668-4194-88ad-33a06db65c8a_1678x1061.png 424w, 
https://substackcdn.com/image/fetch/$s_!kO-l!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8b9e7d9c-e668-4194-88ad-33a06db65c8a_1678x1061.png 848w, https://substackcdn.com/image/fetch/$s_!kO-l!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8b9e7d9c-e668-4194-88ad-33a06db65c8a_1678x1061.png 1272w, https://substackcdn.com/image/fetch/$s_!kO-l!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8b9e7d9c-e668-4194-88ad-33a06db65c8a_1678x1061.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" 
y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a><figcaption class="image-caption">Where's the human, what is the net effect on human welfare? AI produces, AI consumes?</figcaption></figure></div><div><hr></div><h2>What Needs to Change?</h2><p>There are no clean answers here. But a few directions seem clear.</p><p><strong>For platforms:</strong> Engagement metrics are no longer reliable quality signals, but the alternatives are hard. &#8220;Provenance-aware ranking&#8221; sounds good until you remember that reliably detecting AI-generated content is an unsolved problem. Mandatory disclosure (as the EU AI Act attempts) shifts the burden but depends on enforcement. Some platforms have natural levers - conferences can perhaps raise submission fees, appointment systems can enforce identity verification - but open platforms like job boards and social media face genuine design tradeoffs where every intervention has collateral damage.</p><p>We&#8217;ve solved versions of this before. Spectrum allocation and robocall regulations both exist because unconstrained automated use degraded shared infrastructure. As I argue in the book:</p><blockquote><p><em>From <a href="https://buy.multiagentbook.com/">Designing Multi-Agent Systems</a>, Chapter 13:</em></p><p>&#8220;When collective deployment degrades platform value for everyone, coordination mechanisms like rate limiting or platform quotas may become necessary, similar to spectrum allocation or robocall regulations.&#8221;</p></blockquote><p>Agentic noise is the same class of problem, and it <em>may require</em> the same class of solution.</p><p><strong>For builders of AI agents:</strong> Consider the impact on platform balance, not just the business value to the producer side. This is admittedly hard to operationalize - no individual organization can fix a collective action problem through restraint alone. 
But at minimum, asking &#8220;what happens to the humans on the consumption side when we deploy this?&#8221; should be part of the design process.</p><div><hr></div><p>The excitement around agent capabilities is warranted. But what is the point of a #1 bestseller that readers call garbage? What is the value of a YouTube Kids feed full of AI-generated content no human designed for children? What do we gain from <a href="https://newsletter.victordibia.com/p/how-will-ai-impact-academic-research">academia overrun by AI-generated and AI-reviewed papers</a>?</p>
      <p>
          <a href="https://newsletter.victordibia.com/p/agentic-noise-how-ai-agents-can-break">
              Read more
          </a>
      </p>
   ]]></content:encoded></item><item><title><![CDATA[How to Implement Context Engineering Strategies for your Agent (Claude Code) ]]></title><description><![CDATA[#59 | Managing context growth, preventing early stopping, and measuring what works]]></description><link>https://newsletter.victordibia.com/p/context-engineering-101-how-agents</link><guid isPermaLink="false">https://newsletter.victordibia.com/p/context-engineering-101-how-agents</guid><dc:creator><![CDATA[Victor Dibia, PhD]]></dc:creator><pubDate>Wed, 11 Mar 2026 12:32:37 GMT</pubDate><enclosure url="https://substackcdn.com/image/fetch/$s_!yS9l!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1a1c4794-529d-48d6-aaac-85829111dbbc_1678x1061.png" length="0" type="image/jpeg"/><content:encoded><![CDATA[<div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!yS9l!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1a1c4794-529d-48d6-aaac-85829111dbbc_1678x1061.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!yS9l!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1a1c4794-529d-48d6-aaac-85829111dbbc_1678x1061.png 424w, https://substackcdn.com/image/fetch/$s_!yS9l!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1a1c4794-529d-48d6-aaac-85829111dbbc_1678x1061.png 848w, https://substackcdn.com/image/fetch/$s_!yS9l!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1a1c4794-529d-48d6-aaac-85829111dbbc_1678x1061.png 1272w, 
https://substackcdn.com/image/fetch/$s_!yS9l!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1a1c4794-529d-48d6-aaac-85829111dbbc_1678x1061.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!yS9l!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1a1c4794-529d-48d6-aaac-85829111dbbc_1678x1061.png" width="1456" height="921" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/1a1c4794-529d-48d6-aaac-85829111dbbc_1678x1061.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:921,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:245943,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:false,&quot;topImage&quot;:true,&quot;internalRedirect&quot;:&quot;https://newsletter.victordibia.com/i/172101188?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1a1c4794-529d-48d6-aaac-85829111dbbc_1678x1061.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!yS9l!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1a1c4794-529d-48d6-aaac-85829111dbbc_1678x1061.png 424w, https://substackcdn.com/image/fetch/$s_!yS9l!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1a1c4794-529d-48d6-aaac-85829111dbbc_1678x1061.png 848w, 
https://substackcdn.com/image/fetch/$s_!yS9l!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1a1c4794-529d-48d6-aaac-85829111dbbc_1678x1061.png 1272w, https://substackcdn.com/image/fetch/$s_!yS9l!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1a1c4794-529d-48d6-aaac-85829111dbbc_1678x1061.png 1456w" sizes="100vw" fetchpriority="high"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a><figcaption class="image-caption">Context growth is different depending on the context engineering approach implemented on the same code review 
task (44-file repository, gpt-4.1-mini). Context grows monotonically without any strategy; HeadTail  context compaction strategies show sawtooth patterns at their budget thresholds; Isolation shows bounded tokens per coordinator call.</figcaption></figure></div><p>Your agent completed the task. It read 12 files, traced 3 call stacks, made 15 LLM calls, and eventually found the bug. But it also burned through 120,000 tokens to get there - roughly $1.80 on a frontier model. Run that 50 times a day across a team, and you&#8217;re looking at $2,700/month on a single agent workflow.</p><p>Most benchmarks for general-purpose agents like Claude Code and GitHub Copilot focus on task completion - did it get the job done? But in practice, <em>how</em> it got the job done matters just as much. The agent that reads every file in the directory when it only needed three. The agent that compacts its context, drops critical information, then re-reads the same files. The agent that carries 50,000 tokens of stale tool results into every LLM call because nothing told it to forget. These agents might succeed, but they succeed expensively - and for indie developers watching their API bill, or businesses looking to <a href="https://newsletter.victordibia.com/p/use-coding-agents-to-build-your-product">run agents at scale</a>, context management can reduce token costs significantly, though often with quality tradeoffs that need to be understood.</p><p>Context in agents is cumulative - every message, tool result, and model response from previous steps gets carried forward into each new LLM call. As underlying models improve, agents can work on longer-horizon tasks (<a href="https://metr.org/blog/2025-03-19-measuring-ai-ability-to-complete-long-tasks/">METR benchmarks</a> show frontier models completing tasks equivalent to ~70 hours of human work at 80% reliability, with task complexity doubling roughly every 7 months). 
But longer tasks mean more accumulated context, and that creates three problems: the context window fills up and requests get rejected, token costs scale with context size, and model performance degrades as context grows (<a href="https://arxiv.org/abs/2307.03172">Liu et al., 2023</a>).</p><p>Context engineering is the discipline of managing what goes into the context window. While we see the occasional &#8220;Claude is compacting&#8221; message, or a post on how note-taking or even skills can help with context, the exact details on how to implement these strategies or how they impact performance are often vague. In this post, I break down the core strategies for context engineering, how to implement them, and their tradeoffs based on a benchmark I ran across five agent configurations.</p><p>TL;DR: this post covers:</p><ul><li><p>The problem of context explosion in multi-step agent tasks</p></li><li><p>Three core strategies for context engineering: compaction, isolation, and agentic memory</p></li><li><p>A benchmark comparing these strategies on a code review task, showing the tradeoffs between token cost and output quality</p></li><li><p>Key takeaways for when and how to apply context engineering in your agents</p></li></ul><blockquote><p>Note: For low complexity tasks, context engineering is less critical. 
Premature optimization can also hurt performance.</p></blockquote><p><em>This post is adapted from my book <a href="https://multiagentbook.com/">Designing Multi-Agent Systems</a>, where I cover context engineering patterns in Chapter 4 alongside agent architecture, memory systems, and multi-agent coordination.</em></p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!LC-R!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc83b8b5b-5ac7-445a-a481-fc88e9840fb0_2752x1536.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!LC-R!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc83b8b5b-5ac7-445a-a481-fc88e9840fb0_2752x1536.png 424w, https://substackcdn.com/image/fetch/$s_!LC-R!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc83b8b5b-5ac7-445a-a481-fc88e9840fb0_2752x1536.png 848w, https://substackcdn.com/image/fetch/$s_!LC-R!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc83b8b5b-5ac7-445a-a481-fc88e9840fb0_2752x1536.png 1272w, https://substackcdn.com/image/fetch/$s_!LC-R!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc83b8b5b-5ac7-445a-a481-fc88e9840fb0_2752x1536.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!LC-R!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc83b8b5b-5ac7-445a-a481-fc88e9840fb0_2752x1536.png" width="1456" height="813" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/c83b8b5b-5ac7-445a-a481-fc88e9840fb0_2752x1536.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:813,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:4494885,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://newsletter.victordibia.com/i/172101188?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc83b8b5b-5ac7-445a-a481-fc88e9840fb0_2752x1536.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!LC-R!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc83b8b5b-5ac7-445a-a481-fc88e9840fb0_2752x1536.png 424w, https://substackcdn.com/image/fetch/$s_!LC-R!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc83b8b5b-5ac7-445a-a481-fc88e9840fb0_2752x1536.png 848w, https://substackcdn.com/image/fetch/$s_!LC-R!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc83b8b5b-5ac7-445a-a481-fc88e9840fb0_2752x1536.png 1272w, https://substackcdn.com/image/fetch/$s_!LC-R!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc83b8b5b-5ac7-445a-a481-fc88e9840fb0_2752x1536.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" 
width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p></p><div><hr></div><h2>More Context &#8800; Good Context</h2><p>To understand why context engineering matters, the critical first step is to build up the right intuition for <em>how context grows during agent execution</em>. You can do this via <em><strong>context inspection</strong></em>. </p><p>Importantly, recall <a href="https://newsletter.victordibia.com/p/the-agent-execution-loop-how-to-build">the agentic loop</a> - agents address tasks by making LLM calls, interpreting the response, calling tools, and repeating. By default, all previous messages and tool results get appended to the context window for every new LLM call.</p><p>For example, here&#8217;s what happens to a naive agent running a multi-step research task. 
The agent appends every message and tool result to its context:</p><div class="highlighted_code_block" data-attrs="{&quot;language&quot;:&quot;plaintext&quot;,&quot;nodeId&quot;:&quot;c4ae938e-c82e-4b90-b4e7-d700ea2f8a14&quot;}" data-component-name="HighlightedCodeBlockToDOM"><pre class="shiki"><code class="language-plaintext">Iteration 1:    888 tokens  (system + user message)
Iteration 2:  3,400 tokens  (+ list_directory result)
Iteration 3:  8,900 tokens  (+ read_file: context.py)
Iteration 4: 14,200 tokens  (+ read_file: compaction.py)
Iteration 5: 18,900 tokens  (+ grep + read_file: _agent.py)</code></pre></div><p><em>Token growth from a PicoAgents benchmark run (debug_investigation task, gpt model, no compaction). Each iteration adds tool results to the message history.</em><br>Interestingly, the problem isn&#8217;t just the hard limit on what fits in the context window (most LLMs now support very large windows). Rather, research shows LLMs struggle with large contexts even when they technically fit:</p><ul><li><p><strong><a href="https://arxiv.org/abs/2307.03172">Lost in the middle</a></strong>: Information in the middle of long contexts gets less attention than information at the beginning or end (<a href="https://arxiv.org/abs/2307.03172">Liu et al., 2023</a>)</p></li><li><p><strong>Performance degradation</strong>: Accuracy can drop significantly based on how information is positioned, not just whether it fits</p></li><li><p><strong>Working memory</strong>: The context window functions as the agent&#8217;s working memory. But unlike RAM, adding more data can actively degrade retrieval of existing data. Context engineering curates what stays in that window</p></li></ul><p>More context doesn&#8217;t always mean better results.</p><div><hr></div><h2>What Is Context Engineering?</h2><p>Context engineering is a set of techniques for managing what goes into the context window, <em>as the agent runs</em>. 
It answers four key questions:</p><ol><li><p><strong>What to include</strong> - Selecting relevant information</p></li><li><p><strong>What to exclude</strong> - Trimming noise and redundancy</p></li><li><p><strong>How to represent it</strong> - Compressing information efficiently</p></li><li><p><strong>Where to put it</strong> - Positioning for model attention</p></li></ol><p><a href="https://x.com/karpathy">Andrej Karpathy</a> describes it as &#8220;the delicate art and science of filling the context window with just the right information for the next step.&#8221; Many agent failures trace back to context problems; the model had the right capabilities, but the wrong information was in (or missing from) the context window.</p><div><hr></div><h2>Three Core Strategies</h2><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!44ET!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3bcc518b-20e4-44cb-a437-1618f695e41b_1376x768.jpeg" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!44ET!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3bcc518b-20e4-44cb-a437-1618f695e41b_1376x768.jpeg 424w, https://substackcdn.com/image/fetch/$s_!44ET!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3bcc518b-20e4-44cb-a437-1618f695e41b_1376x768.jpeg 848w, https://substackcdn.com/image/fetch/$s_!44ET!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3bcc518b-20e4-44cb-a437-1618f695e41b_1376x768.jpeg 1272w, 
https://substackcdn.com/image/fetch/$s_!44ET!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3bcc518b-20e4-44cb-a437-1618f695e41b_1376x768.jpeg 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!44ET!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3bcc518b-20e4-44cb-a437-1618f695e41b_1376x768.jpeg" width="1376" height="768" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/3bcc518b-20e4-44cb-a437-1618f695e41b_1376x768.jpeg&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:768,&quot;width&quot;:1376,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:334168,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/jpeg&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://newsletter.victordibia.com/i/172101188?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3bcc518b-20e4-44cb-a437-1618f695e41b_1376x768.jpeg&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!44ET!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3bcc518b-20e4-44cb-a437-1618f695e41b_1376x768.jpeg 424w, https://substackcdn.com/image/fetch/$s_!44ET!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3bcc518b-20e4-44cb-a437-1618f695e41b_1376x768.jpeg 848w, 
https://substackcdn.com/image/fetch/$s_!44ET!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3bcc518b-20e4-44cb-a437-1618f695e41b_1376x768.jpeg 1272w, https://substackcdn.com/image/fetch/$s_!44ET!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3bcc518b-20e4-44cb-a437-1618f695e41b_1376x768.jpeg 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>We can categorize context engineering techniques into three core strategies:</p><h3>Strategy 1: Compaction (Reactive 
Trimming)</h3><p>Compaction reduces context by trimming, summarizing, or selectively retaining messages. It happens <em>just before</em> an LLM call in the agentic loop, triggered by a condition.</p><p>From the agent user&#8217;s perspective, you configure a context strategy when creating the agent:</p><div class="highlighted_code_block" data-attrs="{&quot;language&quot;:&quot;python&quot;,&quot;nodeId&quot;:&quot;77c52aec-0c9a-4e99-9b40-5948ab78535e&quot;}" data-component-name="HighlightedCodeBlockToDOM"><pre class="shiki"><code class="language-python">from picoagents import Agent
from picoagents.compaction import HeadTailCompaction, SlidingWindowCompaction

# Agent with head+tail compaction - keeps task definition + recent work
agent = Agent(
    name="investigator",
    instructions="Investigate the codebase and find the bug.",
    model_client=model_client,
    tools=[read_file, list_directory, grep],
    compaction=HeadTailCompaction(
        token_budget=100_000,  # Compact when context exceeds this
        head_ratio=0.2,        # 20% for task context, 80% for recent work
    ),
    max_iterations=20,
)

response = await agent.run("Find why the auth middleware fails on refresh tokens.")</code></pre></div><p>Under the hood, the agent loop applies this strategy before each LLM call:</p><div class="highlighted_code_block" data-attrs="{&quot;language&quot;:&quot;python&quot;,&quot;nodeId&quot;:&quot;e157908d-5806-4b8b-9088-78514df2a779&quot;}" data-component-name="HighlightedCodeBlockToDOM"><pre class="shiki"><code class="language-python">while not done:
    # Check if compaction needed before calling the model
    if context_exceeds_budget(messages):  # e.g., &gt; 80% of limit
        messages = compact(messages)

    response = call_llm(messages)
    # ... handle response, execute tools, append results</code></pre></div><p>If you&#8217;ve used coding agents (Claude Code, GitHub Copilot CLI, etc.), you may occasionally see the agent pause, mention &#8220;compacting&#8221;, summarize prior steps, and continue. This is the compaction strategy. The agent hit a threshold (e.g., 80% of the context window, or a hard cap like 100k tokens) and triggered compaction before the next LLM call. The budget determines <em>when</em> to compact; the approaches below determine <em>how</em>:</p><p><strong>Sliding Window</strong>: Keep system message + most recent messages that fit in budget. Simplest approach.</p><div class="highlighted_code_block" data-attrs="{&quot;language&quot;:&quot;python&quot;,&quot;nodeId&quot;:&quot;232f7c76-9acf-4e9f-a562-6017ccad6061&quot;}" data-component-name="HighlightedCodeBlockToDOM"><pre class="shiki"><code class="language-python"># From picoagents/compaction.py - SlidingWindowCompaction.compact()
def compact(self, messages):
    current_tokens = self._count_tokens(messages)
    if current_tokens &lt;= self.token_budget:
        return messages  # No compaction needed

    groups = self._find_atomic_groups(messages)  # Keep tool calls with results

    # Always keep system message
    system_groups, system_tokens = [], 0
    if groups and messages[groups[0][0]].role == "system":
        system_groups.append(groups[0])
        system_tokens = self._count_tokens([messages[i] for i in groups[0]])
        groups = groups[1:]

    # Fill from end with remaining budget
    remaining_budget = self.token_budget - system_tokens
    kept_groups, kept_tokens = [], 0

    for group in reversed(groups):
        group_tokens = self._count_tokens([messages[i] for i in group])
        if kept_tokens + group_tokens &lt;= remaining_budget:
            kept_groups.insert(0, group)
            kept_tokens += group_tokens
        else:
            break  # No more room

    kept_indices = set()
    for group in system_groups + kept_groups:
        kept_indices.update(group)
    return [messages[i] for i in sorted(kept_indices)]
</code></pre></div><p><strong>Head+Tail</strong>: Split the budget between head (system prompt, initial task) and tail (recent work). Drop middle messages. This preserves both the task definition and recent progress.</p><div class="highlighted_code_block" data-attrs="{&quot;language&quot;:&quot;python&quot;,&quot;nodeId&quot;:&quot;41bb38ae-79b0-414a-aaf0-35cb78291e4e&quot;}" data-component-name="HighlightedCodeBlockToDOM"><pre class="shiki"><code class="language-python"># From picoagents/compaction.py - HeadTailCompaction.compact()
# Split budget: 20% head (task context), 80% tail (recent work)
head_budget = int(self.token_budget * self.head_ratio)
tail_budget = self.token_budget - head_budget

# Fill head from start, tail from end, drop middle
for group in groups:
    if head_tokens + group_tokens &lt;= head_budget:
        head_groups.append(group)
    else:
        break

for group in reversed(remaining_groups):
    if tail_tokens + group_tokens &lt;= tail_budget:
        tail_groups.insert(0, group)
    else:
        break
</code></pre></div><p><strong>Tool Result Clearing</strong>: Once a tool has been called deep in history, clear the raw result but keep the message structure. Anthropic recently launched this on the <a href="https://www.anthropic.com/engineering/effective-context-engineering-for-ai-agents">Claude Developer Platform</a> - it&#8217;s the lightest-touch form of compaction.</p><p><strong>Summarization</strong>: Instead of dropping old messages, compress them into a summary using a <em>fast</em> model. Preserves information at the cost of an extra LLM call. It can also lack resolution - the summary may omit details needed later. In the benchmark described below, there is the concept of &#8220;thrashing&#8221; - the agent repeats steps because the compaction strategy dropped information it needed, causing it to re-read files or re-run tools.</p><p><strong>Semantic Selection</strong>: Use embeddings to select contextually relevant messages rather than just the most recent. More expensive but can retain relevant older information that sliding window would drop. <a href="https://langchain-ai.github.io/langgraph/how-tos/memory/semantic-search/">LangGraph</a> and <a href="https://github.com/mem0ai/mem0">Mem0</a> provide implementations.</p><p><strong>Limitation</strong>: All compaction is reactive - context grows, then you trim. Even semantic selection responds to accumulated context. If the agent needs something that was trimmed or poorly summarized, that information is gone.</p><h3>Strategy 2: Isolation (Architectural Prevention)</h3><p>Run sub-tasks in separate contexts. Only the summary of steps enters the main agent&#8217;s context.</p><div class="highlighted_code_block" data-attrs="{&quot;language&quot;:&quot;python&quot;,&quot;nodeId&quot;:&quot;2a6b1b0d-4d07-4620-8ee5-bfaf5f3c6267&quot;}" data-component-name="HighlightedCodeBlockToDOM"><pre class="shiki"><code class="language-python"># Define a sub-agent with its own tools and compaction
sub_agent = Agent(
    name="code_reviewer",
    description="Reviews code in a directory",
    instructions="Read all .py files and document classes and functions.",
    model_client=model_client,
    tools=[read_file, list_directory],
    compaction=HeadTailCompaction(token_budget=50_000, head_ratio=0.2),
    max_iterations=20,
)

# Wrap it as a tool &#8212; coordinator delegates, sees only the summary
coordinator = Agent(
    name="coordinator",
    instructions="Delegate each directory to the code_reviewer tool.",
    model_client=model_client,
    tools=[sub_agent.as_tool()],  # Agent becomes a callable tool
    max_iterations=15,
)

response = await coordinator.run("Review the repository.")
# Coordinator context stays small; sub-agent context is discarded</code></pre></div><p><strong>How it works</strong>: A coordinator agent delegates tasks to sub-agents, each running in its own context window. The sub-agents do the heavy lifting &#8212; reading files, calling tools, accumulating context &#8212; and only the final result crosses back. The intermediate work is discarded, so the coordinator&#8217;s context stays bounded regardless of how much work the sub-agents do. In PicoAgents, <code>as_tool()</code> wraps any agent as a callable tool to enable this pattern.</p><p><strong>When to use</strong>:</p><ul><li><p>Tasks involve distinct sub-problems</p></li><li><p>Sub-tasks generate lots of intermediate context</p></li><li><p>You can define clear interfaces (input -&gt; output)</p></li><li><p>You want to parallelize work (multiple specialists can run concurrently)</p></li></ul><p>This same principle applies temporally - some systems restart the agent in fresh sessions, passing only a summary forward instead of the full context. <a href="https://github.com/ksenxx/kiss_ai/blob/main/src/kiss/core/RELENTLESS_AGENT.md">RelentlessAgent</a> takes this approach, running up to 10,000 sequential sub-sessions where each one receives only the original task plus a summary of prior work.</p><h3>Strategy 3: Agentic Memory (External Storage)</h3><p>Give the agent tools to explicitly manage its own memory <em>outside</em> the context window. The agent decides what to save, when to retrieve, and what to forget.</p><p><a href="https://www.anthropic.com/engineering/effective-context-engineering-for-ai-agents">Anthropic</a> calls this &#8220;structured note-taking&#8221; - the agent writes notes persisted to external storage, then retrieves them when needed. Think of it like maintaining a <code>NOTES.md</code> file. 
This enables tracking progress across complex work without keeping everything in active context.</p><div class="highlighted_code_block" data-attrs="{&quot;language&quot;:&quot;python&quot;,&quot;nodeId&quot;:&quot;047b7f01-a68b-4491-9691-4d099f927636&quot;}" data-component-name="HighlightedCodeBlockToDOM"><pre class="shiki"><code class="language-python"># From picoagents/tools/_context_tools.py - TodoWriteTool
# Agent persists progress to disk, outside the context window

def _save_todos(todos):
    path = _get_todo_path()  # .picoagents/todos/session_*.json
    path.parent.mkdir(parents=True, exist_ok=True)
    data = {
        "session_id": _get_session_id(),
        "updated_at": datetime.now().isoformat(),
        "todos": todos,
    }
    path.write_text(json.dumps(data, indent=2))

# Agent calls todo_write to track progress externally
# This keeps task state out of the context window entirely
</code></pre></div><p><strong>Key difference from compaction</strong>: Compaction trims what&#8217;s already in context. Agentic memory moves information <em>outside</em> the context entirely - the agent retrieves it on demand.</p><p><a href="https://www.anthropic.com/engineering/equipping-agents-for-the-real-world-with-agent-skills">Agent Skills</a> work the same way but for procedural knowledge instead of facts. A skill description costs ~100 tokens at startup; the full instructions (~2,000 tokens) only enter context when the task matches. Agentic memory stores what the agent learned (facts, findings, progress); skills store how the agent should act (procedures, workflows). Both keep information outside the context window until it&#8217;s needed. I walk through <a href="https://newsletter.victordibia.com/p/implementing-claude-code-skills-from">implementing skills from scratch</a> in a separate post.</p><p><strong>When to use</strong>:</p><ul><li><p>Long-running tasks where information needs to persist</p></li><li><p>Tasks where the agent needs to selectively recall specific facts</p></li><li><p>You want the agent to decide what&#8217;s worth remembering</p></li></ul><div><hr></div><h2>Benchmark Comparison</h2><p>To illustrate the tradeoffs, I ran a code review task across five agent configurations using PicoAgents&#8217; evaluation system. The task: review a 44-file Python repository, read every <code>.py</code> file, document all classes and functions, and produce a quality assessment. All agents use the same model (gpt-4.1-mini), the same two tools (read_file, list_directory), and the same system prompt. 
The only variables are context strategy and whether a completion hook is enabled.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!OtOp!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fed9c9423-3e67-4779-9541-8d641a36117a_1678x1061.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!OtOp!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fed9c9423-3e67-4779-9541-8d641a36117a_1678x1061.png 424w, https://substackcdn.com/image/fetch/$s_!OtOp!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fed9c9423-3e67-4779-9541-8d641a36117a_1678x1061.png 848w, https://substackcdn.com/image/fetch/$s_!OtOp!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fed9c9423-3e67-4779-9541-8d641a36117a_1678x1061.png 1272w, https://substackcdn.com/image/fetch/$s_!OtOp!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fed9c9423-3e67-4779-9541-8d641a36117a_1678x1061.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!OtOp!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fed9c9423-3e67-4779-9541-8d641a36117a_1678x1061.png" width="1456" height="921" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/ed9c9423-3e67-4779-9541-8d641a36117a_1678x1061.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:921,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:245943,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://newsletter.victordibia.com/i/172101188?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fed9c9423-3e67-4779-9541-8d641a36117a_1678x1061.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!OtOp!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fed9c9423-3e67-4779-9541-8d641a36117a_1678x1061.png 424w, https://substackcdn.com/image/fetch/$s_!OtOp!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fed9c9423-3e67-4779-9541-8d641a36117a_1678x1061.png 848w, https://substackcdn.com/image/fetch/$s_!OtOp!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fed9c9423-3e67-4779-9541-8d641a36117a_1678x1061.png 1272w, https://substackcdn.com/image/fetch/$s_!OtOp!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fed9c9423-3e67-4779-9541-8d641a36117a_1678x1061.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" 
height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>The five configurations:</p><ul><li><p><strong>No Compaction</strong>: Full context carried forward, no trimming. Completion hook forces the agent to keep working until the task is done (up to 50 iterations).</p></li><li><p><strong>HeadTail 8k</strong>: Head+tail compaction with an 8k token budget (20% head, 80% tail). Same completion hook.</p></li><li><p><strong>HeadTail 8k (no hook)</strong>: Same 8k compaction, but no completion hook; the agent stops when it does not call any additional tools.</p></li><li><p><strong>HeadTail 15k</strong>: Head+tail with a larger 15k token budget. Completion hook enabled.</p></li><li><p><strong>Isolation</strong>: A coordinator delegates to sub-agents via <code>as_tool()</code>. Each sub-agent has its own 50k HeadTail budget. 
No completion hook on the coordinator.</p></li></ul><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!SnpG!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fde9f7de5-58da-488a-840b-61e87c8b4bcc_1444x446.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!SnpG!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fde9f7de5-58da-488a-840b-61e87c8b4bcc_1444x446.png 424w, https://substackcdn.com/image/fetch/$s_!SnpG!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fde9f7de5-58da-488a-840b-61e87c8b4bcc_1444x446.png 848w, https://substackcdn.com/image/fetch/$s_!SnpG!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fde9f7de5-58da-488a-840b-61e87c8b4bcc_1444x446.png 1272w, https://substackcdn.com/image/fetch/$s_!SnpG!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fde9f7de5-58da-488a-840b-61e87c8b4bcc_1444x446.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!SnpG!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fde9f7de5-58da-488a-840b-61e87c8b4bcc_1444x446.png" width="1444" height="446" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/de9f7de5-58da-488a-840b-61e87c8b4bcc_1444x446.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:446,&quot;width&quot;:1444,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:71205,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://newsletter.victordibia.com/i/172101188?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fde9f7de5-58da-488a-840b-61e87c8b4bcc_1444x446.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!SnpG!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fde9f7de5-58da-488a-840b-61e87c8b4bcc_1444x446.png 424w, https://substackcdn.com/image/fetch/$s_!SnpG!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fde9f7de5-58da-488a-840b-61e87c8b4bcc_1444x446.png 848w, https://substackcdn.com/image/fetch/$s_!SnpG!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fde9f7de5-58da-488a-840b-61e87c8b4bcc_1444x446.png 1272w, https://substackcdn.com/image/fetch/$s_!SnpG!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fde9f7de5-58da-488a-840b-61e87c8b4bcc_1444x446.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" 
height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a><figcaption class="image-caption">Results from a single run per config using PicoAgents evaluation (handtracking repository, 44 .py files). Quality scored by an LLM judge (gpt-5.2-chat) on completeness and actionability. All hooked agents use LLMCompletionCheckHook with max_restarts=5</figcaption></figure></div><blockquote><p>The completion hook fires when the agent would normally stop (no more tool calls). It summarizes the conversation so far, asks a judge LLM &#8220;is this task complete?&#8221;, and if not, injects a message telling the agent to continue. 
This gives the agent <em>cognitive durability</em> &#8212; the ability to persist through long tasks &#8212; but adds iterations (and tokens) each time it restarts.</p></blockquote><p><strong>A note on evaluation</strong>: Quality scores come from an LLM-as-judge, which introduces its own variance. Building confidence in these numbers requires inspecting the judge&#8217;s rationale for each score, ensuring the judge sees the full agent trace, and providing ground truth references (e.g., the repository has 44 .py files; did the agent find them?). With single runs, treat the scores as directional signals, not precise measurements. I&#8217;ll cover LLM judge design in detail in a future post.</p><p>Several patterns emerge:</p><p><strong>No compaction works, but it&#8217;s expensive.</strong> NoCompaction scored highest (6.0) by brute-forcing - carrying full history across 50 iterations, 915k tokens. The agent never forgets what it read, so it avoids re-reads (only 27% duplication). The tradeoff is cost: 2-6x more tokens than compacted agents, and 22 minutes of wall time. </p>
      <p>
          <a href="https://newsletter.victordibia.com/p/context-engineering-101-how-agents">
              Read more
          </a>
      </p>
   ]]></content:encoded></item><item><title><![CDATA[Implementing Claude Code Skills from Scratch]]></title><description><![CDATA[#58 | The pattern behind Claude Code's extensibility - and a working implementation in 100 lines of Python.]]></description><link>https://newsletter.victordibia.com/p/implementing-claude-code-skills-from</link><guid isPermaLink="false">https://newsletter.victordibia.com/p/implementing-claude-code-skills-from</guid><dc:creator><![CDATA[Victor Dibia, PhD]]></dc:creator><pubDate>Wed, 25 Feb 2026 13:31:36 GMT</pubDate><enclosure url="https://substackcdn.com/image/fetch/$s_!-xd2!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc35516d8-aacf-4666-8f07-898cd5f99bc6_1693x945.png" length="0" type="image/jpeg"/><content:encoded><![CDATA[<div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!-xd2!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc35516d8-aacf-4666-8f07-898cd5f99bc6_1693x945.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!-xd2!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc35516d8-aacf-4666-8f07-898cd5f99bc6_1693x945.png 424w, https://substackcdn.com/image/fetch/$s_!-xd2!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc35516d8-aacf-4666-8f07-898cd5f99bc6_1693x945.png 848w, https://substackcdn.com/image/fetch/$s_!-xd2!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc35516d8-aacf-4666-8f07-898cd5f99bc6_1693x945.png 1272w, 
https://substackcdn.com/image/fetch/$s_!-xd2!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc35516d8-aacf-4666-8f07-898cd5f99bc6_1693x945.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!-xd2!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc35516d8-aacf-4666-8f07-898cd5f99bc6_1693x945.png" width="1456" height="813" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/c35516d8-aacf-4666-8f07-898cd5f99bc6_1693x945.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:813,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:487560,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:false,&quot;topImage&quot;:true,&quot;internalRedirect&quot;:&quot;https://newsletter.victordibia.com/i/173884733?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc35516d8-aacf-4666-8f07-898cd5f99bc6_1693x945.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!-xd2!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc35516d8-aacf-4666-8f07-898cd5f99bc6_1693x945.png 424w, https://substackcdn.com/image/fetch/$s_!-xd2!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc35516d8-aacf-4666-8f07-898cd5f99bc6_1693x945.png 848w, 
https://substackcdn.com/image/fetch/$s_!-xd2!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc35516d8-aacf-4666-8f07-898cd5f99bc6_1693x945.png 1272w, https://substackcdn.com/image/fetch/$s_!-xd2!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc35516d8-aacf-4666-8f07-898cd5f99bc6_1693x945.png 1456w" sizes="100vw" fetchpriority="high"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a><figcaption class="image-caption">The execution flow of a Skill. 
Skill metadata sits in the system prompt, allowing the agent to lazily load the full instruction payload only when triggered by the user's intent.</figcaption></figure></div><p>Agents can act by <a href="https://newsletter.victordibia.com/p/the-arc-of-agent-action-from-code">calling tools, executing code, or following instructions</a>, but doing so efficiently has massive implications for <a href="https://newsletter.victordibia.com/p/the-arc-of-agent-action-from-code">context window usage and security</a>. One approach now supported by Claude Code, Cursor, Gemini CLI, and a growing list of <a href="https://newsletter.victordibia.com/p/the-agent-execution-loop-how-to-build">coding agents</a> is <strong>skills</strong>. A skill is a folder of instructions and scripts that an agent can discover and load on demand. Unlike a CLAUDE.md or system prompt that sits in context permanently, skills extend context only when needed - the agent sees a one-line description at startup (~100 tokens) and pulls in full instructions only when the task matches (~2,000 tokens). With 20 skills, that's ~2,000 tokens at startup instead of ~40,000. You define a <code>SKILL.md</code> file once, and any compatible agent can use it.</p><div class="digest-post-embed" data-attrs="{&quot;nodeId&quot;:&quot;818b17ab-6557-4bfe-be90-60450eca87aa&quot;,&quot;caption&quot;:&quot;Anthropic recently introduced Agent Skills [1]&#8212;folders containing a SKILLS.md file with instructions, scripts, and resources that agents can discover and load dynamically. 
The model reads the skill when relevant, executes bundled code, and solves problems using complex control flow rather than chaining discrete tool calls.&quot;,&quot;cta&quot;:&quot;Read full story&quot;,&quot;showBylines&quot;:true,&quot;size&quot;:&quot;sm&quot;,&quot;isEditorNode&quot;:true,&quot;title&quot;:&quot;The Arc of Agent Action from Code to Tools and Back to Code - And Why Anthropic's SKILLS.md is Not New&quot;,&quot;publishedBylines&quot;:[{&quot;id&quot;:85692678,&quot;name&quot;:&quot;Victor Dibia, PhD&quot;,&quot;bio&quot;:&quot;Software Engineer, Researcher (Microsoft), Best Selling Author working on and writing about Generative AI, Agents. Core maintainer for the AutoGen Framework (50k Stars on GitHub). Previously at Cloudera, IBM Research. All views are my own.&quot;,&quot;photo_url&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/27536f63-7d8e-48dc-b34c-150acfacdc8b_1726x1396.jpeg&quot;,&quot;is_guest&quot;:false,&quot;bestseller_tier&quot;:null}],&quot;post_date&quot;:&quot;2026-01-05T13:31:37.483Z&quot;,&quot;cover_image&quot;:&quot;https://substackcdn.com/image/fetch/$s_!p1g3!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7f76085e-5b6a-4823-b948-8935a6326407_1376x768.png&quot;,&quot;cover_image_alt&quot;:null,&quot;canonical_url&quot;:&quot;https://newsletter.victordibia.com/p/the-arc-of-agent-action-from-code&quot;,&quot;section_name&quot;:null,&quot;video_upload_id&quot;:null,&quot;id&quot;:174707398,&quot;type&quot;:&quot;newsletter&quot;,&quot;reaction_count&quot;:13,&quot;comment_count&quot;:0,&quot;publication_id&quot;:1253044,&quot;publication_name&quot;:&quot;Designing with 
AI&quot;,&quot;publication_logo_url&quot;:&quot;https://substackcdn.com/image/fetch/$s_!1FgP!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fff160652-8bbb-475e-80a7-ba4eb5f80dcb_504x504.png&quot;,&quot;belowTheFold&quot;:false,&quot;youtube_url&quot;:null,&quot;show_links&quot;:null,&quot;feed_url&quot;:null}"></div><p>Importantly, this pattern is now an <a href="https://agentskills.io/">open standard</a> adopted by 25+ tools - you can define a skills folder with a <code>SKILL.md</code> file once, and any compatible agent can use it. </p><p>How does it work under the hood? And how would you add skills support to your own agent (in any framework of your choice)? Well, that&#8217;s what this post attempts to lay out.</p><p>I will also share a complete implementation of a <code>SkillsTool</code> in <a href="https://github.com/victordibia/designing-multiagent-systems">PicoAgents</a>, the companion framework from the <a href="https://multiagentbook.com/">Designing Multi-Agent Systems</a> book. If you are interested in learning about how agents or agent frameworks work, consider grabbing a copy of the book!</p><blockquote><p><em>This post is adapted from <a href="https://multiagentbook.com/">Designing Multi-Agent Systems</a>. 
The book covers tool design, context management, and agent orchestration patterns in more depth with complete implementation code.</em></p></blockquote><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!hrra!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa28fea22-e718-4a42-ad0e-d154e6176e7e_2752x1536.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!hrra!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa28fea22-e718-4a42-ad0e-d154e6176e7e_2752x1536.png 424w, https://substackcdn.com/image/fetch/$s_!hrra!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa28fea22-e718-4a42-ad0e-d154e6176e7e_2752x1536.png 848w, https://substackcdn.com/image/fetch/$s_!hrra!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa28fea22-e718-4a42-ad0e-d154e6176e7e_2752x1536.png 1272w, https://substackcdn.com/image/fetch/$s_!hrra!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa28fea22-e718-4a42-ad0e-d154e6176e7e_2752x1536.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!hrra!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa28fea22-e718-4a42-ad0e-d154e6176e7e_2752x1536.png" width="1456" height="813" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/a28fea22-e718-4a42-ad0e-d154e6176e7e_2752x1536.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:813,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:4494885,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:false,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://newsletter.victordibia.com/i/173884733?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa28fea22-e718-4a42-ad0e-d154e6176e7e_2752x1536.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!hrra!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa28fea22-e718-4a42-ad0e-d154e6176e7e_2752x1536.png 424w, https://substackcdn.com/image/fetch/$s_!hrra!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa28fea22-e718-4a42-ad0e-d154e6176e7e_2752x1536.png 848w, https://substackcdn.com/image/fetch/$s_!hrra!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa28fea22-e718-4a42-ad0e-d154e6176e7e_2752x1536.png 1272w, https://substackcdn.com/image/fetch/$s_!hrra!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa28fea22-e718-4a42-ad0e-d154e6176e7e_2752x1536.png 1456w" sizes="100vw"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" 
viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><div><hr></div><h2>What Is a Skill?</h2><p>A skill is a folder containing a <code>SKILL.md</code> file with two parts: YAML frontmatter (metadata) and markdown content (instructions).</p><p>Here&#8217;s a commit skill based on <a href="https://code.claude.com/docs/en/skills">Claude Code&#8217;s built-in skills</a>:</p><pre><code><code>---
name: commit
description: Create a git commit
allowed-tools: Bash(git add:*), Bash(git status:*), Bash(git commit:*)
---

## Context

- Current git status: !`git status`
- Current git diff: !`git diff HEAD`
- Current branch: !`git branch --show-current`
- Recent commits: !`git log --oneline -10`

## Your task

Based on the above changes, create a single git commit.
Stage and create the commit using a single message.
Do not use any other tools or do anything else.
</code></code></pre><p>That&#8217;s the entire skill. A skill is:</p><ol><li><p><strong>Metadata</strong> (frontmatter) - name, description, and optionally which tools it needs</p></li><li><p><strong>Instructions</strong> (body) - what the agent should do when the skill is activated</p></li></ol><p>The directory structure looks like this:</p><pre><code><code>skills/
&#9500;&#9472;&#9472; commit/
&#9474;   &#9492;&#9472;&#9472; SKILL.md
&#9500;&#9472;&#9472; code-review/
&#9474;   &#9500;&#9472;&#9472; SKILL.md
&#9474;   &#9500;&#9472;&#9472; examples/
&#9474;   &#9474;   &#9492;&#9472;&#9472; sample-review.md
&#9474;   &#9492;&#9472;&#9472; scripts/
&#9474;       &#9492;&#9472;&#9472; diff-analyzer.py
&#9492;&#9472;&#9472; debug/
    &#9492;&#9472;&#9472; SKILL.md
</code></code></pre><p>Each skill is self-contained. The <code>SKILL.md</code> is required; everything else - scripts, templates, reference docs - is optional.</p><div><hr></div><h2>Instructions vs Skills vs Tools</h2><p>While all three are ways to extend what the agent can do, they differ in when they are loaded, how much context they consume, and their execution patterns. Examples of each:</p><ul><li><p>Instructions: &#8220;always do X.&#8221;</p></li><li><p>Tools: <code>get_weather(city)</code></p></li><li><p>Skills: &#8220;when the user asks for a code review, follow this checklist and use these tools.&#8221;</p></li></ul><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!kZZs!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb9407ea9-e0ca-4a1b-af49-be002af99c82_1368x702.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!kZZs!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb9407ea9-e0ca-4a1b-af49-be002af99c82_1368x702.png 424w, https://substackcdn.com/image/fetch/$s_!kZZs!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb9407ea9-e0ca-4a1b-af49-be002af99c82_1368x702.png 848w, https://substackcdn.com/image/fetch/$s_!kZZs!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb9407ea9-e0ca-4a1b-af49-be002af99c82_1368x702.png 1272w, https://substackcdn.com/image/fetch/$s_!kZZs!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb9407ea9-e0ca-4a1b-af49-be002af99c82_1368x702.png 1456w" 
sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!kZZs!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb9407ea9-e0ca-4a1b-af49-be002af99c82_1368x702.png" width="1368" height="702" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/b9407ea9-e0ca-4a1b-af49-be002af99c82_1368x702.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:702,&quot;width&quot;:1368,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:122917,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://newsletter.victordibia.com/i/173884733?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb9407ea9-e0ca-4a1b-af49-be002af99c82_1368x702.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!kZZs!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb9407ea9-e0ca-4a1b-af49-be002af99c82_1368x702.png 424w, https://substackcdn.com/image/fetch/$s_!kZZs!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb9407ea9-e0ca-4a1b-af49-be002af99c82_1368x702.png 848w, https://substackcdn.com/image/fetch/$s_!kZZs!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb9407ea9-e0ca-4a1b-af49-be002af99c82_1368x702.png 1272w, 
https://substackcdn.com/image/fetch/$s_!kZZs!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb9407ea9-e0ca-4a1b-af49-be002af99c82_1368x702.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Skills are useful when a task requires deep domain knowledge or multi-step orchestration that the agent may or may not encounter during a run - too detailed and specialized for primary context, but too complex for a single tool call. 
They sit between global instructions and atomic tools: task-specific prompts that load into context only when relevant.</p><p>Skills can also bundle scripts, reference docs, and assets alongside the SKILL.md - the instructions tell the agent when and how to use them.</p><div><hr></div><h2>Progressive Disclosure</h2><p>Skills use a <strong>three-tier loading strategy</strong> to manage context efficiently. From the <a href="https://agentskills.io/specification">Agent Skills specification</a>:</p><blockquote><ol><li><p><strong>Metadata (~100 tokens)</strong>: The <code>name</code> and <code>description</code> fields are loaded at startup for all skills</p></li><li><p><strong>Instructions (&lt; 5,000 tokens recommended)</strong>: The full <code>SKILL.md</code> body is loaded when the skill is activated</p></li><li><p><strong>Resources (as needed)</strong>: Files in <code>scripts/</code>, <code>references/</code>, or <code>assets/</code> are loaded only when required</p></li></ol></blockquote><p>This is progressive disclosure applied to agent context. At startup, the agent knows <em>what skills exist</em> (just names and descriptions) as these are added to the context on load. Only when a skill is needed does the full content load into the conversation.</p><p>From <a href="https://code.claude.com/docs/en/skills">Claude Code&#8217;s documentation</a>:</p><blockquote><p>&#8220;In a regular session, skill descriptions are loaded into context so Claude knows what&#8217;s available, but <strong>full skill content only loads when invoked</strong>.&#8221;</p></blockquote><p>Why does this matter? Because context is expensive. If you have 20 skills averaging 2,000 tokens each, eagerly loading all of them would consume 40,000 tokens before the agent does anything. 
With progressive disclosure, you spend ~2,000 tokens on metadata (100 tokens x 20 skills) and only load the ~2,000 tokens of instructions for the skill that&#8217;s actually needed.</p><div><hr></div><h2>How Skills Are Implemented</h2><p>From the <a href="https://agentskills.io/integrate-skills">Agent Skills integration guide</a>:</p><p><strong>Skills have two parts: metadata injection and a load tool.</strong></p><p>At startup, skill names and descriptions are injected into the system prompt so the model knows what&#8217;s available:</p><pre><code><code>&lt;available_skills&gt;
  &lt;skill&gt;
    &lt;name&gt;code-review&lt;/name&gt;
    &lt;description&gt;Review code changes for bugs, security issues, and improvements&lt;/description&gt;
  &lt;/skill&gt;
  &lt;skill&gt;
    &lt;name&gt;debug&lt;/name&gt;
    &lt;description&gt;Systematic approach to debugging errors and unexpected behavior&lt;/description&gt;
  &lt;/skill&gt;
&lt;/available_skills&gt;
</code></code></pre><p>The model also gets a <code>skills</code> tool (or <code>Skill</code> in Claude Code) with a <strong>load</strong> operation. When a task matches a skill, the model calls <code>skills(action='load', name='code-review')</code> and gets the full SKILL.md body back as a tool result - instructions that now appear in context for the model to follow.</p><div><hr></div><h2>Implementing Skills in Your Agent</h2><p>The implementation has three parts: a SKILL.md parser, a discovery mechanism, and a tool the agent can call.</p><p> </p><h3>Parsing, Discovery, and the Skills Tool</h3><p>The SKILL.md format is what we saw above - YAML frontmatter + markdown body. The implementation needs to parse that, scan directories for skill folders, and expose a tool the agent can call. In pseudocode:</p><pre><code><code>class SkillsTool:
    def discover(skills_path) -&gt; {name: (path, metadata)}:
        # scan directories for folders containing SKILL.md
        # parse each: split on "---", extract name + description

    def get_system_prompt_section() -&gt; str:
        # inject skill metadata into system prompt at startup
        # "Available Skills: code-review: Review code changes..."

    def execute(action, name=""):
        if action == "load":
            return full SKILL.md body  # ~2,000 tokens, loaded on demand
</code></code></pre><p>The full implementation is in <code>SkillsTool</code> (~80 lines of Python).</p><h3>Wiring It to an Agent</h3><p>The tool gets added to the agent like any other tool. At startup, <code>get_system_prompt_section()</code> appends skill metadata to the agent&#8217;s instructions so the model knows what&#8217;s available without an extra tool call:</p><pre><code><code>from picoagents import Agent
from picoagents.tools import SkillsTool

skills_tool = SkillsTool(
    builtin_path=Path("./skills"),         # shipped with your app
    project_path=Path("./.claude/skills"),  # project-specific
)

agent = Agent(
    name="assistant",
    instructions="You are a helpful assistant.\n" + skills_tool.get_system_prompt_section(),
    model_client=client,
    tools=[skills_tool, *other_tools],
)
</code></code></pre><p>When the user says &#8220;review this code,&#8221; the model sees <code>code-review</code> in its system prompt, calls <code>skills(action='load', name='code-review')</code>, gets the full instructions back as a tool result, and follows them.</p><div><hr></div><h2>What This Looks Like in Practice</h2><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!7Hlr!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdbabf8c8-6e05-4185-8b00-d1d737bf7dac_1408x768.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!7Hlr!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdbabf8c8-6e05-4185-8b00-d1d737bf7dac_1408x768.png 424w, https://substackcdn.com/image/fetch/$s_!7Hlr!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdbabf8c8-6e05-4185-8b00-d1d737bf7dac_1408x768.png 848w, https://substackcdn.com/image/fetch/$s_!7Hlr!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdbabf8c8-6e05-4185-8b00-d1d737bf7dac_1408x768.png 1272w, https://substackcdn.com/image/fetch/$s_!7Hlr!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdbabf8c8-6e05-4185-8b00-d1d737bf7dac_1408x768.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!7Hlr!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdbabf8c8-6e05-4185-8b00-d1d737bf7dac_1408x768.png" width="1408" height="768" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/dbabf8c8-6e05-4185-8b00-d1d737bf7dac_1408x768.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:768,&quot;width&quot;:1408,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:417233,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://newsletter.victordibia.com/i/173884733?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdbabf8c8-6e05-4185-8b00-d1d737bf7dac_1408x768.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!7Hlr!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdbabf8c8-6e05-4185-8b00-d1d737bf7dac_1408x768.png 424w, https://substackcdn.com/image/fetch/$s_!7Hlr!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdbabf8c8-6e05-4185-8b00-d1d737bf7dac_1408x768.png 848w, https://substackcdn.com/image/fetch/$s_!7Hlr!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdbabf8c8-6e05-4185-8b00-d1d737bf7dac_1408x768.png 1272w, https://substackcdn.com/image/fetch/$s_!7Hlr!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdbabf8c8-6e05-4185-8b00-d1d737bf7dac_1408x768.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" 
height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p><br>A concrete trace:</p><p><strong>1. Agent starts.</strong> Skill metadata is injected into the system prompt (~100 tokens per skill). The model sees skill names and descriptions from the first turn.</p><p><strong>2. User asks:</strong> &#8220;Can you review the changes I made?&#8221;</p><p><strong>3. Model loads the matching skill.</strong> It already knows <code>code-review</code> exists from the system prompt. Calls <code>skills(action='load', name='code-review')</code>. Gets back the full review checklist - correctness, security, performance, maintainability - plus the review format template (~1,000 tokens).</p><p><strong>4. Model follows the instructions.</strong> It reads the diff, checks each file against the checklist, and produces a structured review with severity levels.</p><p>The load in step 3 only happens when needed. 
If the user had asked a simple question, no skill would load and those ~1,000 tokens would never be spent.</p>
      <p>
          <a href="https://newsletter.victordibia.com/p/implementing-claude-code-skills-from">
              Read more
          </a>
      </p>
   ]]></content:encoded></item><item><title><![CDATA[Use Coding Agents (Claude Code) to Build Your Product. Don't Make Them Your Product.]]></title><description><![CDATA[#57 | On where general-purpose coding agents like Claude Code shine, where they break, and how to integrate them into your workflow.]]></description><link>https://newsletter.victordibia.com/p/use-coding-agents-claude-code-to</link><guid isPermaLink="false">https://newsletter.victordibia.com/p/use-coding-agents-claude-code-to</guid><dc:creator><![CDATA[Victor Dibia, PhD]]></dc:creator><pubDate>Wed, 18 Feb 2026 13:30:25 GMT</pubDate><enclosure url="https://substackcdn.com/image/fetch/$s_!ACpM!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F44ab17a4-3289-4030-aa0c-5e43e511c4ad_1678x1195.png" length="0" type="image/jpeg"/><content:encoded><![CDATA[<div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!ACpM!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F44ab17a4-3289-4030-aa0c-5e43e511c4ad_1678x1195.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!ACpM!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F44ab17a4-3289-4030-aa0c-5e43e511c4ad_1678x1195.png 424w, https://substackcdn.com/image/fetch/$s_!ACpM!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F44ab17a4-3289-4030-aa0c-5e43e511c4ad_1678x1195.png 848w, https://substackcdn.com/image/fetch/$s_!ACpM!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F44ab17a4-3289-4030-aa0c-5e43e511c4ad_1678x1195.png 1272w, 
https://substackcdn.com/image/fetch/$s_!ACpM!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F44ab17a4-3289-4030-aa0c-5e43e511c4ad_1678x1195.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!ACpM!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F44ab17a4-3289-4030-aa0c-5e43e511c4ad_1678x1195.png" width="1456" height="1037" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/44ab17a4-3289-4030-aa0c-5e43e511c4ad_1678x1195.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:1037,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:1579228,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:false,&quot;topImage&quot;:true,&quot;internalRedirect&quot;:&quot;https://newsletter.victordibia.com/i/179010845?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F44ab17a4-3289-4030-aa0c-5e43e511c4ad_1678x1195.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!ACpM!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F44ab17a4-3289-4030-aa0c-5e43e511c4ad_1678x1195.png 424w, https://substackcdn.com/image/fetch/$s_!ACpM!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F44ab17a4-3289-4030-aa0c-5e43e511c4ad_1678x1195.png 848w, 
https://substackcdn.com/image/fetch/$s_!ACpM!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F44ab17a4-3289-4030-aa0c-5e43e511c4ad_1678x1195.png 1272w, https://substackcdn.com/image/fetch/$s_!ACpM!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F44ab17a4-3289-4030-aa0c-5e43e511c4ad_1678x1195.png 1456w" sizes="100vw" fetchpriority="high"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>General-purpose coding <a href="https://newsletter.victordibia.com/p/the-agent-execution-loop-how-to-build">agents</a> have 
shown remarkable versatility, moving well beyond coding and into multiple domains. Claude Code now powers everything from software engineering to data analysis to desktop automation. OpenAI&#8217;s Codex has evolved from a cloud coding agent into a full desktop app with Skills, Automations, and multi-channel integration. Both companies are aggressively embedding these tools into enterprise workflows - Anthropic through partnerships with Accenture and Goldman Sachs, OpenAI through deep OS-level integration.</p><p>And then there&#8217;s  <a href="https://github.com/openclaw/openclaw">OpenClaw</a>. In the span of a few weeks, <a href="https://github.com/openclaw/openclaw">OpenClaw</a>, the open-source personal AI agent created by Peter Steinberger (originally called Clawdbot, briefly MoltBot), went from an indie project to <a href="https://pitchwall.co/blog/openclaw-explained-the-viral-open-source-ai-agent-with-100k-github-stars">190,000+ GitHub stars</a>, deployments across Silicon Valley and China, and a social network built entirely by and for AI agents (<a href="https://www.nbcnews.com/tech/tech-news/ai-agents-social-media-platform-moltbook-rcna256738">MoltBook</a>, with over a million AI agents). On February 14, <a href="https://techcrunch.com/2026/02/15/openclaw-creator-peter-steinberger-joins-openai/">Steinberger announced he&#8217;s joining OpenAI</a> and moving the project to an open-source foundation.  </p><blockquote><p>All of this suggests we&#8217;re at an inflection point.</p></blockquote><p>Naturally, for many teams, there are real concerns: <strong>How do we respond to this</strong>? How disruptive is this for our business? 
<em>Should we ship capable general-purpose agents as part of our product</em>?</p><p>TL;DR: <em><strong>You probably shouldn&#8217;t ship them as your product</strong></em> (doing so adds security issues, and your business goal can often be accomplished without the full agent capabilities) - but you should absolutely use them to <em>build</em> your product. Here&#8217;s why, and what your team should do instead: use them to <em>build faster</em>, and ship <em>agentic workflows with deterministic steps</em> (not fully autonomous agents) as part of your product.</p><blockquote><p><em>This post is adapted from themes in <a href="https://multiagentbook.com/">Designing Multi-Agent Systems</a>. The book covers agentic workflow design, orchestration patterns, and evaluation in depth.</em></p></blockquote><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!oRT4!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4367ab58-9209-4df4-a147-0efbda1974e3_2752x1536.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!oRT4!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4367ab58-9209-4df4-a147-0efbda1974e3_2752x1536.png 424w, https://substackcdn.com/image/fetch/$s_!oRT4!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4367ab58-9209-4df4-a147-0efbda1974e3_2752x1536.png 848w, https://substackcdn.com/image/fetch/$s_!oRT4!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4367ab58-9209-4df4-a147-0efbda1974e3_2752x1536.png 1272w, 
https://substackcdn.com/image/fetch/$s_!oRT4!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4367ab58-9209-4df4-a147-0efbda1974e3_2752x1536.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!oRT4!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4367ab58-9209-4df4-a147-0efbda1974e3_2752x1536.png" width="1456" height="813" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/4367ab58-9209-4df4-a147-0efbda1974e3_2752x1536.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:813,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:4494885,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:false,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://newsletter.victordibia.com/i/179010845?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4367ab58-9209-4df4-a147-0efbda1974e3_2752x1536.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!oRT4!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4367ab58-9209-4df4-a147-0efbda1974e3_2752x1536.png 424w, https://substackcdn.com/image/fetch/$s_!oRT4!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4367ab58-9209-4df4-a147-0efbda1974e3_2752x1536.png 848w, 
https://substackcdn.com/image/fetch/$s_!oRT4!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4367ab58-9209-4df4-a147-0efbda1974e3_2752x1536.png 1272w, https://substackcdn.com/image/fetch/$s_!oRT4!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4367ab58-9209-4df4-a147-0efbda1974e3_2752x1536.png 1456w" sizes="100vw"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p></p><h2>Two Problems With Deploying General-Purpose Agents</h2><p>The common thread across Claude Code, Codex, OpenClaw, and the rest is that 
they&#8217;re tools designed to work for an <em>individual user</em> and enable <em>individual productivity</em>. The user and the agent share the same goals and risk profile. That&#8217;s exactly what breaks when you try to make the agent serve customers on behalf of a business.</p><h3>The Security Problem</h3><p>This is often under-discussed. Tools like Claude Code, Codex, and OpenClaw operate within the boundary of a trusted local environment. They have access to your file system. They can run arbitrary commands. They can read your email, manage your calendar, browse the web on your behalf. That&#8217;s where the <em>magic</em> comes from.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!roaQ!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F30dd5fbc-010e-4e07-878e-50aced6b48b4_1920x1920.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!roaQ!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F30dd5fbc-010e-4e07-878e-50aced6b48b4_1920x1920.png 424w, https://substackcdn.com/image/fetch/$s_!roaQ!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F30dd5fbc-010e-4e07-878e-50aced6b48b4_1920x1920.png 848w, https://substackcdn.com/image/fetch/$s_!roaQ!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F30dd5fbc-010e-4e07-878e-50aced6b48b4_1920x1920.png 1272w, https://substackcdn.com/image/fetch/$s_!roaQ!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F30dd5fbc-010e-4e07-878e-50aced6b48b4_1920x1920.png 1456w" 
sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!roaQ!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F30dd5fbc-010e-4e07-878e-50aced6b48b4_1920x1920.png" width="1456" height="1456" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/30dd5fbc-010e-4e07-878e-50aced6b48b4_1920x1920.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:1456,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:null,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:null,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!roaQ!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F30dd5fbc-010e-4e07-878e-50aced6b48b4_1920x1920.png 424w, https://substackcdn.com/image/fetch/$s_!roaQ!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F30dd5fbc-010e-4e07-878e-50aced6b48b4_1920x1920.png 848w, https://substackcdn.com/image/fetch/$s_!roaQ!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F30dd5fbc-010e-4e07-878e-50aced6b48b4_1920x1920.png 1272w, https://substackcdn.com/image/fetch/$s_!roaQ!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F30dd5fbc-010e-4e07-878e-50aced6b48b4_1920x1920.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" 
class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Trying to replicate that magic as part of a service is a fundamentally different proposition. If you sandbox the agent, you cut off the access that makes it useful. 
Meta&#8217;s <a href="https://ai.meta.com/blog/practical-ai-agent-security/">Agents Rule of Two</a> - inspired by Chromium&#8217;s security policies - offers a useful framework: until robust defenses against prompt injection exist, agents should satisfy no more than two of these three properties:</p><ul><li><p><strong>Process untrustworthy inputs</strong> - data from unknown sources (user prompts, emails, web content)</p></li><li><p><strong>Access sensitive systems</strong> - production databases, credentials, personal data</p></li><li><p><strong>Change state or communicate externally</strong> - send emails, execute transactions, modify files</p></li></ul><p>General-purpose agents inherently want all three. (I covered the security implications of this in more detail in <a href="https://newsletter.victordibia.com/p/the-arc-of-agent-action-from-code">The Arc of Agent Action</a>.)</p><div class="digest-post-embed" data-attrs="{&quot;nodeId&quot;:&quot;feaf4fc3-9649-4b0b-94e1-dde9fd140f64&quot;,&quot;caption&quot;:&quot;Anthropic recently introduced Agent Skills [1]&#8212;folders containing a SKILLS.md file with instructions, scripts, and resources that agents can discover and load dynamically. The model reads the skill when relevant, executes bundled code, and solves problems using complex control flow rather than chaining discrete tool calls.&quot;,&quot;cta&quot;:&quot;Read full story&quot;,&quot;showBylines&quot;:true,&quot;size&quot;:&quot;sm&quot;,&quot;isEditorNode&quot;:true,&quot;title&quot;:&quot;The Arc of Agent Action from Code to Tools and Back to Code - And Why Anthropic's SKILLS.md is Not New&quot;,&quot;publishedBylines&quot;:[{&quot;id&quot;:85692678,&quot;name&quot;:&quot;Victor Dibia, PhD&quot;,&quot;bio&quot;:&quot;Software Engineer, Researcher (Microsoft), Best Selling Author working on and writing about Generative AI, Agents. Core maintainer for the AutoGen Framework (50k Stars on GitHub). Previously at Cloudera, IBM Research. 
All views are my own.&quot;,&quot;photo_url&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/27536f63-7d8e-48dc-b34c-150acfacdc8b_1726x1396.jpeg&quot;,&quot;is_guest&quot;:false,&quot;bestseller_tier&quot;:null}],&quot;post_date&quot;:&quot;2026-01-05T13:31:37.483Z&quot;,&quot;cover_image&quot;:&quot;https://substackcdn.com/image/fetch/$s_!p1g3!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7f76085e-5b6a-4823-b948-8935a6326407_1376x768.png&quot;,&quot;cover_image_alt&quot;:null,&quot;canonical_url&quot;:&quot;https://newsletter.victordibia.com/p/the-arc-of-agent-action-from-code&quot;,&quot;section_name&quot;:null,&quot;video_upload_id&quot;:null,&quot;id&quot;:174707398,&quot;type&quot;:&quot;newsletter&quot;,&quot;reaction_count&quot;:13,&quot;comment_count&quot;:0,&quot;publication_id&quot;:1253044,&quot;publication_name&quot;:&quot;Designing with AI&quot;,&quot;publication_logo_url&quot;:&quot;https://substackcdn.com/image/fetch/$s_!1FgP!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fff160652-8bbb-475e-80a7-ba4eb5f80dcb_504x504.png&quot;,&quot;belowTheFold&quot;:true,&quot;youtube_url&quot;:null,&quot;show_links&quot;:null,&quot;feed_url&quot;:null}"></div><p>Local agents get away with it because they operate inside the user&#8217;s own trust boundary. The user is the admin. The threat model is yourself. But the moment you move that agent to a server, the &#8220;user&#8221; becomes an attacker sending untrusted prompts, and the &#8220;file system&#8221; becomes your production database. The security model collapses.</p><p>OpenClaw is the clearest illustration of this tension. It's extraordinarily capable, and the security community's response has been proportionally alarmed. 
<a href="https://www.paloaltonetworks.com/blog/network-security/why-moltbot-may-signal-ai-crisis/">Palo Alto Networks called it a "lethal trifecta"</a> of risks - access to private data, exposure to untrusted content, and the ability to communicate externally while retaining persistent memory. Censys identified over <a href="https://adversa.ai/blog/openclaw-security-101-vulnerabilities-hardening-2026/">30,000 exposed instances</a> and researchers found <a href="https://thehackernews.com/2026/02/researchers-find-341-malicious-clawhub.html">hundreds of malicious skills</a> in the marketplace. <a href="https://blogs.cisco.com/ai/personal-ai-agents-like-openclaw-are-a-security-nightmare">Cisco's AI security team</a> found a third-party OpenClaw skill performing data exfiltration and prompt injection without user awareness. One of OpenClaw's own maintainers warned: "if you can't understand how to run a command line, this is far too dangerous of a project for you to use safely." The power and the danger come from exactly the same source: unrestricted access.</p><blockquote><p>What makes OpenClaw especially instructive is how hard it <em>tries</em> to add security - and how it's still not enough. The codebase contains a <a href="https://github.com/openclaw/openclaw/blob/main/src/agents/pi-tools.policy.ts">6-layer tool policy pipeline</a> (owner-only gates, per-provider allowlists, group restrictions, Docker sandbox policies, subagent restrictions), <a href="https://github.com/openclaw/openclaw/blob/main/src/agents/pi-tools.ts">workspace-only guards</a> that reject file operations outside the user's directory, and factory functions that scope every tool to a specific session. Thousands of lines of security infrastructure - and researchers still found exposed instances, malicious skills, and active exfiltration in the wild.</p></blockquote><p>This isn&#8217;t a new pattern. Desktop software had direct access to local hardware, memory, and the file system. 
When the industry moved to the web, recreating that desktop magic took years of engineering - and the web <em>still</em> remains more constrained because of security and sandboxing. Coding agents scaling from local to cloud will face the same slog, but harder: in traditional web apps, client and server were architecturally distinct. Agent systems collapse that separation. The model plans, executes, and decides. It <em>is</em> the client and the server.</p><blockquote><p>You can see this concretely in <a href="https://github.com/badlogic/pi-mono/blob/main/packages/agent/src/agent-loop.ts">Pi's agent loop</a> - the framework underneath OpenClaw. The LLM receives the full context (system prompt, message history, all previous tool results) with no redaction. It decides which tools to call. Those tools execute with the full permissions of the process. Results flow back into the same context. There is no architectural boundary between "what can I see" and "what can I do" - the separation that made web security tractable simply doesn't exist.</p></blockquote><h3>The Business Specificity Problem</h3><p>General-purpose agents are designed to handle a wide range of tasks. But your business is not trying to help your customer with a wide range of tasks.</p><p>If you&#8217;re a bank, you&#8217;re trying to drive sales of banking services. If you&#8217;re a tax preparation company, you&#8217;re trying to provide a better tax preparation experience. If you&#8217;re a marketing team, you&#8217;re trying to improve content quality. If you run a recommendation system, that&#8217;s a specific task with its own specific requirements.</p><p>In these cases, the economics matter enormously. A general-purpose model is paying a &#8220;cognitive tax&#8221; to maintain the ability to do <em>anything</em>: write a poem, debug Python, plan a trip. <strong>Your business process doesn&#8217;t need that optionality, but you&#8217;re paying for the compute to support it</strong>. 
A single agentic coding task can cost anywhere from $0.50 to $10 depending on complexity - Anthropic&#8217;s  documentation reports Claude Code averages <a href="https://code.claude.com/docs/en/costs">$6 per developer per day</a>, with 90% of users under $12/day. Across millions of customer requests, the math gets uncomfortable fast. </p><blockquote><p>To quantify: <a href="https://github.com/openclaw/openclaw/blob/main/src/agents/system-prompt.ts">OpenClaw's system prompt builder</a> is 678 lines of code assembling sections for 23 tools, memory, messaging, safety, sandbox, and more. A domain-specific agent needs maybe 3-5 tools and a 200-500 token prompt. That's a 4-10x overhead per request in prompt tokens alone, before the model even starts reasoning across capabilities the domain doesn't need.</p></blockquote><p>Yes, inference costs are falling - roughly <a href="https://a16z.com/llmflation-llm-inference-cost/">10x per year</a> by a16z&#8217;s estimate, and possibly <a href="https://epoch.ai/data-insights/llm-inference-price-trends">faster</a> on some benchmarks. But cheaper tokens don&#8217;t solve the architectural problems. The security exposure of a general-purpose agent - prompt injection risk, unpredictable tool invocations, variable data access patterns - doesn&#8217;t shrink when inference gets cheaper. Neither does the observability challenge. You still can&#8217;t predict <em>how</em> the agent will solve a given request, which means you can&#8217;t predict what it will cost, what it will access, or what attack surface it exposes.</p><blockquote><p>The cost of a general-purpose agent isn't just inference - it's the meta-inference. 
OpenClaw's codebase includes a <a href="https://github.com/badlogic/pi-mono/blob/main/packages/coding-agent/src/core/compaction/compaction.ts">compaction system</a> that makes its own LLM call to summarize history when context fills up, a context pruning extension for cache management, auth profile rotation across API keys, and tool loop detection. Each is an additional cost layer that exists only because the agent is general-purpose and long-running. A focused workflow with deterministic steps likely needs none of it.</p></blockquote><p>The short story is: <strong>you often need to do </strong><em><strong>just one thing</strong></em><strong> very well</strong>. An agent that can <strong>do everything at high cost and with unpredictable behavior</strong> is not necessarily a good fit for your business at scale.</p><h2>What Should Your Team Do?</h2><p><strong>Start with your business problem.</strong> Are you in the business of selling personal productivity tools? If the answer is no, you probably should not be trying to ship a general-purpose agent as part of your API or customer-facing service.</p><p><strong>But that doesn&#8217;t mean you shouldn&#8217;t use these agents.</strong> In fact, you absolutely should - as <em>tools</em>. As part of your internal processes. As accelerators for your engineering team. As components in your internal systems.</p><p>If you&#8217;re not in the personal productivity business, what you should be thinking about is: <em>how can I accelerate my team to build my product?</em> Use these tools to optimize your process. 
And underneath that question, there are three concrete approaches, with tradeoff implications.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!TDMn!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7e2011f5-0e27-4fa0-b521-adb3f7266ee6_1678x1025.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!TDMn!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7e2011f5-0e27-4fa0-b521-adb3f7266ee6_1678x1025.png 424w, https://substackcdn.com/image/fetch/$s_!TDMn!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7e2011f5-0e27-4fa0-b521-adb3f7266ee6_1678x1025.png 848w, https://substackcdn.com/image/fetch/$s_!TDMn!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7e2011f5-0e27-4fa0-b521-adb3f7266ee6_1678x1025.png 1272w, https://substackcdn.com/image/fetch/$s_!TDMn!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7e2011f5-0e27-4fa0-b521-adb3f7266ee6_1678x1025.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!TDMn!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7e2011f5-0e27-4fa0-b521-adb3f7266ee6_1678x1025.png" width="1456" height="889" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/7e2011f5-0e27-4fa0-b521-adb3f7266ee6_1678x1025.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:889,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:1057085,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://newsletter.victordibia.com/i/179010845?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7e2011f5-0e27-4fa0-b521-adb3f7266ee6_1678x1025.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!TDMn!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7e2011f5-0e27-4fa0-b521-adb3f7266ee6_1678x1025.png 424w, https://substackcdn.com/image/fetch/$s_!TDMn!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7e2011f5-0e27-4fa0-b521-adb3f7266ee6_1678x1025.png 848w, https://substackcdn.com/image/fetch/$s_!TDMn!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7e2011f5-0e27-4fa0-b521-adb3f7266ee6_1678x1025.png 1272w, https://substackcdn.com/image/fetch/$s_!TDMn!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7e2011f5-0e27-4fa0-b521-adb3f7266ee6_1678x1025.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" 
width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><ul><li><p><strong>AI-Assisted Engineering</strong> - Low scale/cost (your eng team), low security exposure (local, trusted user), low predictability needed (exploration is fine)</p></li><li><p><strong>Internal Agents</strong> - Medium scale/cost (your company), medium security exposure (internal, controlled data), medium predictability needed (should be reliable)</p></li><li><p><strong>Agentic Workflows</strong> - High scale/cost (your customers), high security exposure (production data, external prompts), high predictability needed (strongly preferred)</p></li></ul><h3>Approach 1: AI-Assisted Engineering</h3><p>You can get your teams to <a href="https://newsletter.victordibia.com/p/vibe-coding-with-engineering-discipline">move faster</a> if you invest in an agent-first approach to building software. 
</p><div class="digest-post-embed" data-attrs="{&quot;nodeId&quot;:&quot;e2b3fce6-7121-41de-a288-79296ed73697&quot;,&quot;caption&quot;:&quot;AI coding agents have gotten remarkably capable. METR&#8217;s research [1] shows that the length of tasks AI agents can complete autonomously has doubled every 7 months over the past 6 years. The recently announced OpenAI&#8217;s GPT-5-Codex [2] can run autonomously for over 7 hours, handling complex refactoring and code reviews without human intervention.&quot;,&quot;cta&quot;:&quot;Read full story&quot;,&quot;showBylines&quot;:true,&quot;size&quot;:&quot;sm&quot;,&quot;isEditorNode&quot;:true,&quot;title&quot;:&quot;Vibe Coding .. With Engineering Discipline: Building Real Apps with AI Agents&quot;,&quot;publishedBylines&quot;:[{&quot;id&quot;:85692678,&quot;name&quot;:&quot;Victor Dibia, PhD&quot;,&quot;bio&quot;:&quot;Software Engineer, Researcher (Microsoft), Best Selling Author working on and writing about Generative AI, Agents. Core maintainer for the AutoGen Framework (50k Stars on GitHub). Previously at Cloudera, IBM Research. 
All views are my own.&quot;,&quot;photo_url&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/27536f63-7d8e-48dc-b34c-150acfacdc8b_1726x1396.jpeg&quot;,&quot;is_guest&quot;:false,&quot;bestseller_tier&quot;:null}],&quot;post_date&quot;:&quot;2025-12-19T13:02:52.116Z&quot;,&quot;cover_image&quot;:&quot;https://substackcdn.com/image/fetch/$s_!v-V3!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F67853d02-f244-4829-84df-a07fc900527d_1731x1154.png&quot;,&quot;cover_image_alt&quot;:null,&quot;canonical_url&quot;:&quot;https://newsletter.victordibia.com/p/vibe-coding-with-engineering-discipline&quot;,&quot;section_name&quot;:null,&quot;video_upload_id&quot;:null,&quot;id&quot;:179070186,&quot;type&quot;:&quot;newsletter&quot;,&quot;reaction_count&quot;:10,&quot;comment_count&quot;:0,&quot;publication_id&quot;:1253044,&quot;publication_name&quot;:&quot;Designing with AI&quot;,&quot;publication_logo_url&quot;:&quot;https://substackcdn.com/image/fetch/$s_!1FgP!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fff160652-8bbb-475e-80a7-ba4eb5f80dcb_504x504.png&quot;,&quot;belowTheFold&quot;:true,&quot;youtube_url&quot;:null,&quot;show_links&quot;:null,&quot;feed_url&quot;:null}"></div><p>But the gains aren&#8217;t automatic. OpenAI&#8217;s own &#8220;<a href="https://openai.com/index/harness-engineering/">harness engineering</a>&#8221; team built an internal product with zero manually-written code - three engineers, a million lines, 1,500 PRs in five months, 6-hour unattended agent runs. Impressive. But look at what it took: restructured documentation (&#8220;a map, not a manual&#8221;), custom linters injecting remediation into agent context, mechanically enforced architecture rules, a full observability stack agents could query directly, and Chrome DevTools wired in so Codex could drive and validate the UI. 
Early on, 20% of the team&#8217;s time went to cleaning up &#8220;AI slop&#8221; - until they automated that too.</p><p>The benefits are real, but they require iterative investment in tooling, architecture, and feedback loops. Go in clear-eyed. Without that discipline, you get the slop without the speed. (More on this in <a href="https://newsletter.victordibia.com/p/vibe-coding-with-engineering-discipline">vibe coding with engineering discipline</a>.)</p><h3>Approach 2: Internal Agents</h3><p>There&#8217;s a tier between &#8220;personal productivity tool&#8221; and &#8220;customer-facing service&#8221; that&#8217;s worth calling out: internal tools where the trust boundary is your company, not an individual user. A support agent that searches your internal knowledge base. A compliance tool that reviews contracts against your policies. A sales assistant that queries your CRM.</p><p>These sit in a sweet spot. You control the data and the environment, your users aren&#8217;t adversarial, and the scale is manageable - hundreds of employees, not millions of customers. <strong>When the agent gets something wrong at internal scale, the blast radius is small. At customer scale, it isn't</strong>. Many companies will find their first production agent deployments here - not in their product, but in their operations.</p><h3>Approach 3: Agentic Workflows</h3><p>This is where I think there has been the most value. Take a process that exists today, often manual, often with a lot of humans in the loop. Figure out which parts an agent can handle. Figure out which parts are well-understood and deterministic, and <em>keep them</em> deterministic. Represent the entire pipeline as a workflow, a typical software engineering workflow. Where you can remove a human step, carefully replace it with an agent. 
And then, critically, track cost/performance.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!JlxB!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4199fda9-cbb3-44ae-9153-c8404d4f5a8e_1678x1195.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!JlxB!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4199fda9-cbb3-44ae-9153-c8404d4f5a8e_1678x1195.png 424w, https://substackcdn.com/image/fetch/$s_!JlxB!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4199fda9-cbb3-44ae-9153-c8404d4f5a8e_1678x1195.png 848w, https://substackcdn.com/image/fetch/$s_!JlxB!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4199fda9-cbb3-44ae-9153-c8404d4f5a8e_1678x1195.png 1272w, https://substackcdn.com/image/fetch/$s_!JlxB!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4199fda9-cbb3-44ae-9153-c8404d4f5a8e_1678x1195.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!JlxB!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4199fda9-cbb3-44ae-9153-c8404d4f5a8e_1678x1195.png" width="1456" height="1037" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/4199fda9-cbb3-44ae-9153-c8404d4f5a8e_1678x1195.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:1037,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:1457008,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://newsletter.victordibia.com/i/179010845?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4199fda9-cbb3-44ae-9153-c8404d4f5a8e_1678x1195.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!JlxB!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4199fda9-cbb3-44ae-9153-c8404d4f5a8e_1678x1195.png 424w, https://substackcdn.com/image/fetch/$s_!JlxB!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4199fda9-cbb3-44ae-9153-c8404d4f5a8e_1678x1195.png 848w, https://substackcdn.com/image/fetch/$s_!JlxB!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4199fda9-cbb3-44ae-9153-c8404d4f5a8e_1678x1195.png 1272w, https://substackcdn.com/image/fetch/$s_!JlxB!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4199fda9-cbb3-44ae-9153-c8404d4f5a8e_1678x1195.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" 
width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Here&#8217;s a concrete example. Say you&#8217;re processing insurance claims. <em><strong>Don&#8217;t wrap Claude in a container and point it at the whole process</strong></em>. Instead, decompose it: document extraction is deterministic (OCR, structured parsing, well-solved problems). Classification of the claim type uses a small, fine-tuned model. Routing follows explicit business rules. Maybe one step - say generating a summary of a complex medical narrative - genuinely benefits from a frontier model. 
You bundle that entire pipeline into an API and deploy it with careful observability.</p><blockquote><p>I cover the trade-offs between deterministic workflows and autonomous agents in Chapter 2: Multi-Agent Patterns, walk through building these pipelines in Chapter 6: Building Multi-Agent Workflows, and discuss strategies for making smaller models replace expensive ones in Chapter 11: Optimizing Multi-Agent Systems, in <a href="https://buy.multiagentbook.com/">Designing Multi-Agent Systems</a>.</p></blockquote><p>Why not just let Claude handle the whole thing end-to-end? It might succeed 90% of the time if you are using the latest best-in-class models. But <em>how</em> it succeeds is unpredictable. It might rewrite its approach between runs. It might select different tools each time. Even when outputs are correct, token consumption varies wildly, and you&#8217;ve increased your security exposure (prompt injection risk, data access patterns, tool invocations) far beyond what a deterministic workflow requires. Correctness isn&#8217;t the only thing you need at scale. You need <em>predictability</em>. You need to know what your system is doing, how much it costs per request, and what attack surface you&#8217;re exposing.</p><p>You can put a workflow like this in place as a first step, and over time, as you see results, you optimize. Optimization means you generate training data, you develop good evals, and then you figure out how to replace some of the large-model-powered steps with smaller, fine-tuned models. You compress cost while preserving quality.</p><h2>Parting Thoughts</h2><p>IMO, the path forward is not &#8220;ship a general-purpose agent to your customers.&#8221; This should not be your approach for customer-facing products. 
It&#8217;s: <em><strong>use these remarkable tools to build your product better, faster, and smarter, and architect your customer-facing systems with the right balance of agentic capability, deterministic reliability, and cost efficiency</strong></em>.</p><p>General-purpose coding agents are genuinely transformative - for the people using them. The mistake is assuming that what works on your laptop will work the same way in your cloud, for your customers, at your scale. <strong>The desktop-to-web transition took a decade of engineering to get right. The local-to-cloud transition for agents is just getting started</strong>.</p><div><hr></div><p><em>Victor Dibia is a Principal Research Software Engineer at Microsoft Research and Core AI, and the author of <a href="https://buy.multiagentbook.com/">Designing Multi-Agent Systems</a> - available in print on <a href="https://www.amazon.com/dp/B0G2BCQQJY">Amazon</a> and in digital formats with lifetime updates at <a href="https://buy.multiagentbook.com/">buy.multiagentbook.com</a>. He is the creator of AutoGen Studio and a core contributor to <a href="https://github.com/microsoft/autogen">AutoGen</a> and Microsoft Agent Framework. Subscribe to his newsletter at <a href="https://newsletter.victordibia.com/">newsletter.victordibia.com</a>.</em></p>]]></content:encoded></item><item><title><![CDATA[Is Scaling a Dead End? Why Model Scaling is Necessary Infrastructure ]]></title><description><![CDATA[#56 | The bitter lesson isn&#8217;t dying. 
It&#8217;s being amortized.]]></description><link>https://newsletter.victordibia.com/p/is-scaling-a-dead-end-why-model-scaling</link><guid isPermaLink="false">https://newsletter.victordibia.com/p/is-scaling-a-dead-end-why-model-scaling</guid><dc:creator><![CDATA[Victor Dibia, PhD]]></dc:creator><pubDate>Mon, 19 Jan 2026 13:31:51 GMT</pubDate><enclosure url="https://substackcdn.com/image/fetch/$s_!APpl!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Faf1d8c75-48b2-4d9d-bbd9-ebcf8880e93f_1612x1028.png" length="0" type="image/jpeg"/><content:encoded><![CDATA[<div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!APpl!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Faf1d8c75-48b2-4d9d-bbd9-ebcf8880e93f_1612x1028.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!APpl!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Faf1d8c75-48b2-4d9d-bbd9-ebcf8880e93f_1612x1028.png 424w, https://substackcdn.com/image/fetch/$s_!APpl!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Faf1d8c75-48b2-4d9d-bbd9-ebcf8880e93f_1612x1028.png 848w, https://substackcdn.com/image/fetch/$s_!APpl!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Faf1d8c75-48b2-4d9d-bbd9-ebcf8880e93f_1612x1028.png 1272w, https://substackcdn.com/image/fetch/$s_!APpl!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Faf1d8c75-48b2-4d9d-bbd9-ebcf8880e93f_1612x1028.png 1456w" sizes="100vw"><img 
src="https://substackcdn.com/image/fetch/$s_!APpl!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Faf1d8c75-48b2-4d9d-bbd9-ebcf8880e93f_1612x1028.png" width="1456" height="929" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/af1d8c75-48b2-4d9d-bbd9-ebcf8880e93f_1612x1028.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:929,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:925344,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:false,&quot;topImage&quot;:true,&quot;internalRedirect&quot;:&quot;https://newsletter.victordibia.com/i/173992195?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Faf1d8c75-48b2-4d9d-bbd9-ebcf8880e93f_1612x1028.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!APpl!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Faf1d8c75-48b2-4d9d-bbd9-ebcf8880e93f_1612x1028.png 424w, https://substackcdn.com/image/fetch/$s_!APpl!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Faf1d8c75-48b2-4d9d-bbd9-ebcf8880e93f_1612x1028.png 848w, https://substackcdn.com/image/fetch/$s_!APpl!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Faf1d8c75-48b2-4d9d-bbd9-ebcf8880e93f_1612x1028.png 1272w, https://substackcdn.com/image/fetch/$s_!APpl!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Faf1d8c75-48b2-4d9d-bbd9-ebcf8880e93f_1612x1028.png 
1456w" sizes="100vw" fetchpriority="high"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><blockquote><p>TLDR; It&#8217;s true - direct scaling is showing signs of diminishing returns. However, a missing part of the debate is that massive scale is perhaps <em>unavoidable</em>. Rather, large base models are the critical infrastructure required to distill, train, and validate the efficient small models (or alternate architectures) of the future. In other words, efficiency gains we see today, including distillation, synthetic data, RLHF, and GRPO, are not <em>alternatives</em> to scaling. 
They are dividends from the scaling investments that preceded them.</p></blockquote><p>As investments in AI explode (<a href="https://lucidityinsights.com/infobytes/big-tech-ai-infrastructure-investment-2025">Big Tech expected to spend $300 billion on AI in 2025</a>), data centers are built, and most industry labs seem entrenched in the view that more GPUs is better, there is a natural question: is the quest to throw compute at the problem valid? Or better still, should it be the only approach?</p><p>There seem to be two camps. On one side are folks who agree that scaling has got us this far when nothing else did, and will likely be the gift that keeps on giving. A faction of the <a href="http://www.incompleteideas.net/IncIdeas/BitterLesson.html">bitter lesson</a> believers [1]. On the other are those who argue it&#8217;s a dead end, either due to the clear <em>unsustainability</em> of it all, or because there are fundamental flaws in the <em>current transformer architecture</em> itself that mean we can&#8217;t get much farther than we are today. This second camp often points to new approaches, such as world models and alternative architectures, as the path forward, or points to emerging cracks or instability in the scaling laws.</p><p>As I think about both, I am left with the idea that there is some nuance that is worth clarifying. <em>We probably should not try to scale ad infinitum, but at the same time, scale is unavoidable in that it&#8217;s more of a design feature than a bug.</em></p><div><hr></div><div class="subscription-widget-wrap-editor" data-attrs="{&quot;url&quot;:&quot;https://newsletter.victordibia.com/subscribe?&quot;,&quot;text&quot;:&quot;Subscribe&quot;,&quot;language&quot;:&quot;en&quot;}" data-component-name="SubscribeWidgetToDOM"><div class="subscription-widget show-subscribe"><div class="preamble"><p class="cta-caption">Designing with AI is a reader-supported publication. 
To receive new posts and support the newsletter, consider becoming a subscriber.</p></div><form class="subscription-widget-subscribe"><input type="email" class="email-input" name="email" placeholder="Type your email&#8230;" tabindex="-1"><input type="submit" class="button primary" value="Subscribe"><div class="fake-input-wrapper"><div class="fake-input"></div><div class="fake-button"></div></div></form></div></div><div><hr></div><p></p><h2>On the Slow Death of Scaling (By Sara Hooker)</h2><p>This post is in part a reflection inspired by <a href="https://www.linkedin.com/in/sararosehooker/">Sara Hooker</a>&#8217;s essay, <a href="https://papers.ssrn.com/sol3/papers.cfm?abstract_id=5877662">On the Slow Death of Scaling</a> [2], where she argues that recent developments show <em>cracks in the scaling laws</em> that have enabled progress thus far, and that <em>it is time to invest in other areas to extract performance gains</em>: data quality, improvements in system scaffolding (agents), and a focus on UX. The reasons cited include diminishing returns on scaling, unreliable scaling laws, and the availability of surgical synthetic data generation for training smaller, higher-quality models.</p><blockquote><p>One of the arguments Sara makes on the unreliability of scaling laws is that they <em><strong>predict test loss</strong></em>, <em><strong>not downstream capabilities</strong></em>. When you measure what models actually do, the scaling law predictions sometimes break down, which means companies betting everything on scale are <em>probably under-investing elsewhere</em>.</p></blockquote><p>I agree with everything Sara says here (she&#8217;s great; definitely read more of her writing).</p><p>However, the minor challenge I see is that a reader of that article might walk <em>away thinking we probably do not need the super-scaled models</em>. 
I think we <strong>critically and necessarily do.</strong> And that there is a cyclic, chicken-and-egg dependency between the super-scaled  models (for lack of a better term) and all of the innovations that help us move away from them.</p><p>In plain terms: <strong>small models are a derivative of the larger models, and probably will </strong><em><strong>never</strong></em><strong> be better than them.</strong> This follows a well-established engineering pattern.</p><div><hr></div><h2>Downstream of Scale: Why Efficient Models Require Large Ones</h2><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!ix3s!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc13f87b4-062f-4be1-b1a8-db5af69479dd_1678x1195.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!ix3s!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc13f87b4-062f-4be1-b1a8-db5af69479dd_1678x1195.png 424w, https://substackcdn.com/image/fetch/$s_!ix3s!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc13f87b4-062f-4be1-b1a8-db5af69479dd_1678x1195.png 848w, https://substackcdn.com/image/fetch/$s_!ix3s!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc13f87b4-062f-4be1-b1a8-db5af69479dd_1678x1195.png 1272w, https://substackcdn.com/image/fetch/$s_!ix3s!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc13f87b4-062f-4be1-b1a8-db5af69479dd_1678x1195.png 1456w" sizes="100vw"><img 
src="https://substackcdn.com/image/fetch/$s_!ix3s!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc13f87b4-062f-4be1-b1a8-db5af69479dd_1678x1195.png" width="1456" height="1037" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/c13f87b4-062f-4be1-b1a8-db5af69479dd_1678x1195.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:1037,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:1245210,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://newsletter.victordibia.com/i/173992195?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc13f87b4-062f-4be1-b1a8-db5af69479dd_1678x1195.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!ix3s!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc13f87b4-062f-4be1-b1a8-db5af69479dd_1678x1195.png 424w, https://substackcdn.com/image/fetch/$s_!ix3s!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc13f87b4-062f-4be1-b1a8-db5af69479dd_1678x1195.png 848w, https://substackcdn.com/image/fetch/$s_!ix3s!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc13f87b4-062f-4be1-b1a8-db5af69479dd_1678x1195.png 1272w, https://substackcdn.com/image/fetch/$s_!ix3s!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc13f87b4-062f-4be1-b1a8-db5af69479dd_1678x1195.png 
1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Several observations point to the constant need for super-scaled models - and how they are (IMO) <em><strong>necessary infrastructure</strong></em> for progress.</p><h3>Smaller Models Are Distilled from Larger Ones</h3><p>Knowledge distillation, training smaller &#8220;student&#8221; models to mimic larger &#8220;teacher&#8221; models, has become a cornerstone of efficient AI deployment. But the technique has an obvious dependency: you need the teacher first. 
Even when the goal is to just improve performance (e.g., reasoning models), you still need strong, capable base models.</p><p>DeepSeek&#8217;s work on distilling reasoning capabilities validated this directly: &#8220;reasoning patterns of larger models can be distilled into smaller models, resulting in better performance compared to reasoning patterns discovered through RL on small models&#8221; [3]. The implication is clear. You cannot bootstrap reasoning in small models without first having large models that possess it.</p><h3>Synthetic Data Is Generated by Large Super-Scaled Models</h3><p>The synthetic data revolution, which enables training smaller, specialized models on carefully curated generated data, depends entirely on capable generators. Research on self-improvement methods shows that when models generate their own training data, they are <a href="https://arxiv.org/abs/2307.01850">&#8220;only limited by the best model available&#8221; [4]</a>. This creates a ceiling: your synthetic data is only as good as your largest model.</p><p>The industry pattern is consistent. NVIDIA&#8217;s Nemotron-4 340B, IBM&#8217;s LAB methodology, and Mixtral-8x7B all use large teacher models as the source for synthetic training data. The small, efficient models that result are derivatives of the large ones that generated their training signal.</p><h3>Advances Like RLHF and GRPO Require Strong Base Models</h3><p>Reinforcement learning from human feedback (RLHF) and  techniques like Group Relative Policy Optimization (GRPO) have dramatically improved model capabilities. 
But these techniques are refinements, not foundations.</p><p>As <span class="mention-wrap" data-attrs="{&quot;name&quot;:&quot;Nathan Lambert&quot;,&quot;id&quot;:10472909,&quot;type&quot;:&quot;user&quot;,&quot;url&quot;:null,&quot;photo_url&quot;:&quot;https://substackcdn.com/image/fetch/$s_!RihO!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8fedcdfb-e137-4f6a-9089-a46add6c6242_500x500.jpeg&quot;,&quot;uuid&quot;:&quot;882a0ba8-45be-404b-afd7-1fae9c82b0c9&quot;}" data-component-name="MentionToDOM"></span> notes in <a href="https://rlhfbook.com/">The RLHF Book</a>: &#8220;Effective RLHF requires a strong starting point, so RLHF cannot be a solution to every problem alone&#8221; [5]. OpenAI&#8217;s research on <a href="https://arxiv.org/abs/2312.09390">weak-to-strong generalization</a> found that &#8220;weak-to-strong generalization is particularly poor for ChatGPT reward modeling... naive RLHF will likely scale poorly to superhuman models without additional work&#8221; [6].</p><p>GRPO, which enabled DeepSeek-R1&#8217;s impressive reasoning capabilities, works by sampling multiple completions and using group-relative rewards. But this requires a base model strong enough that variance exists in sample quality and some samples actually reach correct answers. <a href="https://arxiv.org/abs/2501.12948">DeepSeek-R1-Zero</a>, often cited as evidence that pure RL can discover reasoning, started from DeepSeek-V3-Base, a 671-billion parameter model [3, 7]. The &#8220;pure RL without supervised fine-tuning&#8221; innovation was only possible because they had already paid the massive scaling tax.</p><h3>The Dependency Graph</h3><p>The relationship looks something like this:</p><pre><code><code>Large Scaled Models (the "inefficient" phase)
           &#8595;
    &#9500;&#9472;&#9472;&gt; Distillation (requires teacher)
    &#9500;&#9472;&#9472;&gt; Synthetic Data Generation (requires capable generator)
    &#9492;&#9472;&#9472;&gt; RLHF/GRPO (requires strong base model)
           &#8595;
Small Efficient Models (the efficiency gains we celebrate)
</code></code></pre><p>Every technique in the middle layer requires capable large models as input. Small efficient models cannot bootstrap themselves.</p><div><hr></div><h2>Even &#8220;Alternative&#8221; Architectures Follow This Pattern</h2><p>One might hope that fundamentally different architectures, such as world models and JEPA, could escape this dependency. The evidence suggests otherwise.</p><p>Yann LeCun has been vocal that autoregressive language models and scaling obsession are the wrong path to advanced AI. He argues that &#8220;simply scaling the model and providing it with more data might not be a viable solution&#8221; and proposes <a href="https://openreview.net/pdf?id=BZ5a1r-kVsf">JEPA</a> (Joint Embedding Predictive Architecture) as an alternative focused on learning world models [8].</p><p>Yet Meta&#8217;s own <a href="https://arxiv.org/abs/2506.09985">V-JEPA 2 paper</a> tells a different story. The model scales from 300 million to over 1 billion parameters, trained on more than 1 million hours of internet video [9]. The paper explicitly identifies scaling as a core ingredient: data scaling (2M to 22M videos), model scaling (300M to 1B+ parameters), and longer training (90K to 252K iterations). Most tellingly, they report that &#8220;V-JEPA 2 demonstrates a linear scaling behavior with respect to model size.&#8221;</p><p>Google&#8217;s Genie world models follow the same trajectory. <a href="https://deepmind.google/discover/blog/genie-2-a-large-scale-foundation-world-model/">Genie 2</a>, their foundation world model, &#8220;demonstrates various emergent capabilities at scale, such as object interactions, complex character animation, physics, and the ability to model and thus predict the behavior of other agents&#8221; [10]. Google&#8217;s job postings for the team state plainly: &#8220;We believe scaling [AI training] on video and multimodal data is on the critical path to artificial general intelligence&#8221; [11].</p><p>The architecture changes. 
The scaling dependency doesn&#8217;t. Or may not.</p><div><hr></div><h2>The Engineering Pattern</h2><p>Personally, I think this is how OpenAI has been able to improve both performance and cost over the last several years (caveat: this is speculative), by amortizing their scaling investments across increasingly efficient derivative models. It may be unclear to most, but the price of the <a href="https://multiagentbook.com/labs/ai-cost-trends">best quality intelligence</a> has dropped <strong><a href="https://multiagentbook.com/labs/ai-cost-trends">400x in the last 4 years</a></strong>. Let that sink in. </p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!SzC-!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7bc4d97b-14ec-43e7-9111-b7cdf6e8171e_1350x1114.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!SzC-!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7bc4d97b-14ec-43e7-9111-b7cdf6e8171e_1350x1114.png 424w, https://substackcdn.com/image/fetch/$s_!SzC-!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7bc4d97b-14ec-43e7-9111-b7cdf6e8171e_1350x1114.png 848w, https://substackcdn.com/image/fetch/$s_!SzC-!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7bc4d97b-14ec-43e7-9111-b7cdf6e8171e_1350x1114.png 1272w, https://substackcdn.com/image/fetch/$s_!SzC-!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7bc4d97b-14ec-43e7-9111-b7cdf6e8171e_1350x1114.png 1456w" sizes="100vw"><img 
src="https://substackcdn.com/image/fetch/$s_!SzC-!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7bc4d97b-14ec-43e7-9111-b7cdf6e8171e_1350x1114.png" width="1350" height="1114" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/7bc4d97b-14ec-43e7-9111-b7cdf6e8171e_1350x1114.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:1114,&quot;width&quot;:1350,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:234366,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://newsletter.victordibia.com/i/173992195?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7bc4d97b-14ec-43e7-9111-b7cdf6e8171e_1350x1114.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!SzC-!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7bc4d97b-14ec-43e7-9111-b7cdf6e8171e_1350x1114.png 424w, https://substackcdn.com/image/fetch/$s_!SzC-!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7bc4d97b-14ec-43e7-9111-b7cdf6e8171e_1350x1114.png 848w, https://substackcdn.com/image/fetch/$s_!SzC-!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7bc4d97b-14ec-43e7-9111-b7cdf6e8171e_1350x1114.png 1272w, https://substackcdn.com/image/fetch/$s_!SzC-!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7bc4d97b-14ec-43e7-9111-b7cdf6e8171e_1350x1114.png 
1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>This follows an engineering pattern we see across industries. You build the big, expensive, &#8220;inefficient&#8221; version first, with clear intent, and then do the work to make it small. Early CPUs were room-sized; now they fit in your pocket. Early aircraft were massive and inefficient; optimization came later. The brute-force version comes first, optimization follows.</p><p>What&#8217;s unique about ML is that the brute-force version doesn&#8217;t necessarily become obsolete in the near term. 
It remains operationally necessary as infrastructure for producing the optimized versions.  </p><div><hr></div><h2>Conclusion</h2><p>The efficiency gains we see today, including distillation, synthetic data, RLHF, and GRPO, are not <em>alternatives</em> to scaling. They are dividends from the scaling investments that preceded them.</p><p>It is unclear whether even new architectures will allow us to escape this cycle. World models, for all their promise, appear to follow this arc as well. And maybe that&#8217;s fine. What might be needed is a commitment to the shrinking pattern that most industries are accustomed to: build the big version (with clear intent), then do the work to make it small.</p><p>The bitter lesson isn&#8217;t dying. It&#8217;s being amortized.</p><div><hr></div><h2>References</h2><p>[1] Sutton, R. (2019). The bitter lesson. <em>Incomplete Ideas</em>. http://www.incompleteideas.net/IncIdeas/BitterLesson.html</p><p>[2] Hooker, S. (2024). On the Slow Death of Scaling. <em><a href="https://papers.ssrn.com/sol3/papers.cfm?abstract_id=5877662">arXiv preprint</a></em><a href="https://papers.ssrn.com/sol3/papers.cfm?abstract_id=5877662">. https://arxiv.org/abs/2407.05694</a></p><p>[3] DeepSeek-AI. (2025). DeepSeek-R1: Incentivizing reasoning capability in LLMs via reinforcement learning. <em>arXiv preprint</em>. <a href="https://arxiv.org/abs/2501.12948">https://arxiv.org/abs/2501.12948</a></p><p>[4] Alemohammad, S., et al. (2024). Self-consuming generative models go MAD. <em>ICLR 2024</em>. <a href="https://arxiv.org/abs/2307.01850">https://arxiv.org/abs/2307.01850</a></p><p>[5] Lambert, N. (2024). <em>The RLHF Book</em>. https://rlhfbook.com/</p><p>[6] Burns, C., et al. (2023). Weak-to-strong generalization: Eliciting strong capabilities with weak supervision. <em>OpenAI Research</em>. https://arxiv.org/abs/2312.09390</p><p>[7] DeepSeek-AI. (2024). DeepSeek-V3 technical report. <em>arXiv preprint</em>. 
https://arxiv.org/abs/2412.19437</p><p>[8] LeCun, Y. (2022). A path towards autonomous machine intelligence. <em>OpenReview</em>. https://openreview.net/pdf?id=BZ5a1r-kVsf</p><p>[9] Bardes, A., et al. (2025). V-JEPA 2: Self-supervised video models enable understanding, prediction and planning. <em>arXiv preprint</em>. https://arxiv.org/abs/2506.09985</p><p>[10] Google DeepMind. (2024). Genie 2: A large-scale foundation world model. <em>DeepMind Blog</em>. https://deepmind.google/discover/blog/genie-2-a-large-scale-foundation-world-model/</p><p>[11] Kostrikov, I. (2025, January 6). Google is forming a new team to build AI that can simulate the physical world. <em>TechCrunch</em>. https://techcrunch.com/2025/01/06/google-is-forming-a-new-team-to-build-ai-that-can-simulate-the-physical-world/</p>]]></content:encoded></item><item><title><![CDATA[The Arc of Agent Action from Code to Tools and Back to Code - And Why Anthropic's SKILLS.md is Not New]]></title><description><![CDATA[Issue #55 | Early agents (LIDA, AutoGen) supported actions via code. Then the industry moved to action via tools/structured output. And now we are back again to code. 
What happened?]]></description><link>https://newsletter.victordibia.com/p/the-arc-of-agent-action-from-code</link><guid isPermaLink="false">https://newsletter.victordibia.com/p/the-arc-of-agent-action-from-code</guid><dc:creator><![CDATA[Victor Dibia, PhD]]></dc:creator><pubDate>Mon, 05 Jan 2026 13:31:37 GMT</pubDate><enclosure url="https://substackcdn.com/image/fetch/$s_!p1g3!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7f76085e-5b6a-4823-b948-8935a6326407_1376x768.png" length="0" type="image/jpeg"/><content:encoded><![CDATA[<div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!p1g3!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7f76085e-5b6a-4823-b948-8935a6326407_1376x768.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!p1g3!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7f76085e-5b6a-4823-b948-8935a6326407_1376x768.png 424w, https://substackcdn.com/image/fetch/$s_!p1g3!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7f76085e-5b6a-4823-b948-8935a6326407_1376x768.png 848w, https://substackcdn.com/image/fetch/$s_!p1g3!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7f76085e-5b6a-4823-b948-8935a6326407_1376x768.png 1272w, https://substackcdn.com/image/fetch/$s_!p1g3!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7f76085e-5b6a-4823-b948-8935a6326407_1376x768.png 1456w" sizes="100vw"><img 
src="https://substackcdn.com/image/fetch/$s_!p1g3!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7f76085e-5b6a-4823-b948-8935a6326407_1376x768.png" width="1376" height="768" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/7f76085e-5b6a-4823-b948-8935a6326407_1376x768.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:768,&quot;width&quot;:1376,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:1057926,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:false,&quot;topImage&quot;:true,&quot;internalRedirect&quot;:&quot;https://newsletter.victordibia.com/i/174707398?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7f76085e-5b6a-4823-b948-8935a6326407_1376x768.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!p1g3!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7f76085e-5b6a-4823-b948-8935a6326407_1376x768.png 424w, https://substackcdn.com/image/fetch/$s_!p1g3!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7f76085e-5b6a-4823-b948-8935a6326407_1376x768.png 848w, https://substackcdn.com/image/fetch/$s_!p1g3!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7f76085e-5b6a-4823-b948-8935a6326407_1376x768.png 1272w, https://substackcdn.com/image/fetch/$s_!p1g3!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7f76085e-5b6a-4823-b948-8935a6326407_1376x768.png 1456w" 
sizes="100vw" fetchpriority="high"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Anthropic recently introduced <a href="https://www.anthropic.com/engineering/equipping-agents-for-the-real-world-with-agent-skills">Agent Skills</a> [1]&#8212;folders containing a <code>SKILLS.md</code> file with instructions, scripts, and resources that agents can discover and load dynamically. The model reads the skill when relevant, executes bundled code, and solves problems using complex control flow rather than chaining discrete tool calls. </p><p>If this sounds familiar, it should. 
In late 2023, when I was building the first version of AutoGen Studio, this is exactly what we did.  Users wrote Python files as skills, we added them to context (full code or just signatures), and the model wrote programs that integrated/orchestrated those skills. At the time, most models were inconsistent in generating structured output (the building blocks for tool calling) but most of them could write code fairly well.</p><blockquote><p>Fun fact: One of the first agentic systems projects I led at Microsoft in <strong>August 2022</strong> (before the ChatGPT release, before AutoGen) was <a href="https://microsoft.github.io/lida/">LIDA</a> - an agentic workflow for automated data visualization based on code execution. The visualization module worked by <em><strong>generating code (using the Davinci 2 and gpt-3.5-turbo models at the time)</strong></em> which was executed to create visualizations.</p></blockquote><p>Then everyone moved to structured tool calling, driven by libraries like <a href="https://python.useinstructor.com/">Instructor</a> (from Jason Liu) and <a href="https://pydantic.dev/">Pydantic</a> that promised &#8220;guarantees about output types&#8221; with automatic validation and retries. <strong>Now it&#8217;s moving <a href="https://blog.cloudflare.com/code-mode/">back to</a> code (citing <a href="https://www.anthropic.com/engineering/code-execution-with-mcp">efficiency gains</a>)</strong>. 
</p><p>This post traces that arc and explains why the return may or may not make sense.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!weGk!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1babe5e9-4020-454a-b2c3-4554ac6ea04b_2752x1536.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!weGk!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1babe5e9-4020-454a-b2c3-4554ac6ea04b_2752x1536.png 424w, https://substackcdn.com/image/fetch/$s_!weGk!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1babe5e9-4020-454a-b2c3-4554ac6ea04b_2752x1536.png 848w, https://substackcdn.com/image/fetch/$s_!weGk!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1babe5e9-4020-454a-b2c3-4554ac6ea04b_2752x1536.png 1272w, https://substackcdn.com/image/fetch/$s_!weGk!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1babe5e9-4020-454a-b2c3-4554ac6ea04b_2752x1536.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!weGk!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1babe5e9-4020-454a-b2c3-4554ac6ea04b_2752x1536.png" width="1456" height="813" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/1babe5e9-4020-454a-b2c3-4554ac6ea04b_2752x1536.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:813,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:4494885,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:false,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://newsletter.victordibia.com/i/174707398?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1babe5e9-4020-454a-b2c3-4554ac6ea04b_2752x1536.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!weGk!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1babe5e9-4020-454a-b2c3-4554ac6ea04b_2752x1536.png 424w, https://substackcdn.com/image/fetch/$s_!weGk!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1babe5e9-4020-454a-b2c3-4554ac6ea04b_2752x1536.png 848w, https://substackcdn.com/image/fetch/$s_!weGk!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1babe5e9-4020-454a-b2c3-4554ac6ea04b_2752x1536.png 1272w, https://substackcdn.com/image/fetch/$s_!weGk!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1babe5e9-4020-454a-b2c3-4554ac6ea04b_2752x1536.png 1456w" sizes="100vw"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" 
viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><blockquote><p><em>This post draws on themes from <a href="https://multiagentbook.com/">Designing Multi-Agent Systems</a>, which covers tool design, code execution, and security considerations for agents in depth.</em></p></blockquote><div><hr></div><div class="subscription-widget-wrap-editor" data-attrs="{&quot;url&quot;:&quot;https://newsletter.victordibia.com/subscribe?&quot;,&quot;text&quot;:&quot;Subscribe&quot;,&quot;language&quot;:&quot;en&quot;}" data-component-name="SubscribeWidgetToDOM"><div class="subscription-widget show-subscribe"><div class="preamble"><p class="cta-caption">Designing with AI is a reader-supported publication. 
To receive new posts and support my work, consider becoming a subscriber.</p></div><form class="subscription-widget-subscribe"><input type="email" class="email-input" name="email" placeholder="Type your email&#8230;" tabindex="-1"><input type="submit" class="button primary" value="Subscribe"><div class="fake-input-wrapper"><div class="fake-input"></div><div class="fake-button"></div></div></form></div></div><div><hr></div><h2>Era 1: Action via Code</h2><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!WMJz!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff19a2132-52c3-43e2-9a98-ee4b9fd4c532_1376x768.jpeg" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!WMJz!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff19a2132-52c3-43e2-9a98-ee4b9fd4c532_1376x768.jpeg 424w, https://substackcdn.com/image/fetch/$s_!WMJz!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff19a2132-52c3-43e2-9a98-ee4b9fd4c532_1376x768.jpeg 848w, https://substackcdn.com/image/fetch/$s_!WMJz!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff19a2132-52c3-43e2-9a98-ee4b9fd4c532_1376x768.jpeg 1272w, https://substackcdn.com/image/fetch/$s_!WMJz!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff19a2132-52c3-43e2-9a98-ee4b9fd4c532_1376x768.jpeg 1456w" sizes="100vw"><img 
src="https://substackcdn.com/image/fetch/$s_!WMJz!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff19a2132-52c3-43e2-9a98-ee4b9fd4c532_1376x768.jpeg" width="1376" height="768" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/f19a2132-52c3-43e2-9a98-ee4b9fd4c532_1376x768.jpeg&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:768,&quot;width&quot;:1376,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:270426,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/jpeg&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://newsletter.victordibia.com/i/174707398?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff19a2132-52c3-43e2-9a98-ee4b9fd4c532_1376x768.jpeg&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!WMJz!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff19a2132-52c3-43e2-9a98-ee4b9fd4c532_1376x768.jpeg 424w, https://substackcdn.com/image/fetch/$s_!WMJz!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff19a2132-52c3-43e2-9a98-ee4b9fd4c532_1376x768.jpeg 848w, https://substackcdn.com/image/fetch/$s_!WMJz!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff19a2132-52c3-43e2-9a98-ee4b9fd4c532_1376x768.jpeg 1272w, https://substackcdn.com/image/fetch/$s_!WMJz!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff19a2132-52c3-43e2-9a98-ee4b9fd4c532_1376x768.jpeg 
1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>At about mid 2022, OpenAI Davinci2 models were getting really good at writing code and it became clear that they could write code that got executed in a loop to solve problems. The setup was simple: prompt the model to write code in detectable blocks (triple backticks, XML tags), parse it out, execute it. 
This was attractive because it <em>mostly</em> worked across models of all sizes (for the longest time, most models just did not support JSON mode or structured tool calling, especially small 7B models that drove the bulk of experimentation at the time).</p><p>As an example, in the early days of AutoGen Studio, users could write a <code>generate_image.py</code> file with a <code>generate_image()</code> method. The model was prompted to write code that assumed those skills existed and could import them:</p><pre><code><code># AutoGen Studio skill file (2023)
# skills/generate_image.py

def generate_image(prompt: str, size: str = "1024x1024") -&gt; str:
    """Generate an image using DALL-E and return the URL."""
    # ... implementation
    return image_url
</code></code></pre><p>The model didn&#8217;t call a tool; it wrote a program that orchestrated tools:</p><pre><code><code># Model-generated code
from skills import generate_image, save_to_disk

url = generate_image("a sunset over mountains")
save_to_disk(url, "sunset.png")
print(f"Saved image to sunset.png")
</code></code></pre><p><strong>The problems:</strong></p><ul><li><p><strong>Unreliable.</strong> You were dependent on the model following prompt instructions. Would it use the right code block format? Would it call the skill you defined or hallucinate something else?</p></li><li><p><strong>Insecure.</strong> Models could write destructive code. <code>rm -rf</code>, data exfiltration, anything code can do. This drove the race to support sandboxed environments - Docker, WASM, etc.</p></li><li><p><strong>High exposure.</strong> Code interpreters were general-purpose, which meant general-purpose attack surfaces especially with prompt injections still being a really serious issue at the time (<em>most models had zero safety fine-tuning at the time</em>). As <a href="https://huggingface.co/docs/smolagents/en/tutorials/secure_code_execution">Hugging Face&#8217;s smolagents documentation</a> [2] puts it: &#8220;No local python sandbox can ever be completely secure.&#8221;</p></li></ul><div><hr></div><h2>Era 2: Action via Tool Calling</h2><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!JTya!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb9a0d1ad-1646-494a-ad30-fcf653db46b1_1376x768.jpeg" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!JTya!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb9a0d1ad-1646-494a-ad30-fcf653db46b1_1376x768.jpeg 424w, https://substackcdn.com/image/fetch/$s_!JTya!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb9a0d1ad-1646-494a-ad30-fcf653db46b1_1376x768.jpeg 848w, 
https://substackcdn.com/image/fetch/$s_!JTya!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb9a0d1ad-1646-494a-ad30-fcf653db46b1_1376x768.jpeg 1272w, https://substackcdn.com/image/fetch/$s_!JTya!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb9a0d1ad-1646-494a-ad30-fcf653db46b1_1376x768.jpeg 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!JTya!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb9a0d1ad-1646-494a-ad30-fcf653db46b1_1376x768.jpeg" width="1376" height="768" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/b9a0d1ad-1646-494a-ad30-fcf653db46b1_1376x768.jpeg&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:768,&quot;width&quot;:1376,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:441559,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/jpeg&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://newsletter.victordibia.com/i/174707398?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb9a0d1ad-1646-494a-ad30-fcf653db46b1_1376x768.jpeg&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!JTya!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb9a0d1ad-1646-494a-ad30-fcf653db46b1_1376x768.jpeg 424w, 
https://substackcdn.com/image/fetch/$s_!JTya!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb9a0d1ad-1646-494a-ad30-fcf653db46b1_1376x768.jpeg 848w, https://substackcdn.com/image/fetch/$s_!JTya!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb9a0d1ad-1646-494a-ad30-fcf653db46b1_1376x768.jpeg 1272w, https://substackcdn.com/image/fetch/$s_!JTya!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fb9a0d1ad-1646-494a-ad30-fcf653db46b1_1376x768.jpeg 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" 
y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Structured tool calling improved things. Instead of arbitrary code, you defined discrete tools with JSON schemas. A tool did one thing with typed inputs and outputs. The model selected tools and filled parameters - a constrained problem compared to open-ended code generation.</p><p><strong>Benefits:</strong></p><ul><li><p><strong>Reliability.</strong> Structured output became a solved problem. Models got good at producing valid JSON. Pydantic, Instructor and similar libraries formalized the patterns.</p></li><li><p><strong>Security.</strong> No sandboxes needed for the tool layer. Either your <code>get_weather()</code> function was secure or it wasn&#8217;t. You could audit tools independently. The blast radius was bounded.</p></li><li><p><strong>Clean abstraction.</strong> APIs and functions wrapped as tools felt natural.</p></li></ul><p>One downside: this approach alienated smaller models. They couldn&#8217;t reliably produce structured output.</p><p>The tool use era was also driven by the introduction of MCP by Anthropic in November 2024 as an open standard for connecting AI assistants to external data systems. The core idea: write a tool once as an MCP server, and any client that supports the protocol can use it. MCP was designed to solve the &#8220;M&#215;N problem&#8221; - if you have M AI applications and N tools, you&#8217;d otherwise need M&#215;N integrations. MCP reduces this to M+N. 
</p><div><hr></div><h2>Era 3: Back to Code</h2><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!5BZo!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fea72b962-17f7-4312-907a-f4c44d9cfb76_1376x768.jpeg" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!5BZo!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fea72b962-17f7-4312-907a-f4c44d9cfb76_1376x768.jpeg 424w, https://substackcdn.com/image/fetch/$s_!5BZo!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fea72b962-17f7-4312-907a-f4c44d9cfb76_1376x768.jpeg 848w, https://substackcdn.com/image/fetch/$s_!5BZo!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fea72b962-17f7-4312-907a-f4c44d9cfb76_1376x768.jpeg 1272w, https://substackcdn.com/image/fetch/$s_!5BZo!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fea72b962-17f7-4312-907a-f4c44d9cfb76_1376x768.jpeg 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!5BZo!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fea72b962-17f7-4312-907a-f4c44d9cfb76_1376x768.jpeg" width="1376" height="768" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/ea72b962-17f7-4312-907a-f4c44d9cfb76_1376x768.jpeg&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:768,&quot;width&quot;:1376,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:515268,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/jpeg&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://newsletter.victordibia.com/i/174707398?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fea72b962-17f7-4312-907a-f4c44d9cfb76_1376x768.jpeg&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!5BZo!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fea72b962-17f7-4312-907a-f4c44d9cfb76_1376x768.jpeg 424w, https://substackcdn.com/image/fetch/$s_!5BZo!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fea72b962-17f7-4312-907a-f4c44d9cfb76_1376x768.jpeg 848w, https://substackcdn.com/image/fetch/$s_!5BZo!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fea72b962-17f7-4312-907a-f4c44d9cfb76_1376x768.jpeg 1272w, https://substackcdn.com/image/fetch/$s_!5BZo!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fea72b962-17f7-4312-907a-f4c44d9cfb76_1376x768.jpeg 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" 
width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Recently there&#8217;s been a pivot back to code-based action. Anthropic&#8217;s SKILLS approach is one example - folders containing a <code>SKILL.md</code> file plus scripts and resources that agents discover and load dynamically. <a href="https://blog.cloudflare.com/code-mode/">Cloudflare&#8217;s Code Mode</a> [4] converts MCP tools into TypeScript APIs that agents call via generated code. Anthropic also released an article citing efficiency gains from <a href="https://www.anthropic.com/engineering/code-execution-with-mcp">representing MCP tools as code that agents can compose</a>.</p><h3>What challenges are driving the code pivot?</h3><p><strong>Context explosion.</strong> Every tool call consumes tokens - the definition, the call, the result. Chain multiple tool calls and you burn through context on overhead. 
As Anthropic&#8217;s engineering team notes in their post on <a href="https://www.anthropic.com/engineering/code-execution-with-mcp">code execution with MCP</a> [5]: &#8220;loading all tool definitions upfront and passing intermediate results through the context window slows down agents and increases costs.&#8221; Anthropic&#8217;s example in the same post shows code-based tool invocation can reduce token usage by 98.7%&#8212;from 150,000 tokens to 2,000 [5]. For long-horizon tasks, this becomes a real constraint.</p><p><strong>Repetitive tool calling.</strong> Tool calling encourages sequential, atomic operations. Processing 100 items means 100 tool calls. The overhead compounds.</p><p><strong>Restricted problem-solving.</strong> When the model can only compose tool calls (sequential or parallel), you&#8217;ve limited it to what you anticipated. Code allows complex control flow - loops, conditionals, error handling - to solve larger subproblems in one pass.</p><p>Vercel recently demonstrated an <em><strong>extreme version</strong></em> of this pattern [14]. They stripped their text-to-SQL agent from 15+ specialized tools down to two: bash execution and SQL execution. Instead of curated tool schemas, they gave Claude Opus 4.5 direct filesystem access to their semantic layer definitions. The claimed results: 3.5x faster execution, 100% success rate (up from 80%), and 37% fewer tokens. Their framing - &#8220;Model + file system + goal&#8221; - is radically simpler than orchestrating dozens of tools. However, the post notably <em>omits how security is preserved</em>. They mention &#8220;Vercel Sandbox&#8221; but never elaborate on isolation properties, prompt injection defenses, or what prevents destructive commands (perhaps they assume trusted users and correct queries from the Slack users of the tool?). 
For anyone implementing similar patterns, these questions matter - you can only afford radical simplicity at the agent layer if you have tested, robust sandboxing underneath (a non-trivial requirement that often goes undiscussed). See the Agent Security Rule of Two framework below for one way to think about these trade-offs. </p><p>Early research signals also highlight code efficiency - Wang et al.&#8217;s <a href="https://arxiv.org/abs/2402.01030">CodeAct paper</a> [6] (ICML 2024) showed that using Python code as the action format outperforms JSON-based tool calling by up to 20% in success rate across 17 LLMs tested. Code actions also required 30% fewer steps on average&#8212;which translates directly to lower token costs.</p><p>The reasons aren&#8217;t mysterious. Code has better composability (you can nest actions, define reusable functions), better expressiveness for control flow, and leverages the procedural knowledge already in the model&#8217;s pretraining data.</p><div><hr></div><h2>What Changed? Security and Infrastructure</h2><p>The observant reader will notice: the security problems that <em>pushed us away from code are still there</em>. Models can still write insecure code. <strong>They can still be jailbroken (admittedly not as easily as before).</strong></p><p>What&#8217;s changed:</p><p><strong>1. Sandboxing infrastructure is now commoditized.</strong> Major providers now offer built-in code execution sandboxes. OpenAI&#8217;s <a href="https://platform.openai.com/docs/assistants/tools/code-interpreter">Code Interpreter</a> [10], Google&#8217;s <a href="https://ai.google.dev/gemini-api/docs/code-execution">Gemini Code Execution</a> [11], Anthropic&#8217;s <a href="https://docs.anthropic.com/en/docs/agents-and-tools/tool-use/code-execution-tool">Code Execution Tool</a> [12]. Developers flip a switch; the provider handles sandboxing. For custom solutions, gVisor and Docker are production-ready. Cloudflare&#8217;s V8 isolates launch in milliseconds [4]. 
Sandboxing went from &#8220;hard problem you solve yourself&#8221; to &#8220;managed infrastructure you consume.&#8221;</p><p><strong>2. Models became less gullible (Safety Training).</strong> Significant safety training on code injection makes previous mistakes rarer. Models are better at refusing obviously malicious instructions (though dedicated adversaries can still cause problems).</p><p><strong>3. Code is in the training data, tool calls aren&#8217;t.</strong> LLMs have seen enormous amounts of real-world TypeScript and Python in pretraining. They&#8217;ve seen far fewer examples of JSON tool-calling schemas. As Cloudflare notes [4]: code is a more natural action format for models trained on code compared to some esoteric function signature representing an MCP tool. LLMs are trained on GitHub. They are native speakers of Python/TypeScript. <strong>They are </strong><em><strong>second-language</strong></em><strong> speakers of your specific get_customer_data JSON schema.</strong></p><p><strong>4. Context preservation is now understood as critical.</strong> Research shows that LLMs struggle with large contexts even when they fit in the window. Anthropic&#8217;s <a href="https://www.anthropic.com/engineering/effective-context-engineering-for-ai-agents">context engineering guidance</a> [8] treats the context window as &#8220;a precious resource to be managed&#8221; with techniques like compaction, structured note-taking, and multi-agent architectures. For long-horizon task solving, context efficiency matters.</p>
      <p>
          <a href="https://newsletter.victordibia.com/p/the-arc-of-agent-action-from-code">
              Read more
          </a>
      </p>
   ]]></content:encoded></item><item><title><![CDATA[Vibe Coding .. With Engineering Discipline: Building Real Apps with AI Agents]]></title><description><![CDATA[#54 | Five practices to make your AI-assisted coding more production-ready]]></description><link>https://newsletter.victordibia.com/p/vibe-coding-with-engineering-discipline</link><guid isPermaLink="false">https://newsletter.victordibia.com/p/vibe-coding-with-engineering-discipline</guid><dc:creator><![CDATA[Victor Dibia, PhD]]></dc:creator><pubDate>Fri, 19 Dec 2025 13:02:52 GMT</pubDate><enclosure url="https://substackcdn.com/image/fetch/$s_!v-V3!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F67853d02-f244-4829-84df-a07fc900527d_1731x1154.png" length="0" type="image/jpeg"/><content:encoded><![CDATA[<div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!v-V3!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F67853d02-f244-4829-84df-a07fc900527d_1731x1154.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!v-V3!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F67853d02-f244-4829-84df-a07fc900527d_1731x1154.png 424w, https://substackcdn.com/image/fetch/$s_!v-V3!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F67853d02-f244-4829-84df-a07fc900527d_1731x1154.png 848w, https://substackcdn.com/image/fetch/$s_!v-V3!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F67853d02-f244-4829-84df-a07fc900527d_1731x1154.png 1272w, 
https://substackcdn.com/image/fetch/$s_!v-V3!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F67853d02-f244-4829-84df-a07fc900527d_1731x1154.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!v-V3!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F67853d02-f244-4829-84df-a07fc900527d_1731x1154.png" width="1456" height="971" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/67853d02-f244-4829-84df-a07fc900527d_1731x1154.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:971,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:1335104,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:false,&quot;topImage&quot;:true,&quot;internalRedirect&quot;:&quot;https://newsletter.victordibia.com/i/179070186?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F67853d02-f244-4829-84df-a07fc900527d_1731x1154.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!v-V3!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F67853d02-f244-4829-84df-a07fc900527d_1731x1154.png 424w, https://substackcdn.com/image/fetch/$s_!v-V3!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F67853d02-f244-4829-84df-a07fc900527d_1731x1154.png 848w, 
https://substackcdn.com/image/fetch/$s_!v-V3!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F67853d02-f244-4829-84df-a07fc900527d_1731x1154.png 1272w, https://substackcdn.com/image/fetch/$s_!v-V3!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F67853d02-f244-4829-84df-a07fc900527d_1731x1154.png 1456w" sizes="100vw" fetchpriority="high"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>AI coding agents have gotten remarkably capable. 
<a href="https://metr.org/blog/2025-03-19-measuring-ai-ability-to-complete-long-tasks/">METR&#8217;s research</a> [1] shows that the length of tasks AI agents can complete autonomously has <strong>doubled every 7 months over the past 6 years</strong>. OpenAI&#8217;s recently announced <a href="https://openai.com/index/introducing-gpt-5-2/">GPT-5-Codex</a> [2] can run autonomously for over 7 hours, handling complex refactoring and code reviews without human intervention.</p><blockquote><p><strong>How To Build Agents from Scratch</strong><br>Interested in how coding agents like Claude Code work, or how to build them from scratch for your own tasks? I wrote a book - <a href="https://multiagentbook.com/">Designing Multi-Agent Systems</a> - that covers building agents from scratch (Chapter 4), evaluation frameworks (Chapter 10), optimization strategies for the 10 common failure modes (Chapter 11), and implementing software engineering agents like Claude Code (Chapter 15).</p></blockquote><p>These advances have enabled new types of experimentation. Specifically, <strong>&#8220;vibe coding&#8221;</strong> - the practice of describing what you want and letting the AI generate code based on intuition and feel - works great for prototypes and quick experiments.</p><p>And yet, many teams - especially those with early-career engineers - still hit <strong>roadblocks as soon as they try to build complex real-world applications that matter</strong>. A game app that&#8217;s deployed and distributed with auto-scaling, auth, multi-user support, and streaming. A pipeline for <em><strong>coherent</strong></em> music generation. A document processing system for enterprise grade legal review. 
Today, it seems that end-to-end results for tasks like this <em>rarely come from getting your coding agent to accomplish the entire thing.</em></p><div class="digest-post-embed" data-attrs="{&quot;nodeId&quot;:&quot;22363913-0126-48b8-b4cc-4aabc27aac32&quot;,&quot;caption&quot;:&quot;I have found myself at a rather interesting nexus quite often recently as I work with AI coding models like Claude. As I integrate these tools into my workflow, I see fundamental changes in how I work, and it is unclear at this point that these changes are trending in the right direction.&quot;,&quot;cta&quot;:&quot;Read full story&quot;,&quot;showBylines&quot;:true,&quot;size&quot;:&quot;sm&quot;,&quot;isEditorNode&quot;:true,&quot;title&quot;:&quot; /upgrade ... or ...&quot;,&quot;publishedBylines&quot;:[{&quot;id&quot;:85692678,&quot;name&quot;:&quot;Victor Dibia, PhD&quot;,&quot;bio&quot;:&quot;Software Engineer, Researcher (Microsoft), Best Selling Author working on and writing about Generative AI, Agents. Core maintainer for the AutoGen Framework (50k Stars on GitHub). Previously at Cloudera, IBM Research. 
All views are my own.&quot;,&quot;photo_url&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/27536f63-7d8e-48dc-b34c-150acfacdc8b_1726x1396.jpeg&quot;,&quot;is_guest&quot;:false,&quot;bestseller_tier&quot;:null}],&quot;post_date&quot;:&quot;2025-10-27T15:02:56.939Z&quot;,&quot;cover_image&quot;:&quot;https://substackcdn.com/image/fetch/$s_!FEAl!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe8efd626-4e16-4a2b-8e6f-834fdf2c86aa_2838x1851.png&quot;,&quot;cover_image_alt&quot;:null,&quot;canonical_url&quot;:&quot;https://newsletter.victordibia.com/p/upgrade-or&quot;,&quot;section_name&quot;:null,&quot;video_upload_id&quot;:null,&quot;id&quot;:173907581,&quot;type&quot;:&quot;newsletter&quot;,&quot;reaction_count&quot;:12,&quot;comment_count&quot;:1,&quot;publication_id&quot;:1253044,&quot;publication_name&quot;:&quot;Designing with AI&quot;,&quot;publication_logo_url&quot;:&quot;https://substackcdn.com/image/fetch/$s_!1FgP!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fff160652-8bbb-475e-80a7-ba4eb5f80dcb_504x504.png&quot;,&quot;belowTheFold&quot;:false,&quot;youtube_url&quot;:null,&quot;show_links&quot;:null,&quot;feed_url&quot;:null}"></div><p>Why does this happen? Well, agents, even while capable, <strong>make common mistakes</strong>. They gloss over security vulnerabilities. They may select suboptimal architectural designs. They may fail to consider system and deployment constraints e.g., your deployment system only works with Node.js, compliance requires a specific encryption standard, your CI pipeline has particular requirements. As the tasks get more complex and longer, the agents may fail to keep up with earlier instructions as the context fills up.</p><p>This often leads to confusion and disappointment for teams who see the success stories but frequently run into these issues.  
</p><p>TL;DR: The good news is that with the right management and <em><strong>engineering discipline, you can get the right results (with productivity benefits) from AI agents.</strong></em></p><p>This post highlights 5 useful things to do to get closer to this goal. This is how I use coding agents like <a href="https://newsletter.victordibia.com/p/upgrade-or">Claude Code today, despite its quirks</a>.<br></p><blockquote><p>P.S. I wrote about <a href="https://newsletter.victordibia.com/p/developers-stop-asking-llms-genai">how to build software with AI models</a> [3] a while ago. This post is an extension that acknowledges the autonomy that agents like Claude Code and OpenAI Codex bring - they have sophisticated agentic harnesses allowing them to tackle medium to large scale tasks.</p></blockquote><p><strong> </strong></p><div class="digest-post-embed" data-attrs="{&quot;nodeId&quot;:&quot;226a67b2-482a-45a2-b361-da34d7641b88&quot;,&quot;caption&quot;:&quot;It's no longer a question of whether to use GenAI tools, but how to use them effectively. Just as digital literacy was crucial for navigating the internet era, AI literacy is becoming essential for engineers to achieve equitable outcomes from AI tools.&quot;,&quot;cta&quot;:&quot;Read full story&quot;,&quot;showBylines&quot;:true,&quot;size&quot;:&quot;sm&quot;,&quot;isEditorNode&quot;:true,&quot;title&quot;:&quot;How to Effectively Use Generative AI for Software Engineering Tasks!&quot;,&quot;publishedBylines&quot;:[{&quot;id&quot;:85692678,&quot;name&quot;:&quot;Victor Dibia, PhD&quot;,&quot;bio&quot;:&quot;Software Engineer, Researcher (Microsoft), Best Selling Author working on and writing about Generative AI, Agents. Core maintainer for the AutoGen Framework (50k Stars on GitHub). Previously at Cloudera, IBM Research. 
All views are my own.&quot;,&quot;photo_url&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/27536f63-7d8e-48dc-b34c-150acfacdc8b_1726x1396.jpeg&quot;,&quot;is_guest&quot;:false,&quot;bestseller_tier&quot;:null}],&quot;post_date&quot;:&quot;2024-11-25T15:20:16.450Z&quot;,&quot;cover_image&quot;:&quot;https://substackcdn.com/image/fetch/$s_!LYiA!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F54d55546-d64d-491e-bada-43150eb40014_2446x1344.png&quot;,&quot;cover_image_alt&quot;:null,&quot;canonical_url&quot;:&quot;https://newsletter.victordibia.com/p/developers-stop-asking-llms-genai&quot;,&quot;section_name&quot;:null,&quot;video_upload_id&quot;:null,&quot;id&quot;:143432589,&quot;type&quot;:&quot;newsletter&quot;,&quot;reaction_count&quot;:29,&quot;comment_count&quot;:1,&quot;publication_id&quot;:1253044,&quot;publication_name&quot;:&quot;Designing with AI&quot;,&quot;publication_logo_url&quot;:&quot;https://substackcdn.com/image/fetch/$s_!1FgP!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fff160652-8bbb-475e-80a7-ba4eb5f80dcb_504x504.png&quot;,&quot;belowTheFold&quot;:true,&quot;youtube_url&quot;:null,&quot;show_links&quot;:null,&quot;feed_url&quot;:null}"></div><div><hr></div><div class="subscription-widget-wrap-editor" data-attrs="{&quot;url&quot;:&quot;https://newsletter.victordibia.com/subscribe?&quot;,&quot;text&quot;:&quot;Subscribe&quot;,&quot;language&quot;:&quot;en&quot;}" data-component-name="SubscribeWidgetToDOM"><div class="subscription-widget show-subscribe"><div class="preamble"><p class="cta-caption">Designing with AI is a reader-supported publication. 
To receive new posts and support my work, consider becoming a free or paid subscriber.</p></div><form class="subscription-widget-subscribe"><input type="email" class="email-input" name="email" placeholder="Type your email&#8230;" tabindex="-1"><input type="submit" class="button primary" value="Subscribe"><div class="fake-input-wrapper"><div class="fake-input"></div><div class="fake-button"></div></div></form></div></div><div><hr></div><h2>1. Share Context and Constraints</h2><p>If you hired a new engineer - irrespective of their expertise - you would still need to onboard them. You&#8217;d tell them about your build system, tacit knowledge, tribal knowledge, tech stack, deployment stack, and more. For agents, <em>you still have to do the same thing</em>. Agents will not magically gain this knowledge. It is an opportunity to offer the agent <strong>context</strong> -  what exists - your codebase structure, tech stack, design patterns, how things work today, as well as important <strong>constraints</strong> - what MUST or CANNOT happen - hard rules, compliance requirements, deployment limits, non-negotiables.</p><p>Tools like <a href="https://www.anthropic.com/engineering/claude-code-best-practices">CLAUDE.md</a> [4], and <a href="https://github.com/github/spec-kit">Spec-Driven Development</a> [5] are designed to support this.</p><blockquote><p><strong>CLAUDE.md</strong> is a special markdown file that Claude Code automatically loads into every conversation&#8212;essentially an onboarding document for your agent. It&#8217;s where you codify tacit knowledge: build commands, environment setup, repo conventions, and hard-won lessons about your codebase. Keep it under 300 lines and use hierarchical files in subdirectories so the agent sees only the most relevant context.  </p></blockquote><p></p><blockquote><p><strong>GitHub&#8217;s Spec Kit</strong> takes a more structured approach with four gated phases: Specify &#8594; Plan &#8594; Tasks &#8594; Implement. 
Instead of coding first and writing docs later, you start with a specification that captures intent, then translate it into technical decisions, break it into implementable pieces, and only then let the agent code. This solves the &#8220;scattered requirements&#8221; problem&#8212;security policies, compliance rules, and design constraints get baked into the spec where the AI can actually use them, rather than living in someone&#8217;s head or buried in a wiki. The philosophy: treat coding agents like literal-minded pair programmers who excel at pattern recognition but need unambiguous instructions. <a href="https://github.blog/ai-and-ml/generative-ai/spec-driven-development-with-ai-get-started-with-a-new-open-source-toolkit/">GitHub</a></p></blockquote><p> </p><h3>Building a Practice of Iterative Context Enrichment</h3><p>As a team, consider having an architect (or senior engineer) first initialize your CLAUDE.md (it scans the codebase), then have them add useful context and <em><strong>update it iteratively</strong></em> as they work with the agent. For example, you ask the agent to build the frontend and it uses <code>npm run build</code>, but instead your repo is designed to use <code>bun</code> - you <strong>correct it and ask it to add the bun instruction to CLAUDE.md</strong>.</p><p>Context and constraints <strong>live across the lifetime of a project</strong> (or even across projects). Teams should build muscle to manage these as practices - a file system for designs, how they are organized, where specs live.</p><blockquote><p><strong>Boris Cherny</strong>, creator/lead of Claude Code, talks about how they author and version CLAUDE.md for the Claude Code project <a href="https://x.com/bcherny/status/2007179840848597422">here</a>. <br><br>&#8220;Our team shares a single CLAUDE.md for the Claude Code repo. We check it into git, and the whole team contributes multiple times a week. 
Anytime we see Claude do something incorrectly we add it to the CLAUDE.md, so Claude knows not to do it next time. Other teams maintain their own CLAUDE.md&#8217;s. <strong>It is each team&#8217;s job to keep theirs up to date.</strong>&#8221;</p></blockquote><p><a href="https://docs.anthropic.com/en/docs/claude-code/overview">CLAUDE.md files</a> [4] can be hierarchical - you can have a project-level file and nested ones in subdirectories. Claude prioritizes the most specific, most nested file when relevant.</p><p>For more structured approaches, <a href="https://github.com/github/spec-kit">GitHub&#8217;s Spec Kit</a> [5] provides a toolkit with three phases: <code>/specify</code> to generate specifications, <code>/plan</code> to produce technical plans, and <code>/tasks</code> to derive actionable task lists. It&#8217;s agent-agnostic and works with Claude Code, GitHub Copilot, Gemini, and others.</p><div><hr></div><h2>2. Co-design with the Agent</h2><p>I find that I often explore a problem in detail <em><strong>first within a design session with an agent</strong></em>. I ask a bunch of questions. I ask the agent to start a running doc. I iterate and improve that design doc.</p><p>For example: &#8220;I am interested in this feature - adding query filtering in the UI. Review the current implementation and propose a design that honors the design system, has minimal code changes, and reuses components. <strong>Change nothing in the code base yet.</strong>&#8221;</p><p>Sometimes it&#8217;s useful to signal to the agent directly &#8220;change nothing&#8221; to indicate brainstorm mode (some agents have specific toggles for this).</p><p>Some plan comes back. I offer corrections. I often have a running directory of such design interactions that I refine very carefully and make into specs. These design docs are referenced as specs for features, and the agent can benchmark its work against those. 
<br>And then when things are clear and well defined, I tell the agent - &#8220;hit it!&#8221; </p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!vedh!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe7c9a52b-264f-4cd5-ace0-65b61a8ccb13_640x312.gif" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!vedh!,w_424,c_limit,f_webp,q_auto:good,fl_lossy/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe7c9a52b-264f-4cd5-ace0-65b61a8ccb13_640x312.gif 424w, https://substackcdn.com/image/fetch/$s_!vedh!,w_848,c_limit,f_webp,q_auto:good,fl_lossy/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe7c9a52b-264f-4cd5-ace0-65b61a8ccb13_640x312.gif 848w, https://substackcdn.com/image/fetch/$s_!vedh!,w_1272,c_limit,f_webp,q_auto:good,fl_lossy/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe7c9a52b-264f-4cd5-ace0-65b61a8ccb13_640x312.gif 1272w, https://substackcdn.com/image/fetch/$s_!vedh!,w_1456,c_limit,f_webp,q_auto:good,fl_lossy/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe7c9a52b-264f-4cd5-ace0-65b61a8ccb13_640x312.gif 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!vedh!,w_1456,c_limit,f_auto,q_auto:good,fl_lossy/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe7c9a52b-264f-4cd5-ace0-65b61a8ccb13_640x312.gif" width="728" height="354.9" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/e7c9a52b-264f-4cd5-ace0-65b61a8ccb13_640x312.gif&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:312,&quot;width&quot;:640,&quot;resizeWidth&quot;:728,&quot;bytes&quot;:null,&quot;alt&quot;:&quot;Hit It Captain Christopher Pike GIF - Hit It Captain Christopher Pike Anson  Mount - Discover &amp; Share GIFs&quot;,&quot;title&quot;:null,&quot;type&quot;:null,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="Hit It Captain Christopher Pike GIF - Hit It Captain Christopher Pike Anson  Mount - Discover &amp; Share GIFs" title="Hit It Captain Christopher Pike GIF - Hit It Captain Christopher Pike Anson  Mount - Discover &amp; Share GIFs" srcset="https://substackcdn.com/image/fetch/$s_!vedh!,w_424,c_limit,f_auto,q_auto:good,fl_lossy/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe7c9a52b-264f-4cd5-ace0-65b61a8ccb13_640x312.gif 424w, https://substackcdn.com/image/fetch/$s_!vedh!,w_848,c_limit,f_auto,q_auto:good,fl_lossy/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe7c9a52b-264f-4cd5-ace0-65b61a8ccb13_640x312.gif 848w, https://substackcdn.com/image/fetch/$s_!vedh!,w_1272,c_limit,f_auto,q_auto:good,fl_lossy/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe7c9a52b-264f-4cd5-ace0-65b61a8ccb13_640x312.gif 1272w, https://substackcdn.com/image/fetch/$s_!vedh!,w_1456,c_limit,f_auto,q_auto:good,fl_lossy/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe7c9a52b-264f-4cd5-ace0-65b61a8ccb13_640x312.gif 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" 
class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><h3></h3><div><hr></div><h2>3. Test-Driven Development</h2><p>As agents take on more tasks, the chance that changes occur that you have no idea of increases. From my experience, this can be a really bad thing. It&#8217;s generally poor form to get to a meeting where there&#8217;s a feature that <em>works</em>, but nobody can explain <em>how</em> it works.</p><p>In my <a href="https://newsletter.victordibia.com/p/developers-stop-asking-llms-genai">previous post</a> [3], I advocate for knowledge parity - read everything. But I get it, the temptation is high. It also kinda <a href="https://newsletter.victordibia.com/p/upgrade-or">defeats the productivity promise</a> if you are sitting watching the agent go on a 7hr bender. 
Also, the bosses want to see more work done in a fraction of the time.</p><p>At minimum, if you don&#8217;t know everything, you should know/define exactly how things <em>should</em> work - <strong>as defined in tests.</strong></p><p>Try to author and fully understand your tests. Add instructions to CLAUDE.md to require permissions for test changes (so the agent doesn&#8217;t delete the test to declare the task done - which it can do).</p><div><hr></div><h2>4. Checkpointing</h2><p>I wrote about checkpointing previously in my <a href="https://newsletter.victordibia.com/p/developers-stop-asking-llms-genai">last article</a> [3], mostly focused on writing continuation summaries of what went well in long sessions. This still applies to agents - research like <a href="https://arxiv.org/abs/2307.03172">&#8220;Lost in the Middle&#8221;</a> [6] shows that LLMs struggle with information in the middle of long contexts; performance forms a U-shape with best results at the beginning and end.</p><p>Checkpointing takes on more importance in agent land. An agent working for 1 hour can change hundreds of files, which can be problematic to debug if <em>1 out of 100 changes</em> has issues.</p><p>I have learned to carefully isolate any working sessions with an AI agent via frequent git commits. Once a feature is done, tested, and in good condition, it&#8217;s best to check it in and version it.</p><p>Agents are good at version-based rollbacks using git. They can struggle at rollbacks within a session, manually reverting individual changes across 100s of files. It&#8217;s also great for your sanity / security - roll back on error. Good old engineering practice.</p><div><hr></div><h2>5. Code Review is for Humans  </h2><p>We should not need to rehash the obvious here, but code review is key.</p><p>You can have AI agent reviews - GitHub Copilot does leave <em>decent</em> suggestions. But they should be more FYI. 
We should still have engineering practices of having at least one or two reviewers to gate merging.</p><p>Importantly - research on code review shows it serves dual purposes [<a href="https://www.researchgate.net/publication/224327153_What_Types_of_Defects_Are_Really_Discovered_in_Code_Reviews">7</a>][<a href="https://dl.acm.org/doi/10.1145/3387904.3389270">8</a>] - <strong>defect detection</strong> - catching bugs, security issues, logic errors, but also - <strong>knowledge dissemination</strong> - ensuring multiple engineers understand the code. <br><br>Of course, defect detection is still very <a href="https://x.com/rauchg/status/2026864132423823499">important</a>. </p><blockquote><p><strong>Feb 24</strong>: Cloudflare <a href="https://blog.cloudflare.com/vinext/">announces</a> that a single dev rebuilt Next.js with significant improvements in 1 week.  Massive internet excitement on the rise of 1-man dev teams.</p><p><strong>Feb 27</strong>: It&#8217;s revealed that there are 2 critical, 2 high, 2 medium, and 1 low security vulnerabilities in the framework.</p></blockquote>
      <p>
          <a href="https://newsletter.victordibia.com/p/vibe-coding-with-engineering-discipline">
              Read more
          </a>
      </p>
   ]]></content:encoded></item><item><title><![CDATA[Agent Middleware: Adding Control and Observability to AI Agents]]></title><description><![CDATA[Issue #53 | How to intercept agent operations for logging, safety, rate limiting, and compliance - the middleware pattern for production AI agents.]]></description><link>https://newsletter.victordibia.com/p/agent-middleware-adding-control-and</link><guid isPermaLink="false">https://newsletter.victordibia.com/p/agent-middleware-adding-control-and</guid><dc:creator><![CDATA[Victor Dibia, PhD]]></dc:creator><pubDate>Tue, 16 Dec 2025 12:31:43 GMT</pubDate><enclosure url="https://substackcdn.com/image/fetch/$s_!jTyB!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6eb304e2-14b2-4411-9011-bbdc5ce5a692_1469x957.png" length="0" type="image/jpeg"/><content:encoded><![CDATA[<div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!jTyB!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6eb304e2-14b2-4411-9011-bbdc5ce5a692_1469x957.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!jTyB!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6eb304e2-14b2-4411-9011-bbdc5ce5a692_1469x957.png 424w, https://substackcdn.com/image/fetch/$s_!jTyB!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6eb304e2-14b2-4411-9011-bbdc5ce5a692_1469x957.png 848w, https://substackcdn.com/image/fetch/$s_!jTyB!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6eb304e2-14b2-4411-9011-bbdc5ce5a692_1469x957.png 1272w, 
https://substackcdn.com/image/fetch/$s_!jTyB!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6eb304e2-14b2-4411-9011-bbdc5ce5a692_1469x957.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!jTyB!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6eb304e2-14b2-4411-9011-bbdc5ce5a692_1469x957.png" width="1456" height="949" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/6eb304e2-14b2-4411-9011-bbdc5ce5a692_1469x957.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:949,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:81363,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:false,&quot;topImage&quot;:true,&quot;internalRedirect&quot;:&quot;https://newsletter.victordibia.com/i/175448644?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6eb304e2-14b2-4411-9011-bbdc5ce5a692_1469x957.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!jTyB!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6eb304e2-14b2-4411-9011-bbdc5ce5a692_1469x957.png 424w, https://substackcdn.com/image/fetch/$s_!jTyB!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6eb304e2-14b2-4411-9011-bbdc5ce5a692_1469x957.png 848w, 
https://substackcdn.com/image/fetch/$s_!jTyB!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6eb304e2-14b2-4411-9011-bbdc5ce5a692_1469x957.png 1272w, https://substackcdn.com/image/fetch/$s_!jTyB!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6eb304e2-14b2-4411-9011-bbdc5ce5a692_1469x957.png 1456w" sizes="100vw" fetchpriority="high"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>In a <a href="https://newsletter.victordibia.com/p/the-agent-execution-loop-how-to-build">previous post on the agent execution 
loop</a>, I showed how agents work: a while loop that calls a model, executes tools, and iterates until done. That loop is the engine or <em>harness</em>. But engines need control routines that <em>govern</em> behaviour.</p><p>When you deploy an agent to production, new questions emerge:</p><ul><li><p>How do I log every model call and tool execution?</p></li><li><p>How do I block malicious prompts before they reach the LLM?</p></li><li><p>How do I rate limit users to control costs?</p></li><li><p>How do I redact PII from outputs before they reach users or broader telemetry systems?</p></li><li><p>How do I create audit trails for compliance?</p></li></ul><p>One way to do this is <strong>middleware</strong> - routines that intercept agent operations at key points in the execution loop (e.g., before/after a model call, before/after tool calls etc). If you&#8217;ve built web applications with Express, Django, or FastAPI, you know this pattern. And it works just as well for agents.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!22gs!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2a173bfc-da9f-4ec2-8002-b6669bdfa6c3_2752x1536.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!22gs!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2a173bfc-da9f-4ec2-8002-b6669bdfa6c3_2752x1536.png 424w, https://substackcdn.com/image/fetch/$s_!22gs!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2a173bfc-da9f-4ec2-8002-b6669bdfa6c3_2752x1536.png 848w, 
https://substackcdn.com/image/fetch/$s_!22gs!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2a173bfc-da9f-4ec2-8002-b6669bdfa6c3_2752x1536.png 1272w, https://substackcdn.com/image/fetch/$s_!22gs!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2a173bfc-da9f-4ec2-8002-b6669bdfa6c3_2752x1536.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!22gs!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2a173bfc-da9f-4ec2-8002-b6669bdfa6c3_2752x1536.png" width="1456" height="813" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/2a173bfc-da9f-4ec2-8002-b6669bdfa6c3_2752x1536.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:813,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:4494885,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:false,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://newsletter.victordibia.com/i/175448644?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2a173bfc-da9f-4ec2-8002-b6669bdfa6c3_2752x1536.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!22gs!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2a173bfc-da9f-4ec2-8002-b6669bdfa6c3_2752x1536.png 424w, 
https://substackcdn.com/image/fetch/$s_!22gs!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2a173bfc-da9f-4ec2-8002-b6669bdfa6c3_2752x1536.png 848w, https://substackcdn.com/image/fetch/$s_!22gs!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2a173bfc-da9f-4ec2-8002-b6669bdfa6c3_2752x1536.png 1272w, https://substackcdn.com/image/fetch/$s_!22gs!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2a173bfc-da9f-4ec2-8002-b6669bdfa6c3_2752x1536.png 1456w" sizes="100vw"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" 
x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p></p><blockquote><p><strong>Note</strong>: This post is adapted from my book <strong><a href="https://buy.multiagentbook.com/">Designing Multi-Agent Systems</a></strong>, where Chapter 4 walks you through building a complete Agent class from scratch. The agent class is part of <strong><a href="https://github.com/victordibia/designing-multiagent-systems">PicoAgents</a></strong>: a minimal, hackable multi-agent framework which the reader gets to build across the sections of the book. While we will be using the picoagents sample in this post, the same patterns apply across frameworks like LangChain, Microsoft Agent Framework, and others (see examples at the end of the post).</p><ul><li><p>Digital PDF: <a href="https://buy.multiagentbook.com/">buy.multiagentbook.com</a></p></li><li><p>Print on Amazon: <a href="https://www.amazon.com/dp/B0G2BCQQJY">amazon.com/dp/B0G2BCQQJY</a></p></li><li><p>Middleware source: <a href="https://github.com/victordibia/designing-multiagent-systems/blob/main/picoagents/src/picoagents/_middleware.py">picoagents/_middleware.py</a></p></li></ul></blockquote><h2></h2><div><hr></div><div class="subscription-widget-wrap-editor" data-attrs="{&quot;url&quot;:&quot;https://newsletter.victordibia.com/subscribe?&quot;,&quot;text&quot;:&quot;Subscribe&quot;,&quot;language&quot;:&quot;en&quot;}" data-component-name="SubscribeWidgetToDOM"><div class="subscription-widget show-subscribe"><div class="preamble"><p class="cta-caption">Designing with AI is a reader-supported publication. 
To receive new posts and support my work, consider becoming a free or paid subscriber.</p></div><form class="subscription-widget-subscribe"><input type="email" class="email-input" name="email" placeholder="Type your email&#8230;" tabindex="-1"><input type="submit" class="button primary" value="Subscribe"><div class="fake-input-wrapper"><div class="fake-input"></div><div class="fake-button"></div></div></form></div></div><div><hr></div><h2>What is Agent Middleware?</h2><p>Middleware intercepts agent operations before and after they execute. When an agent prepares to call the LLM or execute a tool, that operation first passes through a middleware chain. Each middleware can:</p><ol><li><p><strong>Inspect</strong> the operation (inputs, context, metadata)</p></li><li><p><strong>Modify</strong> inputs or outputs</p></li><li><p><strong>Block</strong> the operation entirely by raising an exception</p></li><li><p><strong>Log</strong> what happened for observability</p></li></ol><p>Here&#8217;s the mental model:</p><pre><code><code>User Task
    &#8595;
[Middleware Chain]
    &#8595;
Model Call &#8594; [Middleware] &#8594; LLM API &#8594; [Middleware] &#8594; Response
    &#8595;
Tool Call &#8594; [Middleware] &#8594; Execute &#8594; [Middleware] &#8594; Result
    &#8595;
Final Response
</code></code></pre><p>Every model call and tool execution routes through the middleware chain. This gives you a single control plane for security, observability, and policy enforcement.</p><blockquote><p>While this example focuses on model and tool call operations, you can also implement middleware for other operations, e.g., calls to fetch or write to memory.</p></blockquote><h2>The BaseMiddleware Interface</h2><p>To standardize how middleware works, we define a common interface with three hooks. In the PicoAgents <a href="https://github.com/victordibia/designing-multiagent-systems/blob/7a78f274e628d86e1b0ea1b5d3ef9ff0dc9ead70/picoagents/src/picoagents/_middleware.py#L47">middleware interface</a>, we use async generators (<code>yield</code>) for the hooks rather than return values - this enables streaming support and event emission for observability:</p><pre><code><code>from abc import ABC, abstractmethod
from collections.abc import AsyncGenerator
from typing import Any, Union

class BaseMiddleware(ABC):
    """Abstract base class for middleware."""

    @abstractmethod
    async def process_request(
        self, context: MiddlewareContext
    ) -&gt; AsyncGenerator[Union[MiddlewareContext, "AgentEvent"], None]:
        """Process before the operation executes."""
        yield context

    @abstractmethod
    async def process_response(
        self, context: MiddlewareContext, result: Any
    ) -&gt; AsyncGenerator[Union[Any, "AgentEvent"], None]:
        """Process after the operation completes successfully."""
        yield result

    @abstractmethod
    async def process_error(
        self, context: MiddlewareContext, error: Exception
    ) -&gt; AsyncGenerator[Union[Any, "AgentEvent"], None]:
        """Handle errors from the operation."""
        raise error
</code></code></pre><p><code>process_request()</code> runs before any model call or tool execution. This is where you validate inputs, start timers, or block suspicious requests. Yield the context to continue, or raise to abort.</p><p><code>process_response()</code> runs after successful operations. Use it to filter outputs, cache results, or log completion times. Yield the (possibly modified) result.</p><p><code>process_error()</code> handles failures. Log errors, provide fallbacks, or implement retry logic. Yield a recovery value, or re-raise to propagate.</p><p>The async generator pattern (<code>yield</code> instead of <code>return</code>) enables middleware to emit events for observability and supports streaming responses - both critical for production agents.</p><h2>Example 1: Logging Middleware</h2><p>The simplest and most useful middleware: log everything.</p><pre><code><code>import time

class LoggingMiddleware(BaseMiddleware):
    &#8220;&#8221;&#8220;Log all agent operations with timing.&#8221;&#8220;&#8221;

    async def process_request(self, context: MiddlewareContext):
        print(f&#8221;[{context.agent_name}] Starting {context.operation}&#8221;)
        context.metadata[&#8221;start_time&#8221;] = time.time()
        yield context

    async def process_response(self, context: MiddlewareContext, result: Any):
        duration = time.time() - context.metadata.get(&#8221;start_time&#8221;, 0)
        print(f&#8221;[{context.agent_name}] {context.operation} completed in {duration:.2f}s&#8221;)
        yield result

    async def process_error(self, context: MiddlewareContext, error: Exception):
        print(f&#8221;[{context.agent_name}] {context.operation} failed: {error}&#8221;)
        raise error
</code></code></pre><p>Pass it to your agent:</p><pre><code><code>agent = Agent(
    name=&#8221;assistant&#8221;,
    model_client=OpenAIChatCompletionClient(model=&#8221;gpt-4.1-mini&#8221;),
    instructions=&#8221;You are a helpful assistant.&#8221;,
    middlewares=[LoggingMiddleware()]
)

response = await agent.run(&#8221;What&#8217;s 2+2?&#8221;)
</code></code></pre><p>Output:</p><pre><code><code>[assistant] Starting model_call
[assistant] model_call completed in 0.82s
</code></code></pre><p>Every operation is now visible. When debugging why an agent made a specific decision or measuring latency, this is your first line of defense.</p><blockquote><p>Note: What is not shown above is how the middleware chain is &#8220;wired&#8221; into the agent execution loop. See the implementation of the <a href="https://github.com/victordibia/designing-multiagent-systems/blob/7a78f274e628d86e1b0ea1b5d3ef9ff0dc9ead70/picoagents/src/picoagents/agents/_agent.py#L434">agent class</a> to review how model calls and tool calls are passed through a chain of middleware associated with the agent.</p></blockquote><h2>Example 2: Guardrail Middleware</h2><p>Agents are vulnerable to prompt injection - malicious inputs that hijack the model&#8217;s behavior. A guardrail middleware can block dangerous patterns before they reach the LLM:</p><pre><code><code>import re

class GuardrailMiddleware(BaseMiddleware):
    &#8220;&#8221;&#8220;Block malicious input before it reaches the model.&#8221;&#8220;&#8221;

    def __init__(self, blocked_patterns: list[str] = None):
        # Basic patterns for demonstration
        self.blocked_patterns = [re.compile(p) for p in (blocked_patterns or [
            r&#8221;ignore.*previous.*instructions&#8221;,
            r&#8221;system.*prompt.*override&#8221;,
            r&#8221;&lt;script.*?&gt;.*?&lt;/script&gt;&#8221;,
        ])]

    async def process_request(self, context: MiddlewareContext):
        if context.operation == &#8220;model_call&#8221;:
            for message in context.data:
                if hasattr(message, &#8220;content&#8221;):
                    for pattern in self.blocked_patterns:
                        if pattern.search(message.content):
                            raise ValueError(f&#8221;Blocked pattern: {pattern.pattern}&#8221;)
        yield context

    async def process_response(self, context, result):
        yield result

    async def process_error(self, context, error):
        raise error
</code></code></pre><p>When the middleware detects dangerous patterns, it raises an exception immediately. The malicious input never reaches the model. No expensive API call is made. No problematic content enters your logs.</p><blockquote><p><strong>Important</strong>: Regex-based filtering is a first line of defense, not a complete solution. Adversarial users can easily rephrase prompts to bypass pattern matching (&#8220;disregard prior directives&#8221; instead of &#8220;ignore previous instructions&#8221;). For production systems, consider dedicated guardrail models like <a href="https://ai.meta.com/research/publications/llama-guard-llm-based-input-output-safeguard-for-human-ai-conversations/">Llama Guard</a>, <a href="https://azure.microsoft.com/en-us/products/ai-services/ai-content-safety">Azure AI Content Safety</a>, or similar classification-based approaches that understand semantic meaning rather than just matching strings.</p></blockquote><h2>Example 3: Rate Limiting Middleware</h2><p>LLM API calls are expensive. Without rate limiting, a single abusive user can blow through your budget. Here&#8217;s a simple rate limiter middleware:</p><pre><code><code>import asyncio
import time

class RateLimitMiddleware(BaseMiddleware):
    &#8220;&#8221;&#8220;Limit API calls per minute.&#8221;&#8220;&#8221;

    def __init__(self, max_calls_per_minute: int = 60):
        self.max_calls = max_calls_per_minute
        # NOTE: In production, use Redis or similar for distributed state.
        # In-memory state won&#8217;t work across multiple containers or serverless functions.
        self.call_times: list[float] = []

    async def process_request(self, context: MiddlewareContext):
        now = time.time()

        # Remove calls outside the 60-second window
        self.call_times = [t for t in self.call_times if now - t &lt; 60]

        # If at limit, wait until oldest call expires
        if len(self.call_times) &gt;= self.max_calls:
            oldest_call = self.call_times[0]
            wait_time = 60 - (now - oldest_call)
            if wait_time &gt; 0:
                await asyncio.sleep(wait_time)
                now = time.time()

        self.call_times.append(now)
        yield context

    async def process_response(self, context, result):
        yield result

    async def process_error(self, context, error):
        raise error
</code></code></pre><p>Usage:</p><pre><code><code>agent = Agent(
    name=&#8221;limited_assistant&#8221;,
    model_client=OpenAIChatCompletionClient(model=&#8221;gpt-4.1-mini&#8221;),
    instructions=&#8221;You are a helpful assistant.&#8221;,
    middlewares=[RateLimitMiddleware(max_calls_per_minute=10)]
)
</code></code></pre><p>Now your agent can&#8217;t be abused into making unlimited API calls. The middleware throttles requests to stay within budget.</p><h2>Example 4: PII Redaction Middleware</h2><p>Consider a scenario where a customer service agent handles sensitive data. A PII redaction middleware can detect and sanitize both inputs and outputs:</p><pre><code><code>import re

class PIIRedactionMiddleware(BaseMiddleware):
    &#8220;&#8221;&#8220;Redact personally identifiable information from inputs and outputs.&#8221;&#8220;&#8221;

    def __init__(self):
        self.patterns = {
            r&#8221;\b\d{3}-\d{2}-\d{4}\b&#8221;: &#8220;[SSN-REDACTED]&#8221;,
            r&#8221;\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b&#8221;: &#8220;[EMAIL-REDACTED]&#8221;,
            r&#8221;\b\d{3}[-.]?\d{3}[-.]?\d{4}\b&#8221;: &#8220;[PHONE-REDACTED]&#8221;,
            r&#8221;\b\d{4}[\s-]?\d{4}[\s-]?\d{4}[\s-]?\d{4}\b&#8221;: &#8220;[CC-REDACTED]&#8221;,
        }

    def _redact(self, text: str) -&gt; str:
        for pattern, replacement in self.patterns.items():
            text = re.sub(pattern, replacement, text)
        return text

    async def process_request(self, context: MiddlewareContext):
        # Redact PII from inputs before they reach the model
        if context.operation == &#8220;model_call&#8221; and isinstance(context.data, list):
            for msg in context.data:
                if hasattr(msg, &#8220;content&#8221;):
                    msg.content = self._redact(msg.content)
        yield context

    async def process_response(self, context: MiddlewareContext, result: Any):
        # Redact PII from outputs before they reach users
        if hasattr(result, &#8220;message&#8221;) and hasattr(result.message, &#8220;content&#8221;):
            result.message.content = self._redact(result.message.content)
        yield result

    async def process_error(self, context, error):
        raise error
</code></code></pre><p>Now sensitive data never leaves your system unprotected:</p><pre><code><code>Input: &#8220;Customer called from 555-123-4567, email john@example.com&#8221;

Output: &#8220;Customer called from [PHONE-REDACTED], email [EMAIL-REDACTED]&#8221;
</code></code></pre><blockquote><p><strong>Note</strong>: For streaming responses, PII redaction requires buffering chunks until you have enough context to detect patterns - <em><strong>you can&#8217;t redact a partial phone number mid-stream</strong></em>. In PicoAgents, you can handle this via the async generator pattern, allowing middleware to buffer and transform chunks before yielding them.</p></blockquote><h2>How Agents Use Middleware</h2><p>Internally, agents route their core operations through the middleware chain:</p><pre><code><code>class Agent:
    async def run(self, task: str):
        # ... prepare messages ...

        # Model call wrapped with middleware
        completion_result = await self.middleware_chain.execute(
            operation=&#8221;model_call&#8221;,
            agent_name=self.name,
            agent_context=self.context,
            data=llm_messages,
            func=lambda msgs: self.model_client.create(msgs, tools=tools)
        )

        # Tool execution wrapped with middleware
        for tool_call in completion_result.tool_calls:
            tool_result = await self.middleware_chain.execute(
                operation=&#8221;tool_call&#8221;,
                agent_name=self.name,
                agent_context=self.context,
                data=tool_call,
                func=lambda tc: self._execute_tool(tc)
            )
</code></code></pre><p>The <code>middleware_chain.execute()</code> method calls each middleware in sequence. Each middleware receives the operation details and can decide whether to proceed, modify the data, or block entirely.</p><h2>Composing Multiple Middleware</h2><p>Middleware composes naturally (depending on the implementation). Order matters - middleware executes in the order you provide:</p><pre><code><code>agent = Agent(
    name=&#8221;production_agent&#8221;,
    model_client=OpenAIChatCompletionClient(model=&#8221;gpt-4.1-mini&#8221;),
    instructions=&#8221;You are a helpful assistant.&#8221;,
    middlewares=[
        LoggingMiddleware(),         # Log everything first
        GuardrailMiddleware(),       # Block malicious inputs
        RateLimitMiddleware(60),     # Enforce rate limits
        PIIRedactionMiddleware(),    # Sanitize inputs/outputs
    ]
)
</code></code></pre><p>Request flow: Logging &#8594; Guardrail &#8594; Rate Limit &#8594; (execute) &#8594; PII Redaction &#8594; Logging</p><p>If guardrail middleware blocks a request, rate limiting and execution never happen. This short-circuit behavior is intentional&#8212;you don&#8217;t want to waste resources on blocked operations.</p><p>PicoAgents includes ready-to-use middleware for common patterns that you can learn from: <br><code>LoggingMiddleware</code>, <code>RateLimitMiddleware</code>, <code>PIIRedactionMiddleware</code>, <code>GuardrailMiddleware</code>, and <code>MetricsMiddleware</code>. Use them directly or as starting points for custom implementations.</p><h2>Same Pattern, Different Frameworks</h2><p>The middleware pattern isn&#8217;t unique to PicoAgents&#8212;it&#8217;s how production agent frameworks handle control and observability. The syntax differs, but the core architecture is identical: intercept operations, optionally modify or block, then proceed.</p><p><strong><a href="https://github.com/microsoft/agent-framework">Microsoft Agent Framework</a></strong> provides three middleware interfaces: <code>AgentMiddleware</code> (wraps agent runs), <code>FunctionMiddleware</code> (wraps tool calls), and <code>ChatMiddleware</code> (wraps LLM requests). You can define them as classes, functions, or use decorators:</p><pre><code><code>from agent_framework import AgentMiddleware, AgentRunContext

class SecurityMiddleware(AgentMiddleware):
    async def process(self, context: AgentRunContext, next):
        if is_malicious(context.messages):
            context.result = AgentRunResponse(messages=[...])
            context.terminate = True  # Stop execution
            return
        await next(context)
</code></code></pre>
      <p>
          <a href="https://newsletter.victordibia.com/p/agent-middleware-adding-control-and">
              Read more
          </a>
      </p>
   ]]></content:encoded></item><item><title><![CDATA[The Agent Execution Loop: How to Build an AI Agent From Scratch]]></title><description><![CDATA[Issue #52 | How AI agents work under the hood - the execution loop that enables reasoning, tool calling, and iterative problem solving.]]></description><link>https://newsletter.victordibia.com/p/the-agent-execution-loop-how-to-build</link><guid isPermaLink="false">https://newsletter.victordibia.com/p/the-agent-execution-loop-how-to-build</guid><dc:creator><![CDATA[Victor Dibia, PhD]]></dc:creator><pubDate>Thu, 11 Dec 2025 15:26:55 GMT</pubDate><enclosure url="https://substackcdn.com/image/fetch/$s_!L0RT!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2c66190d-1e75-49a9-be45-a38de3a1599f_1592x902.png" length="0" type="image/jpeg"/><content:encoded><![CDATA[<p>2025 has been dubbed <a href="https://www.itbrew.com/stories/2025/12/03/2025-became-the-year-of-agentic-ai">the year of agentic AI</a>. Tools like <a href="https://github.com/google-gemini/gemini-cli">Google Gemini CLI</a>, <a href="https://www.anthropic.com/claude-code">Claude Code</a>, <a href="https://github.blog/ai-and-ml/github-copilot/agent-mode-101-all-about-github-copilots-powerful-mode/">GitHub Copilot agent mode</a>, and Cursor are all examples of agents&#8212;autonomous entities that can take an open-ended task, plan, take action, reflect on the results, and loop until the task is done. And they&#8217;re creating real value.</p><p>But how do agents actually work? 
How can you build one?</p><p>In this post, I&#8217;ll walk through the core of how agents function: the <strong>agent execution loop</strong> that powers these complex behaviors.</p><blockquote><p><strong>Note</strong>: This post is adapted from my book <strong><a href="http://buy.multiagentbook.com">Designing Multi-Agent Systems</a></strong>, where Part II (Chapters 4-6) guides you through building a complete agent framework from scratch.</p><ul><li><p>Digital PDF: <a href="https://buy.multiagentbook.com/">buy.multiagentbook.com</a></p></li><li><p>Print on Amazon: <a href="https://www.amazon.com/dp/B0G2BCQQJY">amazon.com/dp/B0G2BCQQJY</a> </p></li></ul></blockquote><p></p><div class="digest-post-embed" data-attrs="{&quot;nodeId&quot;:&quot;8b36c8d4-0a13-42e2-900d-c3efe4d02651&quot;,&quot;caption&quot;:&quot;A few of you who have been around here for a while might recall I mentioned I was working on a book on AI agents. I&#8217;m excited to share that Designing Multi-Agent Systems: Principles, Patterns, and Implementation for AI Agents is now available in both print and digital editio&#8230;&quot;,&quot;cta&quot;:&quot;Read full story&quot;,&quot;showBylines&quot;:true,&quot;size&quot;:&quot;sm&quot;,&quot;isEditorNode&quot;:true,&quot;title&quot;:&quot;Two Years, 15 Chapters: The Multi-Agent Systems Book Is Finally Here!&quot;,&quot;publishedBylines&quot;:[{&quot;id&quot;:85692678,&quot;name&quot;:&quot;Victor Dibia, PhD&quot;,&quot;bio&quot;:&quot;Software Engineer, Researcher (Microsoft), Best Selling Author working on and writing about Generative AI, Agents. Core maintainer for the AutoGen Framework (50k Stars on GitHub). Previously at Cloudera, IBM Research. 
All views are my own.&quot;,&quot;photo_url&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/27536f63-7d8e-48dc-b34c-150acfacdc8b_1726x1396.jpeg&quot;,&quot;is_guest&quot;:false,&quot;bestseller_tier&quot;:null}],&quot;post_date&quot;:&quot;2025-11-14T23:53:39.449Z&quot;,&quot;cover_image&quot;:&quot;https://substackcdn.com/image/fetch/$s_!5Eor!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdaf3907a-3356-488e-80b1-7e35411c9ee6_2458x1564.png&quot;,&quot;cover_image_alt&quot;:null,&quot;canonical_url&quot;:&quot;https://newsletter.victordibia.com/p/the-designing-multi-agent-systems&quot;,&quot;section_name&quot;:null,&quot;video_upload_id&quot;:null,&quot;id&quot;:178934727,&quot;type&quot;:&quot;newsletter&quot;,&quot;reaction_count&quot;:17,&quot;comment_count&quot;:2,&quot;publication_id&quot;:1253044,&quot;publication_name&quot;:&quot;Designing with AI&quot;,&quot;publication_logo_url&quot;:&quot;https://substackcdn.com/image/fetch/$s_!1FgP!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fff160652-8bbb-475e-80a7-ba4eb5f80dcb_504x504.png&quot;,&quot;belowTheFold&quot;:false,&quot;youtube_url&quot;:null,&quot;show_links&quot;:null,&quot;feed_url&quot;:null}"></div><div><hr></div><div class="subscription-widget-wrap-editor" data-attrs="{&quot;url&quot;:&quot;https://newsletter.victordibia.com/subscribe?&quot;,&quot;text&quot;:&quot;Subscribe&quot;,&quot;language&quot;:&quot;en&quot;}" data-component-name="SubscribeWidgetToDOM"><div class="subscription-widget show-subscribe"><div class="preamble"><p class="cta-caption">Designing with AI is a reader-supported publication. 
To receive new posts and support my work, consider becoming a free or paid subscriber.</p></div><form class="subscription-widget-subscribe"><input type="email" class="email-input" name="email" placeholder="Type your email&#8230;" tabindex="-1"><input type="submit" class="button primary" value="Subscribe"><div class="fake-input-wrapper"><div class="fake-input"></div><div class="fake-button"></div></div></form></div></div><div><hr></div><h2>What is an Agent?</h2><p>An <strong>agent</strong> is an entity that can <strong>reason</strong>, <strong>act</strong>, <strong>communicate</strong>, and <strong>adapt</strong> to solve problems.</p><p>Consider two questions you might ask a generative AI model like GPT-5 from OpenAI or Claude from Anthropic:</p><ol><li><p><strong>&#8220;What is the capital of France?&#8221;</strong>  </p></li><li><p><strong>&#8220;What is the stock price of NVIDIA today?&#8221;</strong> </p></li></ol><p>The first question can be answered by a model directly (it has likely seen instances of this specific fact/knowledge and this is now encoded in its model weights). The second cannot - the model will hallucinate a plausible-sounding but incorrect answer because <strong>it doesn&#8217;t have access to real-time data</strong>.</p><p>An <strong>agent</strong> setup solves this by recognizing it needs current data, calling a financial API, and returning the actual price. 
This requires <strong>action</strong>, not just text generation.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!-DiB!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa9a91564-e917-409b-92ca-144f1a6ca5b1_1536x900.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!-DiB!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa9a91564-e917-409b-92ca-144f1a6ca5b1_1536x900.png 424w, https://substackcdn.com/image/fetch/$s_!-DiB!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa9a91564-e917-409b-92ca-144f1a6ca5b1_1536x900.png 848w, https://substackcdn.com/image/fetch/$s_!-DiB!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa9a91564-e917-409b-92ca-144f1a6ca5b1_1536x900.png 1272w, https://substackcdn.com/image/fetch/$s_!-DiB!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa9a91564-e917-409b-92ca-144f1a6ca5b1_1536x900.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!-DiB!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa9a91564-e917-409b-92ca-144f1a6ca5b1_1536x900.png" width="1456" height="853" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/a9a91564-e917-409b-92ca-144f1a6ca5b1_1536x900.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:853,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:97187,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://newsletter.victordibia.com/i/181268201?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa9a91564-e917-409b-92ca-144f1a6ca5b1_1536x900.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!-DiB!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa9a91564-e917-409b-92ca-144f1a6ca5b1_1536x900.png 424w, https://substackcdn.com/image/fetch/$s_!-DiB!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa9a91564-e917-409b-92ca-144f1a6ca5b1_1536x900.png 848w, https://substackcdn.com/image/fetch/$s_!-DiB!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa9a91564-e917-409b-92ca-144f1a6ca5b1_1536x900.png 1272w, https://substackcdn.com/image/fetch/$s_!-DiB!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa9a91564-e917-409b-92ca-144f1a6ca5b1_1536x900.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" 
height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p></p><h2>Agent Components</h2><p>In simple terms, an agent has three core components:</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!L0RT!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2c66190d-1e75-49a9-be45-a38de3a1599f_1592x902.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!L0RT!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2c66190d-1e75-49a9-be45-a38de3a1599f_1592x902.png 424w, 
https://substackcdn.com/image/fetch/$s_!L0RT!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2c66190d-1e75-49a9-be45-a38de3a1599f_1592x902.png 848w, https://substackcdn.com/image/fetch/$s_!L0RT!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2c66190d-1e75-49a9-be45-a38de3a1599f_1592x902.png 1272w, https://substackcdn.com/image/fetch/$s_!L0RT!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2c66190d-1e75-49a9-be45-a38de3a1599f_1592x902.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!L0RT!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2c66190d-1e75-49a9-be45-a38de3a1599f_1592x902.png" width="1456" height="825" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/2c66190d-1e75-49a9-be45-a38de3a1599f_1592x902.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:825,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:55426,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://newsletter.victordibia.com/i/181268201?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2c66190d-1e75-49a9-be45-a38de3a1599f_1592x902.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" 
srcset="https://substackcdn.com/image/fetch/$s_!L0RT!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2c66190d-1e75-49a9-be45-a38de3a1599f_1592x902.png 424w, https://substackcdn.com/image/fetch/$s_!L0RT!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2c66190d-1e75-49a9-be45-a38de3a1599f_1592x902.png 848w, https://substackcdn.com/image/fetch/$s_!L0RT!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2c66190d-1e75-49a9-be45-a38de3a1599f_1592x902.png 1272w, https://substackcdn.com/image/fetch/$s_!L0RT!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2c66190d-1e75-49a9-be45-a38de3a1599f_1592x902.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" 
stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p> </p><ul><li><p><strong>Model</strong>: The reasoning engine (typically an LLM like GPT-5) that processes context and decides what to do</p></li><li><p><strong>Tools</strong>: Functions the agent can call to take action - APIs, databases, code execution, web search</p></li><li><p><strong>Memory</strong>: Short-term (conversation history) and long-term (persistent storage across sessions)</p></li></ul><h2>Calling an LLM</h2><p>Before building an agent, you need to understand how to call a generative AI language model. Here&#8217;s the basic pattern using the OpenAI API:</p><pre><code><code>from openai import AsyncOpenAI

client = AsyncOpenAI(api_key=&#8221;your-api-key&#8221;)

response = await client.chat.completions.create(
    model=&#8221;gpt-5&#8221;,
    messages=[
        {&#8221;role&#8221;: &#8220;system&#8221;, &#8220;content&#8221;: &#8220;You are a helpful assistant.&#8221;},
        {&#8221;role&#8221;: &#8220;user&#8221;, &#8220;content&#8221;: &#8220;What is 2 + 2?&#8221;}
    ]
)

print(response.choices[0].message.content)
# Output: &#8220;4&#8221;
</code></code></pre><p>The API takes a list of messages (system instructions, user input, previous assistant responses) and returns a completion. This is a single request-response cycle.</p><p>To enable the use of tools (tool calling), you also pass tool definitions:</p><pre><code><code>response = await client.chat.completions.create(
    model=&#8221;gpt-5&#8221;,
    messages=messages,
    tools=[{
        &#8220;type&#8221;: &#8220;function&#8221;,
        &#8220;function&#8221;: {
            &#8220;name&#8221;: &#8220;get_stock_price&#8221;,
            &#8220;description&#8221;: &#8220;Get current stock price for a symbol&#8221;,
            &#8220;parameters&#8221;: {
                &#8220;type&#8221;: &#8220;object&#8221;,
                &#8220;properties&#8221;: {
                    &#8220;symbol&#8221;: {&#8221;type&#8221;: &#8220;string&#8221;, &#8220;description&#8221;: &#8220;Stock symbol like NVDA&#8221;}
                },
                &#8220;required&#8221;: [&#8221;symbol&#8221;]
            }
        }
    }]
)
</code></code></pre><p>When the model decides it needs to use a tool, instead of returning text content, it returns a <code>tool_calls</code> array with the function name and arguments.</p><h2>The Agent Execution Loop</h2><p>Here&#8217;s the core pattern that every agent follows:</p><pre><code><code>1. Prepare Context  &#8594;  Combine task + instructions + memory + history
2. Call Model       &#8594;  Send context to LLM, get response
3. Handle Response  &#8594;  If text, we&#8217;re done. If tool calls, execute them.
4. Iterate          &#8594;  Add tool results to context, go back to step 2
5. Return           &#8594;  Final response ready
</code></code></pre><p>In code:</p><pre><code><code>async def run(task: str):
    # 1. Prepare context
    messages = [
        {&#8221;role&#8221;: &#8220;system&#8221;, &#8220;content&#8221;: self.instructions},
        {&#8221;role&#8221;: &#8220;user&#8221;, &#8220;content&#8221;: task}
    ]

    while True:
        # 2. Call model
        response = await self.client.chat.completions.create(
            model=&#8221;gpt-5&#8221;,
            messages=messages,
            tools=self.tool_schemas
        )

        assistant_message = response.choices[0].message
        messages.append(assistant_message)

        # 3. Handle response
        if not assistant_message.tool_calls:
            # No tool calls - we&#8217;re done
            return assistant_message.content

        # 4. Execute tools and iterate
        for tool_call in assistant_message.tool_calls:
            result = await self.execute_tool(
                tool_call.function.name,
                json.loads(tool_call.function.arguments)
            )
            messages.append({
                &#8220;role&#8221;: &#8220;tool&#8221;,
                &#8220;tool_call_id&#8221;: tool_call.id,
                &#8220;content&#8221;: result
            })

        # Loop continues - model will process tool results
</code></code></pre><p>The key insight: an agent takes <strong>multiple steps</strong> (model call &#8594; tool execution &#8594; model call) within a single run. The loop continues until the model returns a text response instead of tool calls. In some cases we might need additional logic to guide program control flow (e.g., termination conditions such as a maximum number of turns) to avoid edge cases like infinite loops with cost implications.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!zqBA!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3c965fa2-30d3-44bb-92dc-0365c023c765_1774x1058.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!zqBA!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3c965fa2-30d3-44bb-92dc-0365c023c765_1774x1058.png 424w, https://substackcdn.com/image/fetch/$s_!zqBA!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3c965fa2-30d3-44bb-92dc-0365c023c765_1774x1058.png 848w, https://substackcdn.com/image/fetch/$s_!zqBA!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3c965fa2-30d3-44bb-92dc-0365c023c765_1774x1058.png 1272w, https://substackcdn.com/image/fetch/$s_!zqBA!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3c965fa2-30d3-44bb-92dc-0365c023c765_1774x1058.png 1456w" sizes="100vw"><img 
src="https://substackcdn.com/image/fetch/$s_!zqBA!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3c965fa2-30d3-44bb-92dc-0365c023c765_1774x1058.png" width="1456" height="868" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/3c965fa2-30d3-44bb-92dc-0365c023c765_1774x1058.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:868,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:90866,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://newsletter.victordibia.com/i/181268201?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3c965fa2-30d3-44bb-92dc-0365c023c765_1774x1058.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!zqBA!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3c965fa2-30d3-44bb-92dc-0365c023c765_1774x1058.png 424w, https://substackcdn.com/image/fetch/$s_!zqBA!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3c965fa2-30d3-44bb-92dc-0365c023c765_1774x1058.png 848w, https://substackcdn.com/image/fetch/$s_!zqBA!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3c965fa2-30d3-44bb-92dc-0365c023c765_1774x1058.png 1272w, https://substackcdn.com/image/fetch/$s_!zqBA!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3c965fa2-30d3-44bb-92dc-0365c023c765_1774x1058.png 
1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a><figcaption class="image-caption">The basic Agent Action-Perception Loop. Learn more in the book  <a href="https://buy.multiagentbook.com/">buy.multiagentbook.com</a></figcaption></figure></div><h2>Tool Execution</h2><p>When the model returns a tool call, you need to actually execute it:</p><pre><code><code>async def execute_tool(self, name: str, arguments: dict) -&gt; str:
    tool = self.tools[name]
    try:
        result = await tool(**arguments)
        return str(result)
    except Exception as e:
        return f&#8221;Error: {e}&#8221;
</code></code></pre><p>The result gets added back to the message history as a <code>tool</code> message, and the loop continues. The model sees what the tool returned and can either call more tools or generate a final response.</p><h2>A Complete Example</h2><p>Putting it together:</p><pre><code><code>class Agent:
    def __init__(self, instructions: str, tools: list):
        self.client = AsyncOpenAI()
        self.instructions = instructions
        self.tools = {t.__name__: t for t in tools}
        self.tool_schemas = [self._make_schema(t) for t in tools]

    async def run(self, task: str) -&gt; str:
        messages = [
            {&#8221;role&#8221;: &#8220;system&#8221;, &#8220;content&#8221;: self.instructions},
            {&#8221;role&#8221;: &#8220;user&#8221;, &#8220;content&#8221;: task}
        ]

        while True:
            response = await self.client.chat.completions.create(
                model=&#8221;gpt-5&#8221;,
                messages=messages,
                tools=self.tool_schemas
            )

            msg = response.choices[0].message
            messages.append(msg)

            if not msg.tool_calls:
                return msg.content

            for tc in msg.tool_calls:
                result = await self.execute_tool(
                    tc.function.name,
                    json.loads(tc.function.arguments)
                )
                messages.append({
                    &#8220;role&#8221;: &#8220;tool&#8221;,
                    &#8220;tool_call_id&#8221;: tc.id,
                    &#8220;content&#8221;: result
                })

# Usage
async def get_stock_price(symbol: str) -&gt; str:
    # In reality, call an API here
    return f&#8221;{symbol}: $142.50&#8221;

agent = Agent(
    instructions=&#8221;You help users get stock information.&#8221;,
    tools=[get_stock_price]
)

result = await agent.run(&#8221;What&#8217;s NVIDIA trading at?&#8221;)
print(result)
# &#8220;NVIDIA (NVDA) is currently trading at $142.50.&#8221;
</code></code></pre><p>The agent:</p><ol><li><p>Receives &#8220;What&#8217;s NVIDIA trading at?&#8221;</p></li><li><p>Calls the model, which decides to use <code>get_stock_price</code></p></li><li><p>Executes <code>get_stock_price(&#8221;NVDA&#8221;)</code> &#8594; returns &#8220;NVDA: $142.50&#8221;</p></li><li><p>Adds the result to messages, calls the model again</p></li><li><p>Model generates a natural language response incorporating the data</p></li></ol><h2>Same Pattern, Different Frameworks</h2><p>The execution loop we built is the same pattern used by production agent frameworks. The syntax differs, but the core architecture is identical: define tools, create an agent with instructions, run it on a task. The code snippets below show how each of these ideas is implemented in frameworks like <a href="https://github.com/microsoft/agent-framework">Microsoft Agent Framework</a>, <a href="https://github.com/google/adk-python">Google ADK</a>, and <a href="https://github.com/langchain-ai/langgraph">LangGraph</a>.</p><blockquote><p><strong>Further Reading</strong>: The GitHub repo for the book shows how the same concepts (agents, workflows, orchestrators) are implemented across each of these frameworks: <a href="https://github.com/victordibia/designing-multiagent-systems/tree/main/examples/frameworks">github.com/victordibia/designing-multiagent-systems/examples/frameworks</a></p></blockquote><p><strong>Microsoft Agent Framework:</strong></p><pre><code><code>from agent_framework import ai_function
from agent_framework.azure import AzureOpenAIChatClient

@ai_function
def get_weather(location: str) -&gt; str:
    &#8220;&#8221;&#8220;Get current weather for a given location.&#8221;&#8220;&#8221;
    return f&#8221;The weather in {location} is sunny, 75&#176;F&#8221;

client = AzureOpenAIChatClient(deployment_name=&#8221;gpt-4.1-mini&#8221;)
agent = client.create_agent(
    name=&#8221;assistant&#8221;,
    instructions=&#8221;You are a helpful assistant.&#8221;,
    tools=[get_weather],
)

result = await agent.run(&#8221;What&#8217;s the weather in Paris?&#8221;)
</code></code></pre><p><strong>Google ADK:</strong></p><pre><code><code>from google.adk import Agent

def get_weather(location: str) -&gt; str:
    &#8220;&#8221;&#8220;Get current weather for a given location.&#8221;&#8220;&#8221;
    return f&#8221;The weather in {location} is sunny, 75&#176;F&#8221;

agent = Agent(
    name=&#8221;assistant&#8221;,
    model=&#8221;gemini-flash-latest&#8221;,
    instruction=&#8221;You are a helpful assistant.&#8221;,
    tools=[get_weather],
)

# Run via InMemoryRunner
</code></code></pre><p><strong>LangGraph:</strong></p><pre><code><code>from langchain_core.tools import tool
from langgraph.prebuilt import create_react_agent

@tool
def get_weather(location: str) -&gt; str:
    &#8220;&#8221;&#8220;Get current weather for a given location.&#8221;&#8220;&#8221;
    return f&#8221;The weather in {location} is sunny, 75&#176;F&#8221;

agent = create_react_agent(
    model=llm,
    tools=[get_weather],
)

result = agent.invoke({&#8221;messages&#8221;: [(&#8221;user&#8221;, &#8220;What&#8217;s the weather in Paris?&#8221;)]})
</code></code></pre><p>All three frameworks: define a function, wrap it as a tool, pass it to an agent, call run. The execution loop underneath handles the model calls, tool execution, and iteration.</p><h2>What&#8217;s Missing</h2><p>This basic loop works, but production agents need more:</p><ul><li><p><strong>Streaming</strong>: Long tasks need progress updates, not just a final response</p></li><li><p><strong>Memory</strong>: Persisting context across sessions</p></li><li><p><strong>Middleware</strong>: Logging, rate limiting, safety checks</p></li><li><p><strong>Error handling</strong>: Retries, graceful degradation</p></li><li><p><strong>Context management</strong>: Summarizing/compacting as context grows</p></li><li><p><strong>Orchestrating multiple agents</strong>: Deterministic workflows and autonomous orchestration patterns (handoff, magentic one etc)</p></li><li><p><strong>End-user interfaces</strong>: Integrating agents into web applications </p></li><li><p><strong>Complete Use Cases: </strong>Building a full coding agent with file system access, code execution, and iterative debugging </p></li></ul><p>These are covered in depth in my book <a href="https://buy.multiagentbook.com/">Designing Multi-Agent Systems</a>, which builds a complete agent framework (picoagents) from scratch with all of these features and two complete use cases.</p><p></p>]]></content:encoded></item><item><title><![CDATA[Manning Cancelled My Book — Here's What Happened Next]]></title><description><![CDATA[Issue #51 | Sometimes the arrangement is the problem]]></description><link>https://newsletter.victordibia.com/p/manning-cancelled-my-book-heres-what</link><guid isPermaLink="false">https://newsletter.victordibia.com/p/manning-cancelled-my-book-heres-what</guid><dc:creator><![CDATA[Victor Dibia, PhD]]></dc:creator><pubDate>Mon, 08 Dec 2025 15:30:29 GMT</pubDate><enclosure 
url="https://substackcdn.com/image/fetch/$s_!Y51P!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F56c0ceef-9ef2-4136-bdb3-3170cabc50c6_2458x1564.png" length="0" type="image/jpeg"/><content:encoded><![CDATA[<blockquote><p>&#8220;Hey Victor, we are cancelling your book, you will get an email.&#8221;</p></blockquote><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!Y51P!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F56c0ceef-9ef2-4136-bdb3-3170cabc50c6_2458x1564.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!Y51P!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F56c0ceef-9ef2-4136-bdb3-3170cabc50c6_2458x1564.png 424w, https://substackcdn.com/image/fetch/$s_!Y51P!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F56c0ceef-9ef2-4136-bdb3-3170cabc50c6_2458x1564.png 848w, https://substackcdn.com/image/fetch/$s_!Y51P!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F56c0ceef-9ef2-4136-bdb3-3170cabc50c6_2458x1564.png 1272w, https://substackcdn.com/image/fetch/$s_!Y51P!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F56c0ceef-9ef2-4136-bdb3-3170cabc50c6_2458x1564.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!Y51P!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F56c0ceef-9ef2-4136-bdb3-3170cabc50c6_2458x1564.png" width="1456" height="926" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/56c0ceef-9ef2-4136-bdb3-3170cabc50c6_2458x1564.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:926,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:3922979,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:false,&quot;topImage&quot;:true,&quot;internalRedirect&quot;:&quot;https://newsletter.victordibia.com/i/171608968?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F56c0ceef-9ef2-4136-bdb3-3170cabc50c6_2458x1564.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!Y51P!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F56c0ceef-9ef2-4136-bdb3-3170cabc50c6_2458x1564.png 424w, https://substackcdn.com/image/fetch/$s_!Y51P!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F56c0ceef-9ef2-4136-bdb3-3170cabc50c6_2458x1564.png 848w, https://substackcdn.com/image/fetch/$s_!Y51P!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F56c0ceef-9ef2-4136-bdb3-3170cabc50c6_2458x1564.png 1272w, https://substackcdn.com/image/fetch/$s_!Y51P!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F56c0ceef-9ef2-4136-bdb3-3170cabc50c6_2458x1564.png 1456w" sizes="100vw" fetchpriority="high"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" 
width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p></p><p>Emails like this are never easy to deal with. If you&#8217;ve followed some of my work, you&#8217;d know that I had started writing a book on multi-agent systems. I started working with Manning Publications in early 2024, and in July of that year I announced that the book was in early access through their MEAP (Manning Early Access Program).</p><p>I had a book cover. A discount code. Marketing plans. All the good stuff.</p><p>Fast forward to July 2025 &#8212; I got the email above. 
<em><strong>The book got cancelled</strong></em>.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!mEXk!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdbbd5880-727a-43cf-8185-b3caa8d90019_1354x949.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!mEXk!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdbbd5880-727a-43cf-8185-b3caa8d90019_1354x949.png 424w, https://substackcdn.com/image/fetch/$s_!mEXk!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdbbd5880-727a-43cf-8185-b3caa8d90019_1354x949.png 848w, https://substackcdn.com/image/fetch/$s_!mEXk!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdbbd5880-727a-43cf-8185-b3caa8d90019_1354x949.png 1272w, https://substackcdn.com/image/fetch/$s_!mEXk!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdbbd5880-727a-43cf-8185-b3caa8d90019_1354x949.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!mEXk!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdbbd5880-727a-43cf-8185-b3caa8d90019_1354x949.png" width="1354" height="949" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/dbbd5880-727a-43cf-8185-b3caa8d90019_1354x949.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:949,&quot;width&quot;:1354,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:300836,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:false,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://newsletter.victordibia.com/i/171608968?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdbbd5880-727a-43cf-8185-b3caa8d90019_1354x949.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!mEXk!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdbbd5880-727a-43cf-8185-b3caa8d90019_1354x949.png 424w, https://substackcdn.com/image/fetch/$s_!mEXk!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdbbd5880-727a-43cf-8185-b3caa8d90019_1354x949.png 848w, https://substackcdn.com/image/fetch/$s_!mEXk!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdbbd5880-727a-43cf-8185-b3caa8d90019_1354x949.png 1272w, https://substackcdn.com/image/fetch/$s_!mEXk!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdbbd5880-727a-43cf-8185-b3caa8d90019_1354x949.png 1456w" sizes="100vw"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" 
viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p></p><p>Four months later, <a href="https://www.amazon.com/dp/B0G2BCQQJY">the book is now published</a> &#8212; a #1 New Release on Amazon, #5 Best Seller in Generative AI as at time of writing this. 
The book went from 4 uploaded chapters (at the time of cancellation) to complete &#8212; 15 chapters, 56,000+ words, 186 code snippets, and 51 hand-drawn figures.</p><ul><li><p>Digital edition: <a href="https://buy.multiagentbook.com/">https://buy.multiagentbook.com/</a></p></li><li><p>Print edition on Amazon: <a href="https://www.amazon.com/dp/B0G2BCQQJY">https://www.amazon.com/dp/B0G2BCQQJY</a></p></li></ul><p>Here&#8217;s what happened.</p><div><hr></div><div class="subscription-widget-wrap-editor" data-attrs="{&quot;url&quot;:&quot;https://newsletter.victordibia.com/subscribe?&quot;,&quot;text&quot;:&quot;Subscribe&quot;,&quot;language&quot;:&quot;en&quot;}" data-component-name="SubscribeWidgetToDOM"><div class="subscription-widget show-subscribe"><div class="preamble"><p class="cta-caption">Designing with AI is a reader-supported publication. To receive new posts and support my work, consider becoming a free or paid subscriber.</p></div><form class="subscription-widget-subscribe"><input type="email" class="email-input" name="email" placeholder="Type your email&#8230;" tabindex="-1"><input type="submit" class="button primary" value="Subscribe"><div class="fake-input-wrapper"><div class="fake-input"></div><div class="fake-button"></div></div></form></div></div><div><hr></div><h2>How Does A Book Get Cancelled?</h2><p>The short story is that&#8230; <em>life happened</em>. And also, the process wasn&#8217;t working for me. Here&#8217;s the timeline:</p><ul><li><p><strong>January 2024</strong>: Contract signed with Manning.</p></li><li><p><strong>May 2024</strong>: I <a href="https://victordibia.com/blog/wrist-fracture/">fractured my wrist</a>. A bad fall, surgery, titanium plates with screws, recovery. This is where things slowed down. Beyond the physical recovery, for a researcher or software engineer, there&#8217;s the worry about how an injury might affect your career. 
Luckily, I&#8217;ve made a good recovery.</p></li></ul><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!lY8C!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F17681345-bd1b-4a4d-973b-baa41d70283e_850x548.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!lY8C!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F17681345-bd1b-4a4d-973b-baa41d70283e_850x548.png 424w, https://substackcdn.com/image/fetch/$s_!lY8C!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F17681345-bd1b-4a4d-973b-baa41d70283e_850x548.png 848w, https://substackcdn.com/image/fetch/$s_!lY8C!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F17681345-bd1b-4a4d-973b-baa41d70283e_850x548.png 1272w, https://substackcdn.com/image/fetch/$s_!lY8C!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F17681345-bd1b-4a4d-973b-baa41d70283e_850x548.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!lY8C!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F17681345-bd1b-4a4d-973b-baa41d70283e_850x548.png" width="850" height="548" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/17681345-bd1b-4a4d-973b-baa41d70283e_850x548.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:548,&quot;width&quot;:850,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:null,&quot;alt&quot;:&quot;Fractured my wrist. Wore a splint for a while.&quot;,&quot;title&quot;:null,&quot;type&quot;:null,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="Fractured my wrist. Wore a splint for a while." title="Fractured my wrist. Wore a splint for a while." srcset="https://substackcdn.com/image/fetch/$s_!lY8C!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F17681345-bd1b-4a4d-973b-baa41d70283e_850x548.png 424w, https://substackcdn.com/image/fetch/$s_!lY8C!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F17681345-bd1b-4a4d-973b-baa41d70283e_850x548.png 848w, https://substackcdn.com/image/fetch/$s_!lY8C!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F17681345-bd1b-4a4d-973b-baa41d70283e_850x548.png 1272w, https://substackcdn.com/image/fetch/$s_!lY8C!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F17681345-bd1b-4a4d-973b-baa41d70283e_850x548.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" 
stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><ul><li><p><strong>July 2024</strong>: MEAP (early access) launched with the first two chapters.</p></li><li><p><strong>Sept 2024 &#8211; Jan 2025</strong>: The AutoGen rewrite. This was an especially busy time at work &#8212; AutoGen was rewritten with a completely new API (<a href="https://devblogs.microsoft.com/autogen/autogen-reimagined-launching-autogen-0-4/">released Jan 2025</a>). I contributed to the AgentChat API, which eventually became the format that other frameworks (Google ADK, OpenAI Agents SDK) converged on, and I <a href="https://newsletter.victordibia.com/p/autogen-studio-v04-a-no-code-tool">rebuilt AutoGen Studio on this new version</a>. This created a problem for the book: code in chapter 1 was already obsolete before I finished chapter 2. I realized that writing a book tightly coupled to a specific framework version was a trap. 
I originally had a co-author, but by the time of cancellation, I was writing alone.</p></li></ul><blockquote><p>For new readers, I am a core maintainer of AutoGen and have written extensively about it and agents in general. </p></blockquote><ul><li><p><strong>July 2025</strong>: Cancelled. I had written 4 chapters and uploaded them to the Manning platform. But that pace was understandably slow for the publisher. They chose to cancel.</p></li></ul><p>Throughout this period, I was writing &#8212; but the upload and review cycle wasn&#8217;t working for me. Every change to the table of contents had to be approved. The Manning internal review process brought feedback that often felt disconnected from improving the book. Weekly meetings with the editor ate into writing time. I spent energy conforming to the process when I <em>should</em> have been creating.</p><p>It took the cancellation to isolate this observation: the lack of creative control was itself a blocker. For me.</p><div><hr></div><h2>Getting Back on Track</h2><p>The cancellation forced me to rethink what was going wrong. My wife pointed out something I probably knew was true &#8212; she&#8217;d seen me hammer out book-quality writing in the form of blog posts late at night during this same period (see this <a href="https://newsletter.victordibia.com/">substack</a>). Writing wasn&#8217;t the problem. The process was.</p><p>Luckily, it was a clean break. 
I hadn&#8217;t collected my advance, so all rights for written material returned to me.</p><p>I decided to focus on why I had started writing in the first place:</p><ul><li><p>There are many users new to AI who need structured guidance, not just material scattered across papers, blog posts, etc.</p></li><li><p>Long-form content and a guided teaching experience are still critical &#8212; even though deep research agents can find you whatever you want, it won&#8217;t be in a coherent, graduated progression</p></li><li><p>There is lots of noise &#8212; new stuff every day &#8212; but there are principles and patterns that will endure</p></li><li><p>There is value in understanding the nuts and bolts of agents and how to <em>build them from scratch</em></p></li></ul><p><strong>I renamed the book</strong> &#8212; &#8220;Designing Multi-Agent Systems&#8221; instead of &#8220;Multi-Agent Systems with AutoGen.&#8221;</p><p><strong>I revamped the direction.</strong> The pivot to building from scratch turned out to be the strongest choice I could have made. The book would have been problematic if it stayed tied to AutoGen &#8212; changing the title would have been impossible (or very challenging) with Manning, and the content would age poorly. 
Instead, the new approach lets readers take ownership: if concepts change (as they inevitably will), readers are equipped to adapt because they understand the underlying principles.</p><p>The time I would have spent fighting with Manning&#8217;s tools and review process went instead toward the reader experience: crafting a coherent learning progression, building <a href="https://pypi.org/project/picoagents/">PicoAgents</a> from scratch (it&#8217;s a real library on PyPI), creating a <a href="https://github.com/victordibia/designing-multiagent-systems">one-click Codespaces testing environment</a>, and building an extensible platform where readers get the digital book plus optional end-to-end samples that will grow over time.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!Iz8C!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6a437120-ce27-4c49-b1de-28a85162ca8f_2576x1782.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!Iz8C!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6a437120-ce27-4c49-b1de-28a85162ca8f_2576x1782.png 424w, https://substackcdn.com/image/fetch/$s_!Iz8C!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6a437120-ce27-4c49-b1de-28a85162ca8f_2576x1782.png 848w, https://substackcdn.com/image/fetch/$s_!Iz8C!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6a437120-ce27-4c49-b1de-28a85162ca8f_2576x1782.png 1272w, 
https://substackcdn.com/image/fetch/$s_!Iz8C!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6a437120-ce27-4c49-b1de-28a85162ca8f_2576x1782.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!Iz8C!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6a437120-ce27-4c49-b1de-28a85162ca8f_2576x1782.png" width="1456" height="1007" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/6a437120-ce27-4c49-b1de-28a85162ca8f_2576x1782.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:1007,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:978080,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://newsletter.victordibia.com/i/171608968?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6a437120-ce27-4c49-b1de-28a85162ca8f_2576x1782.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!Iz8C!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6a437120-ce27-4c49-b1de-28a85162ca8f_2576x1782.png 424w, https://substackcdn.com/image/fetch/$s_!Iz8C!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6a437120-ce27-4c49-b1de-28a85162ca8f_2576x1782.png 848w, 
https://substackcdn.com/image/fetch/$s_!Iz8C!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6a437120-ce27-4c49-b1de-28a85162ca8f_2576x1782.png 1272w, https://substackcdn.com/image/fetch/$s_!Iz8C!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6a437120-ce27-4c49-b1de-28a85162ca8f_2576x1782.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a><figcaption class="image-caption">The book distribution platform <a href="https://buy.multiagentbook.com/">buy.multiagentbook.com</a> 
</figcaption></figure></div><p><strong>I also built a self-publish pipeline.</strong> I built <a href="https://buy.multiagentbook.com/">buy.multiagentbook.com</a> from scratch &#8212; customer dashboard, payment processing, download management for the digital version of the book. (I plan to write about the technical details in a Part 2 post!) Print is managed by a separate entity - Amazon&#8217;s KDP service - and the sheer amount of what I had to learn to get all this done - typesetting, bleeds, font and readability effects, cover design to meet specs - is a different story for another day!</p><p>Self-publishing gave me things I didn&#8217;t realize I was missing:</p><ul><li><p><strong>Full creative control.</strong> I could add the Responsible AI chapter without arguing for it. I could structure the book my way.</p></li><li><p><strong>Tools I was excited about.</strong> Quarto instead of AsciiDoc. Hot reloading. Multi-format output from a single source.</p></li><li><p><strong>Control over updates.</strong> With Manning, any update could take weeks. With my platform, it takes 5 minutes. Fun fact - I wrote big chunks of the book and released them whilst waiting at a cafe for my 6-year-old to finish gym class.</p></li><li><p><strong>Control over distribution.</strong> Technical books can be like cars &#8212; 30% outdated the moment they&#8217;re in print. The only chance I had to fight this was a framework-agnostic rewrite, a living distribution method, and building a direct connection with readers.</p></li><li><p><strong>Higher royalty share.</strong> Self-publishing means keeping a larger percentage of each sale, which helped offset the effort of building my own platform. Most authors of technical books will (correctly) tell you the proceeds rarely justify the hours. But if you&#8217;re going to do it anyway, know that traditional publisher royalties are typically 10-15%, while self-publishing through your own platform (Stripe, etc.) 
can be 85-95%, and print-on-demand (KDP, IngramSpark) falls somewhere in between at 35-60%.</p></li></ul><div><hr></div><h2>The Result</h2><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!DJcq!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F388ea281-bbf1-4a86-8a30-314dd825d74c_2416x1752.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!DJcq!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F388ea281-bbf1-4a86-8a30-314dd825d74c_2416x1752.png 424w, https://substackcdn.com/image/fetch/$s_!DJcq!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F388ea281-bbf1-4a86-8a30-314dd825d74c_2416x1752.png 848w, https://substackcdn.com/image/fetch/$s_!DJcq!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F388ea281-bbf1-4a86-8a30-314dd825d74c_2416x1752.png 1272w, https://substackcdn.com/image/fetch/$s_!DJcq!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F388ea281-bbf1-4a86-8a30-314dd825d74c_2416x1752.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!DJcq!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F388ea281-bbf1-4a86-8a30-314dd825d74c_2416x1752.png" width="1456" height="1056" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/388ea281-bbf1-4a86-8a30-314dd825d74c_2416x1752.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:1056,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:1032227,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://newsletter.victordibia.com/i/171608968?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F388ea281-bbf1-4a86-8a30-314dd825d74c_2416x1752.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!DJcq!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F388ea281-bbf1-4a86-8a30-314dd825d74c_2416x1752.png 424w, https://substackcdn.com/image/fetch/$s_!DJcq!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F388ea281-bbf1-4a86-8a30-314dd825d74c_2416x1752.png 848w, https://substackcdn.com/image/fetch/$s_!DJcq!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F388ea281-bbf1-4a86-8a30-314dd825d74c_2416x1752.png 1272w, https://substackcdn.com/image/fetch/$s_!DJcq!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F388ea281-bbf1-4a86-8a30-314dd825d74c_2416x1752.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" 
width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a><figcaption class="image-caption">4 weeks post release, Designing Multi-agent Systems is the #1 New Release on Amazon for Generative AI books.</figcaption></figure></div><p>Between July and November 2025, I put it all together. I had done large chunks of the work (some sections already written on this substack actually), just in bits and pieces - eval code in many places, workflows, sample apps. 
I used the time after the cancellation to consolidate and finish.</p><p>The book released in <a href="https://www.amazon.com/dp/B0G2BCQQJY">November 2025</a>:</p><ul><li><p><strong>15 chapters</strong></p></li><li><p><strong>56,000+ words</strong></p></li><li><p><strong>186 code snippets</strong></p></li><li><p><strong>51 figures and diagrams</strong> (all hand-drawn by me)</p></li><li><p><strong>~400 pages</strong></p></li></ul><p>It hit <a href="https://www.amazon.com/dp/B0G2BCQQJY">#1 New Release in the Generative AI</a> category on Amazon and stayed there for 4 weeks. 100s of copies sold (thank you!) </p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!YjoU!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fea66b86e-237b-4865-9e67-b4052c5d264e_2564x1900.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!YjoU!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fea66b86e-237b-4865-9e67-b4052c5d264e_2564x1900.png 424w, https://substackcdn.com/image/fetch/$s_!YjoU!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fea66b86e-237b-4865-9e67-b4052c5d264e_2564x1900.png 848w, https://substackcdn.com/image/fetch/$s_!YjoU!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fea66b86e-237b-4865-9e67-b4052c5d264e_2564x1900.png 1272w, https://substackcdn.com/image/fetch/$s_!YjoU!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fea66b86e-237b-4865-9e67-b4052c5d264e_2564x1900.png 1456w" sizes="100vw"><img 
src="https://substackcdn.com/image/fetch/$s_!YjoU!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fea66b86e-237b-4865-9e67-b4052c5d264e_2564x1900.png" width="1456" height="1079" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/ea66b86e-237b-4865-9e67-b4052c5d264e_2564x1900.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:1079,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:622824,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://newsletter.victordibia.com/i/171608968?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fea66b86e-237b-4865-9e67-b4052c5d264e_2564x1900.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!YjoU!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fea66b86e-237b-4865-9e67-b4052c5d264e_2564x1900.png 424w, https://substackcdn.com/image/fetch/$s_!YjoU!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fea66b86e-237b-4865-9e67-b4052c5d264e_2564x1900.png 848w, https://substackcdn.com/image/fetch/$s_!YjoU!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fea66b86e-237b-4865-9e67-b4052c5d264e_2564x1900.png 1272w, https://substackcdn.com/image/fetch/$s_!YjoU!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fea66b86e-237b-4865-9e67-b4052c5d264e_2564x1900.png 
1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a><figcaption class="image-caption">The book has been well received, thanks to all who have taken time to leave feedback!</figcaption></figure></div><div><hr></div><h2>What I&#8217;d Tell Myself If I Started Over</h2><ul><li><p><strong>Know your working style.</strong> Traditional publishing works well for many authors &#8212; Manning has published excellent books. 
But if you need creative control and fast iteration, the friction will slow you down more than you expect.</p></li><li><p><strong>Publishers optimize for process, not necessarily your vision.</strong> Approvals, reviews, and meetings exist for good reasons &#8212; but editorial feedback isn&#8217;t always from subject matter experts. I found myself spending energy debating suggestions that didn&#8217;t improve the book. Towards the end, I realized our goals had diverged: they needed to ship books on schedule; I wanted to craft a learning experience I was proud of.</p></li><li><p><strong>Self-publishing is viable &#8212; but only if you build systems.</strong> I spent significant time on infrastructure (payments, hosting, distribution). If you&#8217;re not willing to do that, platforms like Gumroad or Leanpub are reasonable alternatives.</p></li><li><p><strong>The book you finish is better than the book you planned.</strong> The cancellation forced a rewrite that made the book framework-agnostic and more durable. Sometimes constraints are gifts.</p></li></ul><div><hr></div><h2>The Hard Part</h2><p>Where did I find the time? The honest answer: I didn&#8217;t really have it. July through November were long days &#8212; early mornings, late nights, weekends, very little sleep. This has been one of the hardest things I have done (since finishing my PhD). </p><p>I want to thank my wife and son for their incredible support. Their love, hugs, and being there for me made this book as much theirs as it is mine.</p><p>That cancellation email? It gave me the book I was trying to write all along.</p><div><hr></div><p><em>Part 2 of this story: </em>I Built My Book&#8217;s Entire Distribution Platform for $5/Month <em>&#8212; the technical details of building buy.multiagentbook.com from scratch. 
Coming soon!</em></p><p><strong>Links:</strong></p><ul><li><p><a href="https://buy.multiagentbook.com/">Designing Multi-Agent Systems</a> &#8212; the book</p></li><li><p><a href="https://multiagentbook.com/">multiagentbook.com</a> &#8212; marketing site</p></li><li><p><a href="https://www.amazon.com/dp/B0G2BCQQJY">Amazon (print editions)</a> &#8212; paperback and hardcover</p></li></ul><div><hr></div><p>P.S. If you got the book and have comments or feedback, please share them here! <br>Also, if you could leave a review, it goes a long way in making the book more visible! </p><ul><li><p>Goodreads <a href="https://www.goodreads.com/book/show/243940144-designing-multi-agent-systems">https://www.goodreads.com/book/show/243940144-designing-multi-agent-systems</a> </p></li><li><p>Amazon <a href="https://www.amazon.com/review/create-review/?ie=UTF8&amp;channel=glance-detail&amp;asin=B0G2BCQQJY">https://www.amazon.com/review/create-review/?ie=UTF8&amp;channel=glance-detail&amp;asin=B0G2BCQQJY</a> </p></li></ul><p></p><div><hr></div><p>Interested in learning about translation rights? Please send an email to support@multiagentbook.com to learn about copyrights and legal requirements. 
</p><div><hr></div>]]></content:encoded></item><item><title><![CDATA[Top Books on AI Agents in 2026]]></title><description><![CDATA[Issue #50 | A list of books I'd recommend to folks interested in understanding and building AI and Agents in 2026]]></description><link>https://newsletter.victordibia.com/p/top-books-on-ai-agents-in-2025</link><guid isPermaLink="false">https://newsletter.victordibia.com/p/top-books-on-ai-agents-in-2025</guid><dc:creator><![CDATA[Victor Dibia, PhD]]></dc:creator><pubDate>Tue, 02 Dec 2025 15:30:37 GMT</pubDate><enclosure url="https://substackcdn.com/image/fetch/$s_!zGIw!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Faac03824-df7a-40f9-8ae2-92befe195d37_2375x1319.png" length="0" type="image/jpeg"/><content:encoded><![CDATA[<div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!zGIw!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Faac03824-df7a-40f9-8ae2-92befe195d37_2375x1319.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!zGIw!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Faac03824-df7a-40f9-8ae2-92befe195d37_2375x1319.png 424w, https://substackcdn.com/image/fetch/$s_!zGIw!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Faac03824-df7a-40f9-8ae2-92befe195d37_2375x1319.png 848w, https://substackcdn.com/image/fetch/$s_!zGIw!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Faac03824-df7a-40f9-8ae2-92befe195d37_2375x1319.png 1272w, 
https://substackcdn.com/image/fetch/$s_!zGIw!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Faac03824-df7a-40f9-8ae2-92befe195d37_2375x1319.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!zGIw!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Faac03824-df7a-40f9-8ae2-92befe195d37_2375x1319.png" width="1456" height="809" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/aac03824-df7a-40f9-8ae2-92befe195d37_2375x1319.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:809,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:3540709,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:false,&quot;topImage&quot;:true,&quot;internalRedirect&quot;:&quot;https://newsletter.victordibia.com/i/178986157?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Faac03824-df7a-40f9-8ae2-92befe195d37_2375x1319.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!zGIw!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Faac03824-df7a-40f9-8ae2-92befe195d37_2375x1319.png 424w, https://substackcdn.com/image/fetch/$s_!zGIw!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Faac03824-df7a-40f9-8ae2-92befe195d37_2375x1319.png 848w, 
https://substackcdn.com/image/fetch/$s_!zGIw!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Faac03824-df7a-40f9-8ae2-92befe195d37_2375x1319.png 1272w, https://substackcdn.com/image/fetch/$s_!zGIw!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Faac03824-df7a-40f9-8ae2-92befe195d37_2375x1319.png 1456w" sizes="100vw" fetchpriority="high"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>I spent some time over the last two years <a href="https://www.amazon.com/dp/B0G2BCQQJY">writing a book on multi-agent 
systems</a>. Along the way, I read as much as I could find on the topic. </p><p>Get the book here (<a href="https://buy.multiagentbook.com/">Digital version with samples</a> |  <a href="https://www.amazon.com/dp/B0G2BCQQJY">Print version on Amazon</a>)</p><blockquote><p>2025 has been heralded as the <a href="https://www.axios.com/2025/01/23/davos-2025-ai-agents">year of</a> <a href="https://finance.yahoo.com/news/jensen-huang-declares-age-agentic-154517698.html?">agents</a>. Beyond the <em>hype</em>, building applications that integrate agents is becoming a <strong>widely useful skill</strong>. Books that help provide a <em>curated learning path</em> can be valuable in helping you get started.</p></blockquote><p>Naturally, given how early the AI agent approach is, there aren&#8217;t many books <em>specifically about AI agents</em> yet. Most &#8220;AI&#8221; books focus on machine learning fundamentals or LLM prompting. Agent-specific content tends to be scattered across blog posts, framework documentation, and research papers.</p><p>But a few books stand out. I&#8217;ve reviewed them based on four dimensions that matter for learning to build agents:</p><ul><li><p><strong>Agent Concepts</strong>: Tools, memory, orchestration, observability</p></li><li><p><strong>Multi-Agent Patterns</strong>: Coordination, handoffs, team structures</p></li><li><p><strong>Hands-on Implementation</strong>: Runnable code, complete applications</p></li><li><p><strong>UX Principles</strong>: Streaming, human-in-the-loop, interfaces</p></li></ul><blockquote><p><strong>Disclosure</strong>: I wrote one of these books and the selection here is <em>my opinion</em>. I&#8217;ll be upfront about that and let you decide what&#8217;s useful. 
I also created an interactive page <a href="https://multiagentbook.com/labs/ai-agent-books">here</a> as more books become available.</p></blockquote><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!SDDL!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4c74666f-8838-4c35-91bf-2387e5553016_1818x1047.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!SDDL!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4c74666f-8838-4c35-91bf-2387e5553016_1818x1047.png 424w, https://substackcdn.com/image/fetch/$s_!SDDL!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4c74666f-8838-4c35-91bf-2387e5553016_1818x1047.png 848w, https://substackcdn.com/image/fetch/$s_!SDDL!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4c74666f-8838-4c35-91bf-2387e5553016_1818x1047.png 1272w, https://substackcdn.com/image/fetch/$s_!SDDL!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4c74666f-8838-4c35-91bf-2387e5553016_1818x1047.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!SDDL!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4c74666f-8838-4c35-91bf-2387e5553016_1818x1047.png" width="1456" height="839" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/4c74666f-8838-4c35-91bf-2387e5553016_1818x1047.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:839,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:609940,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:false,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://newsletter.victordibia.com/i/178986157?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4c74666f-8838-4c35-91bf-2387e5553016_1818x1047.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!SDDL!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4c74666f-8838-4c35-91bf-2387e5553016_1818x1047.png 424w, https://substackcdn.com/image/fetch/$s_!SDDL!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4c74666f-8838-4c35-91bf-2387e5553016_1818x1047.png 848w, https://substackcdn.com/image/fetch/$s_!SDDL!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4c74666f-8838-4c35-91bf-2387e5553016_1818x1047.png 1272w, https://substackcdn.com/image/fetch/$s_!SDDL!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4c74666f-8838-4c35-91bf-2387e5553016_1818x1047.png 1456w" sizes="100vw"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" 
viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a><figcaption class="image-caption">Full interactive comparison at multiagentbook.com/labs/ai-agent-books</figcaption></figure></div><div><hr></div><div class="subscription-widget-wrap-editor" data-attrs="{&quot;url&quot;:&quot;https://newsletter.victordibia.com/subscribe?&quot;,&quot;text&quot;:&quot;Subscribe&quot;,&quot;language&quot;:&quot;en&quot;}" data-component-name="SubscribeWidgetToDOM"><div class="subscription-widget show-subscribe"><div class="preamble"><p class="cta-caption">Designing with AI is a reader-supported publication. 
To receive new posts and support my work, consider becoming a free or paid subscriber.</p></div><form class="subscription-widget-subscribe"><input type="email" class="email-input" name="email" placeholder="Type your email&#8230;" tabindex="-1"><input type="submit" class="button primary" value="Subscribe"><div class="fake-input-wrapper"><div class="fake-input"></div><div class="fake-button"></div></div></form></div></div><div><hr></div><h2>The Books</h2><h3>1. Designing Multi-Agent Systems</h3><p><strong>Author</strong>: Victor Dibia. <strong>Year</strong>: 2025 <strong> </strong></p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!WFC_!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F60b9577b-2fb3-46a9-877f-e64b39c5846b_1439x813.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!WFC_!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F60b9577b-2fb3-46a9-877f-e64b39c5846b_1439x813.png 424w, https://substackcdn.com/image/fetch/$s_!WFC_!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F60b9577b-2fb3-46a9-877f-e64b39c5846b_1439x813.png 848w, https://substackcdn.com/image/fetch/$s_!WFC_!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F60b9577b-2fb3-46a9-877f-e64b39c5846b_1439x813.png 1272w, https://substackcdn.com/image/fetch/$s_!WFC_!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F60b9577b-2fb3-46a9-877f-e64b39c5846b_1439x813.png 1456w" sizes="100vw"><img 
src="https://substackcdn.com/image/fetch/$s_!WFC_!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F60b9577b-2fb3-46a9-877f-e64b39c5846b_1439x813.png" width="1439" height="813" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/60b9577b-2fb3-46a9-877f-e64b39c5846b_1439x813.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:813,&quot;width&quot;:1439,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:448283,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://newsletter.victordibia.com/i/178986157?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F60b9577b-2fb3-46a9-877f-e64b39c5846b_1439x813.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!WFC_!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F60b9577b-2fb3-46a9-877f-e64b39c5846b_1439x813.png 424w, https://substackcdn.com/image/fetch/$s_!WFC_!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F60b9577b-2fb3-46a9-877f-e64b39c5846b_1439x813.png 848w, https://substackcdn.com/image/fetch/$s_!WFC_!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F60b9577b-2fb3-46a9-877f-e64b39c5846b_1439x813.png 1272w, https://substackcdn.com/image/fetch/$s_!WFC_!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F60b9577b-2fb3-46a9-877f-e64b39c5846b_1439x813.png 1456w" 
sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><blockquote><p>I wrote this book, so I&#8217;ll keep the summary brief and let you judge.</p></blockquote><p><strong>What it covers</strong>: 15 chapters across 4 parts. Part I establishes theory&#8212;6 orchestration patterns (sequential, conditional, parallel, supervisor, handoff, conversation-driven) and 4 UX principles (capability discovery, cost-aware delegation, observability, interruptibility). Part II builds a complete agent library (PicoAgents) from scratch, including computer use agents, deterministic workflows, and autonomous orchestration. 
Part III covers evaluation with trajectories and LLM judges, 10 failure modes, MCP/A2A protocols, and ethics. Part IV provides two full case studies (business Q&amp;A from unstructured data, software engineering agent). </p><p><strong>Unique angle</strong>: Framework-agnostic. Instead of teaching you LangChain or CrewAI, it teaches the patterns those frameworks implement. You build everything from first principles. For completeness, the book&#8217;s <a href="https://github.com/victordibia/designing-multiagent-systems">GitHub repo</a> also contains examples of the same concepts implemented using the Microsoft Agent Framework (successor to AutoGen and Semantic Kernel), Google ADK, and LangGraph.</p><p><strong>Best for</strong>: Engineers who want to understand multi-agent systems deeply, not just use a framework.</p><p><strong>Links</strong>: <a href="https://www.amazon.com/dp/B0G2BCQQJY">Amazon</a> | <a href="https://github.com/victordibia/designing-multiagent-systems">GitHub</a> | <a href="https://multiagentbook.com/">multiagentbook.com</a></p><div><hr></div><h3>2. 
Generative AI Design Patterns</h3><p><strong>Authors</strong>: Valliappa Lakshmanan &amp; Hannes Hapke <strong>Year</strong>: 2025 </p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!bW7o!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4e82ef3c-58c5-4c33-b17d-2a37cdb2d1e4_1448x889.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!bW7o!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4e82ef3c-58c5-4c33-b17d-2a37cdb2d1e4_1448x889.png 424w, https://substackcdn.com/image/fetch/$s_!bW7o!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4e82ef3c-58c5-4c33-b17d-2a37cdb2d1e4_1448x889.png 848w, https://substackcdn.com/image/fetch/$s_!bW7o!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4e82ef3c-58c5-4c33-b17d-2a37cdb2d1e4_1448x889.png 1272w, https://substackcdn.com/image/fetch/$s_!bW7o!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4e82ef3c-58c5-4c33-b17d-2a37cdb2d1e4_1448x889.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!bW7o!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4e82ef3c-58c5-4c33-b17d-2a37cdb2d1e4_1448x889.png" width="1448" height="889" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/4e82ef3c-58c5-4c33-b17d-2a37cdb2d1e4_1448x889.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:889,&quot;width&quot;:1448,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:436144,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://newsletter.victordibia.com/i/178986157?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4e82ef3c-58c5-4c33-b17d-2a37cdb2d1e4_1448x889.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!bW7o!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4e82ef3c-58c5-4c33-b17d-2a37cdb2d1e4_1448x889.png 424w, https://substackcdn.com/image/fetch/$s_!bW7o!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4e82ef3c-58c5-4c33-b17d-2a37cdb2d1e4_1448x889.png 848w, https://substackcdn.com/image/fetch/$s_!bW7o!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4e82ef3c-58c5-4c33-b17d-2a37cdb2d1e4_1448x889.png 1272w, https://substackcdn.com/image/fetch/$s_!bW7o!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4e82ef3c-58c5-4c33-b17d-2a37cdb2d1e4_1448x889.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" 
height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p><strong>What it covers</strong>: 32 design patterns for generative AI in a structured problem/solution format. Chapter 7 covers agent-specific patterns (Tool Calling, Code Execution, Multi-agent), but the book&#8217;s real strength is the other 31 patterns: content control, RAG, reasoning (Chain-of-Thought, Tree-of-Thought), reliability, guardrails.</p><p><strong>Why it&#8217;s valuable for agent builders</strong>: Agents are built on top of these patterns. Understanding RAG, structured output, and reliability patterns gives you the building blocks agents use internally. Pattern #23 covers multi-agent collaboration specifically.</p><p><strong>Limitation</strong>: Not fully agent-focused. Also, if you want deep coverage of orchestration patterns or agent UX, look elsewhere. 
But as a companion to agent-specific resources, it&#8217;s excellent.  </p><p><strong>Best for</strong>: ML engineers who want broad GenAI pattern coverage, with agents as one component.</p><p><strong>Links</strong>: <a href="https://amzn.to/4ixAhvW">Amazon</a> | <a href="https://github.com/lakshmanok/generative-ai-design-patterns">GitHub</a></p><div><hr></div><h3>3. Building Applications with AI Agents</h3><p><strong>Author</strong>: Michael Albada <strong>Year</strong>: 2025 </p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!m5zw!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F007a5730-0525-481c-87f9-07050c2fcc6c_1451x729.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!m5zw!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F007a5730-0525-481c-87f9-07050c2fcc6c_1451x729.png 424w, https://substackcdn.com/image/fetch/$s_!m5zw!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F007a5730-0525-481c-87f9-07050c2fcc6c_1451x729.png 848w, https://substackcdn.com/image/fetch/$s_!m5zw!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F007a5730-0525-481c-87f9-07050c2fcc6c_1451x729.png 1272w, https://substackcdn.com/image/fetch/$s_!m5zw!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F007a5730-0525-481c-87f9-07050c2fcc6c_1451x729.png 1456w" sizes="100vw"><img 
src="https://substackcdn.com/image/fetch/$s_!m5zw!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F007a5730-0525-481c-87f9-07050c2fcc6c_1451x729.png" width="1451" height="729" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/007a5730-0525-481c-87f9-07050c2fcc6c_1451x729.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:729,&quot;width&quot;:1451,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:397792,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://newsletter.victordibia.com/i/178986157?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F007a5730-0525-481c-87f9-07050c2fcc6c_1451x729.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!m5zw!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F007a5730-0525-481c-87f9-07050c2fcc6c_1451x729.png 424w, https://substackcdn.com/image/fetch/$s_!m5zw!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F007a5730-0525-481c-87f9-07050c2fcc6c_1451x729.png 848w, https://substackcdn.com/image/fetch/$s_!m5zw!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F007a5730-0525-481c-87f9-07050c2fcc6c_1451x729.png 1272w, https://substackcdn.com/image/fetch/$s_!m5zw!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F007a5730-0525-481c-87f9-07050c2fcc6c_1451x729.png 1456w" 
sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p> </p><p><strong>What it covers</strong>: A solid introductory resource for folks new to AI agents. Notably covers human-agent collaboration and agent interface design (Chapter 3), plus model improvement techniques (finetuning, SFT, DPO, RL) in Chapter 7. Implements a few scenarios across three frameworks: LangGraph, LangChain, and AutoGen.</p><p><strong>Unique angle</strong>: Framework comparison. If you&#8217;re trying to decide between LangGraph, LangChain, or AutoGen, seeing the same scenario implemented in each helps you understand the trade-offs. 
Includes industry scenarios: ecommerce, financial services, healthcare, IT helpdesk, legal, SOC, supply chain.</p><p><strong>Limitation</strong>: Framework-specific code may become outdated given rapid framework evolution.  </p><p><strong>Best for</strong>: Developers interested in an introduction to general agent concepts and the use of agent frameworks; teams new to AI agents.</p><p><strong>Links</strong>: <a href="https://amzn.to/48LALtV">Amazon</a> | <a href="https://github.com/michaelalbada/BuildingApplicationsWithAIAgents">GitHub</a></p><div><hr></div><h3>4. Build a Large Language Model (From Scratch)</h3><p><strong>Author</strong>: Sebastian Raschka <strong>Year</strong>: 2024 <strong>Code</strong>: 177 files (122 Python + 55 notebooks)</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!VHty!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdc7c32e2-06fb-4900-a829-e997fedf6e73_1432x747.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!VHty!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdc7c32e2-06fb-4900-a829-e997fedf6e73_1432x747.png 424w, https://substackcdn.com/image/fetch/$s_!VHty!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdc7c32e2-06fb-4900-a829-e997fedf6e73_1432x747.png 848w, https://substackcdn.com/image/fetch/$s_!VHty!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdc7c32e2-06fb-4900-a829-e997fedf6e73_1432x747.png 1272w, 
https://substackcdn.com/image/fetch/$s_!VHty!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdc7c32e2-06fb-4900-a829-e997fedf6e73_1432x747.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!VHty!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdc7c32e2-06fb-4900-a829-e997fedf6e73_1432x747.png" width="1432" height="747" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/dc7c32e2-06fb-4900-a829-e997fedf6e73_1432x747.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:747,&quot;width&quot;:1432,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:427084,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://newsletter.victordibia.com/i/178986157?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdc7c32e2-06fb-4900-a829-e997fedf6e73_1432x747.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!VHty!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdc7c32e2-06fb-4900-a829-e997fedf6e73_1432x747.png 424w, https://substackcdn.com/image/fetch/$s_!VHty!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdc7c32e2-06fb-4900-a829-e997fedf6e73_1432x747.png 848w, 
https://substackcdn.com/image/fetch/$s_!VHty!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdc7c32e2-06fb-4900-a829-e997fedf6e73_1432x747.png 1272w, https://substackcdn.com/image/fetch/$s_!VHty!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdc7c32e2-06fb-4900-a829-e997fedf6e73_1432x747.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p> </p><p><strong>What it covers</strong>: Admittedly this book is not about AI agents per se but useful for understanding how LLMs work by 
implementing them from scratch. Attention mechanisms, tokenization, pretraining, finetuning, LoRA. Builds a GPT-like model step by step. Includes bonus implementations for Llama 3.2, Qwen3, and Gemma 3.</p><p><strong>Why include a non-agent book?</strong> Because agents call LLMs under the hood. When your agent behaves unexpectedly, understanding attention patterns and token limits helps you debug unexpected behaviour. When you&#8217;re optimizing token usage or choosing between models, knowing what&#8217;s happening inside matters.</p><p><strong>Limitation</strong>: No agent content. This is pure LLM internals.</p><p><strong>Best for</strong>: Engineers who want to understand what agents are calling, not how to build agents themselves. A lot of folks find this book useful for preparing for ML engineering interviews where definitions and low-level implementation are part of the interview process.</p><p><strong>Links</strong>: <a href="https://amzn.to/4pDf5qz">Amazon</a> | <a href="https://github.com/rasbt/LLMs-from-scratch">GitHub</a></p><div><hr></div><h2>How I Evaluated These Books</h2><p>I evaluated each book by reviewing its table of contents for topic coverage and its GitHub repository for implementation depth (file counts, whether code builds complete applications vs. isolated snippets). 
Here&#8217;s how I mapped the four dimensions:</p><ul><li><p><strong>Agent Concepts: </strong>Tools, structured output, orchestration, observability, memory, planning</p></li><li><p><strong>Multi-Agent Patterns: </strong>Supervisor, swarm, handoffs, conversation-driven orchestration, workflows</p></li><li><p><strong>Hands-on Implementation: </strong>Python + notebook file count, end-to-end applications vs isolated snippets</p></li><li><p><strong>UX Principles: </strong>Streaming, human-in-the-loop, error handling, interface design chapters</p></li></ul><p>Data collected from each book&#8217;s GitHub repository (November 2025).</p><div><hr></div><h2>Scope</h2><p>This is a curated list, not an exhaustive one. I&#8217;ve excluded framework-specific books (LangChain, CrewAI) since their APIs change frequently enough that printed content dates quickly. Online courses and research papers are also out of scope here. If you think a book belongs on this list - <a href="https://github.com/victordibia/designing-multiagent-systems/issues/new">open an issue</a>.</p><div><hr></div><h2>Which Book Should You Read?</h2><p><strong>To understand and build agents</strong>: <em>Designing Multi-Agent Systems</em> (my book) builds from first principles.</p><p><strong>For broad GenAI patterns</strong>: <em>Generative AI Design Patterns</em> covers 32 patterns including RAG and reasoning.</p><p><strong>An overview of agents with comparisons across frameworks</strong>: <em>Building Applications with AI Agents</em> implements the same scenarios in LangGraph, LangChain, and AutoGen.</p><p><strong>To understand the LLM layer</strong>: <em>Build a Large Language Model (From Scratch)</em> teaches what agents call under the hood.</p><div><hr></div><p><em>Some links are affiliate links.</em></p><p><em>Data last updated: November 2025</em></p>]]></content:encoded></item><item><title><![CDATA[Two Years, 15 Chapters: The Multi-Agent Systems Book Is Finally Here!]]></title><description><![CDATA[Issue #49 | 
It took only 2 years-ish, but I finished writing the multi-agent systems book and it is now available!]]></description><link>https://newsletter.victordibia.com/p/the-designing-multi-agent-systems</link><guid isPermaLink="false">https://newsletter.victordibia.com/p/the-designing-multi-agent-systems</guid><dc:creator><![CDATA[Victor Dibia, PhD]]></dc:creator><pubDate>Fri, 14 Nov 2025 23:53:39 GMT</pubDate><enclosure url="https://substackcdn.com/image/fetch/$s_!5Eor!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdaf3907a-3356-488e-80b1-7e35411c9ee6_2458x1564.png" length="0" type="image/jpeg"/><content:encoded><![CDATA[<div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!5Eor!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdaf3907a-3356-488e-80b1-7e35411c9ee6_2458x1564.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!5Eor!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdaf3907a-3356-488e-80b1-7e35411c9ee6_2458x1564.png 424w, https://substackcdn.com/image/fetch/$s_!5Eor!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdaf3907a-3356-488e-80b1-7e35411c9ee6_2458x1564.png 848w, https://substackcdn.com/image/fetch/$s_!5Eor!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdaf3907a-3356-488e-80b1-7e35411c9ee6_2458x1564.png 1272w, 
https://substackcdn.com/image/fetch/$s_!5Eor!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdaf3907a-3356-488e-80b1-7e35411c9ee6_2458x1564.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!5Eor!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdaf3907a-3356-488e-80b1-7e35411c9ee6_2458x1564.png" width="1456" height="926" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/daf3907a-3356-488e-80b1-7e35411c9ee6_2458x1564.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:926,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:3922979,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:false,&quot;topImage&quot;:true,&quot;internalRedirect&quot;:&quot;https://newsletter.victordibia.com/i/178934727?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdaf3907a-3356-488e-80b1-7e35411c9ee6_2458x1564.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!5Eor!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdaf3907a-3356-488e-80b1-7e35411c9ee6_2458x1564.png 424w, https://substackcdn.com/image/fetch/$s_!5Eor!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdaf3907a-3356-488e-80b1-7e35411c9ee6_2458x1564.png 848w, 
https://substackcdn.com/image/fetch/$s_!5Eor!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdaf3907a-3356-488e-80b1-7e35411c9ee6_2458x1564.png 1272w, https://substackcdn.com/image/fetch/$s_!5Eor!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdaf3907a-3356-488e-80b1-7e35411c9ee6_2458x1564.png 1456w" sizes="100vw" fetchpriority="high"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a><figcaption class="image-caption">Author proof for the Designing Multi-Agent Systems book - 
buy.multiagentbook.com</figcaption></figure></div><p>A few of you who have been around here for a while might recall I mentioned I was <a href="https://newsletter.victordibia.com/p/announcing-a-new-book-multi-agent?utm_source=publication-search">working on a book on AI agents</a>. I&#8217;m excited to share that <strong>Designing Multi-Agent Systems: Principles, Patterns, and Implementation for AI Agents</strong> is now available in both print and digital editions on Amazon!</p><p><strong>Get your copy:</strong></p><ul><li><p><a href="https://buy.multiagentbook.com/">Digital edition</a> (PDF and EPUB) </p></li><li><p><a href="https://www.amazon.com/dp/B0G2BCQQJY">Print edition on Amazon (Paperback &amp; Hardcover)</a></p></li></ul><p>If you&#8217;d like to read a <a href="https://multiagentbook.com/preview.pdf">free preview</a> of the first two chapters to decide if this is the right book for you - <a href="https://multiagentbook.com/preview.pdf">check it out here</a>!  </p><blockquote><p>Should you get the digital PDF/EPUB or the printed paperback? I tend to recommend the digital version if you don&#8217;t mind reading from a computer or tablet/Kindle. The main reason is that I painstakingly designed an <a href="https://buy.multiagentbook.com/">entire platform</a> around it that will make it possible for you to purchase once - but get <strong>lifetime</strong> access to all updates of the book. I plan to update the book quarterly. <br>But I also understand reading is a physical as well as mental activity and being able to take a book, sit in the woods and focus for extended periods is valuable. <br>If you can, grab both! 
</p></blockquote><h2>What&#8217;s in the Book</h2><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!h7xf!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fbd18da29-d695-4d70-a7ac-94c6e5ae7ac8_1393x1062.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!h7xf!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fbd18da29-d695-4d70-a7ac-94c6e5ae7ac8_1393x1062.png 424w, https://substackcdn.com/image/fetch/$s_!h7xf!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fbd18da29-d695-4d70-a7ac-94c6e5ae7ac8_1393x1062.png 848w, https://substackcdn.com/image/fetch/$s_!h7xf!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fbd18da29-d695-4d70-a7ac-94c6e5ae7ac8_1393x1062.png 1272w, https://substackcdn.com/image/fetch/$s_!h7xf!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fbd18da29-d695-4d70-a7ac-94c6e5ae7ac8_1393x1062.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!h7xf!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fbd18da29-d695-4d70-a7ac-94c6e5ae7ac8_1393x1062.png" width="1393" height="1062" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/bd18da29-d695-4d70-a7ac-94c6e5ae7ac8_1393x1062.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:1062,&quot;width&quot;:1393,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:301241,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:false,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://newsletter.victordibia.com/i/178934727?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fbd18da29-d695-4d70-a7ac-94c6e5ae7ac8_1393x1062.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!h7xf!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fbd18da29-d695-4d70-a7ac-94c6e5ae7ac8_1393x1062.png 424w, https://substackcdn.com/image/fetch/$s_!h7xf!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fbd18da29-d695-4d70-a7ac-94c6e5ae7ac8_1393x1062.png 848w, https://substackcdn.com/image/fetch/$s_!h7xf!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fbd18da29-d695-4d70-a7ac-94c6e5ae7ac8_1393x1062.png 1272w, https://substackcdn.com/image/fetch/$s_!h7xf!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fbd18da29-d695-4d70-a7ac-94c6e5ae7ac8_1393x1062.png 1456w" sizes="100vw"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" 
viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a><figcaption class="image-caption">A brief overview of the TOC</figcaption></figure></div><p>The book takes an <em>unusual approach</em> by <strong>teaching multi-agent systems from first principles</strong>. It presents important theory (the first 3 chapters in Part I), but also carefully walks the reader through <strong>building a feature-complete (but hackable) multi-agent framework from scratch (<a href="https://github.com/victordibia/designing-multiagent-systems/tree/main/picoagents">picoagents</a>) - agents</strong> (model clients, memory, tools, structured output, agentic memory, human input, agents as tools, observability etc)<strong>, multiagent patterns </strong>(round robin, magentic one etc)<strong> and deterministic workflows </strong>(agentic systems as computational graphs). 
This way, you understand not just how to use existing frameworks, but why their architectures work, and how to make informed design decisions as the ecosystem evolves. </p><p>It is the hands-on learning experience (395 pages) I would recommend to anyone wanting to understand/implement AI agents and multi-agent systems.</p><p>The book covers:</p><ul><li><p><strong>Multi-Agent Fundamentals</strong>: Core concepts, design patterns, and user experience principles</p></li><li><p><strong>Implementation from Scratch</strong>: Building agents, workflows, and orchestration by creating a complete Python library called <code>picoagents</code></p></li><li><p><strong>Evaluation and Optimization</strong>: Testing, measuring performance, and optimizing for reliability and scale</p></li><li><p><strong>Production Deployment</strong>: Integrating your agents into web applications, deploying them (containers), as well as security, ethics, and responsible AI considerations for real-world applications</p></li><li><p><strong>Domain Applications</strong>: Complete implementations for unstructured data processing with deterministic workflows, and how to build a software engineering agent with autonomous behaviours.</p></li></ul><p>Across 15 chapters, you&#8217;ll find 186 code snippets, <strong>50 figures and diagrams</strong>, 26 tables, 76 callout boxes, and 73 references. 
All code examples are available in the companion <a href="https://github.com/victordibia/designing-multiagent-systems">GitHub repository</a>.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!ju5W!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdee13057-c554-452c-8d0b-d6236eb10031_969x695.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!ju5W!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdee13057-c554-452c-8d0b-d6236eb10031_969x695.png 424w, https://substackcdn.com/image/fetch/$s_!ju5W!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdee13057-c554-452c-8d0b-d6236eb10031_969x695.png 848w, https://substackcdn.com/image/fetch/$s_!ju5W!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdee13057-c554-452c-8d0b-d6236eb10031_969x695.png 1272w, https://substackcdn.com/image/fetch/$s_!ju5W!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdee13057-c554-452c-8d0b-d6236eb10031_969x695.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!ju5W!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdee13057-c554-452c-8d0b-d6236eb10031_969x695.png" width="969" height="695" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/dee13057-c554-452c-8d0b-d6236eb10031_969x695.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:695,&quot;width&quot;:969,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:266005,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://newsletter.victordibia.com/i/178934727?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdee13057-c554-452c-8d0b-d6236eb10031_969x695.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!ju5W!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdee13057-c554-452c-8d0b-d6236eb10031_969x695.png 424w, https://substackcdn.com/image/fetch/$s_!ju5W!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdee13057-c554-452c-8d0b-d6236eb10031_969x695.png 848w, https://substackcdn.com/image/fetch/$s_!ju5W!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdee13057-c554-452c-8d0b-d6236eb10031_969x695.png 1272w, https://substackcdn.com/image/fetch/$s_!ju5W!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdee13057-c554-452c-8d0b-d6236eb10031_969x695.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" 
viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><h2>A Personal Journey</h2><p>Writing this book has been an exploration of one of the fastest-evolving fields in technology. It began with my work on <a href="https://microsoft.github.io/lida/">LIDA</a> (one of the first systems for automatic data visualization using LLMs), evolved through building <a href="https://github.com/microsoft/autogen">AutoGen / AutoGen Studio</a> at Microsoft Research, and was informed by work advising dozens of teams on implementing multi-agent systems in production environments.</p><p>The <a href="https://newsletter.victordibia.com/p/manning-cancelled-my-book-heres-what">journey</a> from that initial idea in December 2023 to holding the finished book has been equal parts exhilarating and humbling.  
</p><div class="digest-post-embed" data-attrs="{&quot;nodeId&quot;:&quot;bdb9b545-df9a-47c9-a574-b16d9dfe6e3e&quot;,&quot;caption&quot;:&quot;&#8220;Hey Victor, we are cancelling your book, you will get an email.&#8221;&quot;,&quot;cta&quot;:&quot;Read full story&quot;,&quot;showBylines&quot;:true,&quot;size&quot;:&quot;sm&quot;,&quot;isEditorNode&quot;:true,&quot;title&quot;:&quot;Manning Cancelled My Book &#8212; Here's What Happened Next&quot;,&quot;publishedBylines&quot;:[{&quot;id&quot;:85692678,&quot;name&quot;:&quot;Victor Dibia, PhD&quot;,&quot;bio&quot;:&quot;Software Engineer, Researcher (Microsoft), Best Selling Author working on and writing about Generative AI, Agents. Core maintainer for the AutoGen Framework (50k Stars on GitHub). Previously at Cloudera, IBM Research. All views are my own.&quot;,&quot;photo_url&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/27536f63-7d8e-48dc-b34c-150acfacdc8b_1726x1396.jpeg&quot;,&quot;is_guest&quot;:false,&quot;bestseller_tier&quot;:null}],&quot;post_date&quot;:&quot;2025-12-08T15:30:29.403Z&quot;,&quot;cover_image&quot;:&quot;https://substackcdn.com/image/fetch/$s_!Y51P!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F56c0ceef-9ef2-4136-bdb3-3170cabc50c6_2458x1564.png&quot;,&quot;cover_image_alt&quot;:null,&quot;canonical_url&quot;:&quot;https://newsletter.victordibia.com/p/manning-cancelled-my-book-heres-what&quot;,&quot;section_name&quot;:null,&quot;video_upload_id&quot;:null,&quot;id&quot;:171608968,&quot;type&quot;:&quot;newsletter&quot;,&quot;reaction_count&quot;:12,&quot;comment_count&quot;:1,&quot;publication_id&quot;:1253044,&quot;publication_name&quot;:&quot;Designing with 
AI&quot;,&quot;publication_logo_url&quot;:&quot;https://substackcdn.com/image/fetch/$s_!1FgP!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fff160652-8bbb-475e-80a7-ba4eb5f80dcb_504x504.png&quot;,&quot;belowTheFold&quot;:true,&quot;youtube_url&quot;:null,&quot;show_links&quot;:null,&quot;feed_url&quot;:null}"></div><div><hr></div><div class="subscription-widget-wrap-editor" data-attrs="{&quot;url&quot;:&quot;https://newsletter.victordibia.com/subscribe?&quot;,&quot;text&quot;:&quot;Subscribe&quot;,&quot;language&quot;:&quot;en&quot;}" data-component-name="SubscribeWidgetToDOM"><div class="subscription-widget show-subscribe"><div class="preamble"><p class="cta-caption">Designing with AI is a reader-supported publication. To receive new posts and support my work, consider becoming a free or paid subscriber.</p></div><form class="subscription-widget-subscribe"><input type="email" class="email-input" name="email" placeholder="Type your email&#8230;" tabindex="-1"><input type="submit" class="button primary" value="Subscribe"><div class="fake-input-wrapper"><div class="fake-input"></div><div class="fake-button"></div></div></form></div></div><div><hr></div><h2><strong>Thank You</strong></h2><p>To everyone who has been part of this journey, whether through thoughtful questions, design discussions, or early feedback on drafts - thank you. This book stands on your shoulders.</p><p>My hope is that this book provides both the technical foundation and the judgment to build effective multi-agent systems, and to <em>recognize when simpler approaches might serve you better.</em></p><p>If there&#8217;s interest, I&#8217;d be happy to write more about what this process was like - why I wrote the book, how long it took, what tools were helpful, how I came to self-publish the book, what I&#8217;m most proud of, and lessons learned along the way. 
Share your thoughts in the comments!</p><p>Until then, thank you again, and if you grab a copy, please let me know what you think!</p><div><hr></div><p><strong>More Resources:</strong></p><ul><li><p>Digital version: <a href="https://buy.multiagentbook.com/">buy.multiagentbook.com</a> </p></li><li><p>Book on Amazon: <a href="https://www.amazon.com/dp/B0G2BCQQJY">https://www.amazon.com/dp/B0G2BCQQJY</a></p></li><li><p>Book website with interactive labs: <a href="https://multiagentbook.com/">multiagentbook.com</a></p></li><li><p>GitHub repository with lots of code: <a href="https://github.com/victordibia/designing-multiagent-systems">github.com/victordibia/designing-multiagent-systems</a></p></li></ul><p><br>Update: The book has debuted as #1 in the Business Intelligence and Enterprise Applications Category on Amazon! Thank you all!</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!0JGd!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F410656ec-670b-4947-8f8f-1f5d909f3558_1598x832.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!0JGd!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F410656ec-670b-4947-8f8f-1f5d909f3558_1598x832.png 424w, https://substackcdn.com/image/fetch/$s_!0JGd!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F410656ec-670b-4947-8f8f-1f5d909f3558_1598x832.png 848w, https://substackcdn.com/image/fetch/$s_!0JGd!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F410656ec-670b-4947-8f8f-1f5d909f3558_1598x832.png 1272w, 
https://substackcdn.com/image/fetch/$s_!0JGd!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F410656ec-670b-4947-8f8f-1f5d909f3558_1598x832.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!0JGd!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F410656ec-670b-4947-8f8f-1f5d909f3558_1598x832.png" width="1456" height="758" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/410656ec-670b-4947-8f8f-1f5d909f3558_1598x832.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:758,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:512031,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://newsletter.victordibia.com/i/178934727?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F410656ec-670b-4947-8f8f-1f5d909f3558_1598x832.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!0JGd!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F410656ec-670b-4947-8f8f-1f5d909f3558_1598x832.png 424w, https://substackcdn.com/image/fetch/$s_!0JGd!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F410656ec-670b-4947-8f8f-1f5d909f3558_1598x832.png 848w, 
https://substackcdn.com/image/fetch/$s_!0JGd!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F410656ec-670b-4947-8f8f-1f5d909f3558_1598x832.png 1272w, https://substackcdn.com/image/fetch/$s_!0JGd!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F410656ec-670b-4947-8f8f-1f5d909f3558_1598x832.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p><br></p>]]></content:encoded></item><item><title><![CDATA[ /upgrade ... 
or ...]]></title><description><![CDATA[Issue #48 | somber reflections on how AI coding tools are changing developer behavior, cognition, and dependency (the slope we are all sliding down)]]></description><link>https://newsletter.victordibia.com/p/upgrade-or</link><guid isPermaLink="false">https://newsletter.victordibia.com/p/upgrade-or</guid><dc:creator><![CDATA[Victor Dibia, PhD]]></dc:creator><pubDate>Mon, 27 Oct 2025 15:02:56 GMT</pubDate><enclosure url="https://substackcdn.com/image/fetch/$s_!FEAl!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe8efd626-4e16-4a2b-8e6f-834fdf2c86aa_2838x1851.png" length="0" type="image/jpeg"/><content:encoded><![CDATA[<div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!FEAl!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe8efd626-4e16-4a2b-8e6f-834fdf2c86aa_2838x1851.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!FEAl!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe8efd626-4e16-4a2b-8e6f-834fdf2c86aa_2838x1851.png 424w, https://substackcdn.com/image/fetch/$s_!FEAl!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe8efd626-4e16-4a2b-8e6f-834fdf2c86aa_2838x1851.png 848w, https://substackcdn.com/image/fetch/$s_!FEAl!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe8efd626-4e16-4a2b-8e6f-834fdf2c86aa_2838x1851.png 1272w, 
https://substackcdn.com/image/fetch/$s_!FEAl!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe8efd626-4e16-4a2b-8e6f-834fdf2c86aa_2838x1851.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!FEAl!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe8efd626-4e16-4a2b-8e6f-834fdf2c86aa_2838x1851.png" width="1456" height="950" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/e8efd626-4e16-4a2b-8e6f-834fdf2c86aa_2838x1851.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:950,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:2630485,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:false,&quot;topImage&quot;:true,&quot;internalRedirect&quot;:&quot;https://newsletter.victordibia.com/i/173907581?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe8efd626-4e16-4a2b-8e6f-834fdf2c86aa_2838x1851.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!FEAl!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe8efd626-4e16-4a2b-8e6f-834fdf2c86aa_2838x1851.png 424w, https://substackcdn.com/image/fetch/$s_!FEAl!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe8efd626-4e16-4a2b-8e6f-834fdf2c86aa_2838x1851.png 848w, 
https://substackcdn.com/image/fetch/$s_!FEAl!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe8efd626-4e16-4a2b-8e6f-834fdf2c86aa_2838x1851.png 1272w, https://substackcdn.com/image/fetch/$s_!FEAl!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe8efd626-4e16-4a2b-8e6f-834fdf2c86aa_2838x1851.png 1456w" sizes="100vw" fetchpriority="high"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p></p><p>I have found myself at a rather interesting nexus quite often recently as I work with AI coding models like Claude. 
As I integrate these tools into my workflow, I see fundamental changes in how I work, and it is unclear at this point whether these changes are trending in the right direction. </p><div><hr></div><p><strong>A common scenario is as follows:</strong> <br>I am 3 hours into a rather intense coding session, but this is not the usual developer flow. As opposed to the typical roll up my sleeves, dig in like a code artisan hammering at the details, I am fully occupied, but more like a symphony conductor, madly dashing across 3 different VS Code windows with streams of text, <strong>babysitting Claude</strong>.</p><div class="digest-post-embed" data-attrs="{&quot;nodeId&quot;:&quot;61334784-813a-427e-a3f8-8e409dd369b9&quot;,&quot;caption&quot;:&quot;About 2 months ago, I had arrived at the office at about 8:45am, sat at my desk and was about to start my day. The only problem was that I, for the life of me, could not remember my password. You see, this is a bit laughable because for the past year or so, I had typed in that exact password, sometimes from muscle memory. I had done it at the beginning &#8230;&quot;,&quot;cta&quot;:&quot;Read full story&quot;,&quot;showBylines&quot;:true,&quot;size&quot;:&quot;sm&quot;,&quot;isEditorNode&quot;:true,&quot;title&quot;:&quot;You have 'AI Fatigue' - Thats Why You Feel Awful&quot;,&quot;publishedBylines&quot;:[{&quot;id&quot;:85692678,&quot;name&quot;:&quot;Victor Dibia, PhD&quot;,&quot;bio&quot;:&quot;Hacker, Research Scientist (Microsoft Research), Author working on and writing about Generative AI, Agents. Core maintainer for the AutoGen Multi-Agent Framework (35k Stars on GitHub). Previously at Cloudera, IBM Research. 
All views are my own.&quot;,&quot;photo_url&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/27536f63-7d8e-48dc-b34c-150acfacdc8b_1726x1396.jpeg&quot;,&quot;is_guest&quot;:false,&quot;bestseller_tier&quot;:null}],&quot;post_date&quot;:&quot;2025-01-06T21:23:25.845Z&quot;,&quot;cover_image&quot;:&quot;https://substackcdn.com/image/fetch/$s_!l3o4!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Faa0f402f-7438-4620-9708-2fe9e220bce6_1901x1163.png&quot;,&quot;cover_image_alt&quot;:null,&quot;canonical_url&quot;:&quot;https://newsletter.victordibia.com/p/you-have-ai-fatigue-thats-why-you&quot;,&quot;section_name&quot;:null,&quot;video_upload_id&quot;:null,&quot;id&quot;:154157793,&quot;type&quot;:&quot;podcast&quot;,&quot;reaction_count&quot;:47,&quot;comment_count&quot;:11,&quot;publication_id&quot;:1253044,&quot;publication_name&quot;:&quot;Designing with AI&quot;,&quot;publication_logo_url&quot;:&quot;https://substackcdn.com/image/fetch/$s_!1FgP!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fff160652-8bbb-475e-80a7-ba4eb5f80dcb_504x504.png&quot;,&quot;belowTheFold&quot;:false,&quot;youtube_url&quot;:null,&quot;show_links&quot;:null,&quot;feed_url&quot;:null}"></div><div class="digest-post-embed" data-attrs="{&quot;nodeId&quot;:&quot;98862523-9909-4608-b544-2637f426eec0&quot;,&quot;caption&quot;:&quot;In 2022, an engineer was placed on administrative leave (fired) mostly due to his claims that a Generative AI model (LaMDA) was sentient.&quot;,&quot;cta&quot;:&quot;Read full story&quot;,&quot;showBylines&quot;:true,&quot;size&quot;:&quot;sm&quot;,&quot;isEditorNode&quot;:true,&quot;title&quot;:&quot;On Sycophant AI: Unpacking the Yes-Machine Crisis&quot;,&quot;publishedBylines&quot;:[{&quot;id&quot;:85692678,&quot;name&quot;:&quot;Victor Dibia, PhD&quot;,&quot;bio&quot;:&quot;Hacker, Research Scientist (Microsoft 
Research), Author working on and writing about Generative AI, Agents. Core maintainer for the AutoGen Multi-Agent Framework (35k Stars on GitHub). Previously at Cloudera, IBM Research. All views are my own.&quot;,&quot;photo_url&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/27536f63-7d8e-48dc-b34c-150acfacdc8b_1726x1396.jpeg&quot;,&quot;is_guest&quot;:false,&quot;bestseller_tier&quot;:null}],&quot;post_date&quot;:&quot;2025-05-05T13:35:50.201Z&quot;,&quot;cover_image&quot;:&quot;https://substackcdn.com/image/fetch/$s_!zKCN!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc888b42c-71b8-453f-8d56-9dde5e9b420b_1901x1193.png&quot;,&quot;cover_image_alt&quot;:null,&quot;canonical_url&quot;:&quot;https://newsletter.victordibia.com/p/on-sycophant-ai-unpacking-the-yes&quot;,&quot;section_name&quot;:null,&quot;video_upload_id&quot;:null,&quot;id&quot;:162271179,&quot;type&quot;:&quot;newsletter&quot;,&quot;reaction_count&quot;:7,&quot;comment_count&quot;:0,&quot;publication_id&quot;:1253044,&quot;publication_name&quot;:&quot;Designing with AI&quot;,&quot;publication_logo_url&quot;:&quot;https://substackcdn.com/image/fetch/$s_!1FgP!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fff160652-8bbb-475e-80a7-ba4eb5f80dcb_504x504.png&quot;,&quot;belowTheFold&quot;:false,&quot;youtube_url&quot;:null,&quot;show_links&quot;:null,&quot;feed_url&quot;:null}"></div><p>It&#8217;s a bit maddening, because I am supposed to be the maestro, but half the time <em>I am being dragged along</em>. Unlike a maestro who steps forward (confident, perhaps with closed eyes) and focuses on conduction, I need both eyes open. 
For unlike the maestro, I have an orchestra of players that <em>don&#8217;t know they are players in my orchestra</em>, may not follow my script, and may occasionally believe themselves to be maestros!</p><p>As the session progresses, it is critical that I follow the scrolling commentary from Claude: </p><ul><li><p>.. updating the todos, </p></li><li><p>.. running tests. </p></li><li><p>.. I found an error... fixing it. </p></li><li><p>.. Creating a new folder. Installing a new library ..</p></li></ul><p>At this point I hit escape (to stop the run). Wait wait... why do we need that, I interject. Oh yeah, you are right, this is over-engineering on my part, we certainly can do without that file. And off it goes on... more massive scrolling. </p><p>It&#8217;s frustrating (in a subtle way) as mistakes are being made, but there is <em><strong>much more net overall progress</strong></em> than I would have accomplished without Claude, given the amount of time spent.</p><p>And then in the middle of it all, right in the middle of a major refactor, and right before it&#8217;s time to show the next demo, the screen goes blank.</p><div class="captioned-image-container"><figure><a class="image-link image2" target="_blank" href="https://substackcdn.com/image/fetch/$s_!mT3-!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F393b4d62-56b5-4277-9986-5ecf175e546c_746x92.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!mT3-!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F393b4d62-56b5-4277-9986-5ecf175e546c_746x92.png 424w, https://substackcdn.com/image/fetch/$s_!mT3-!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F393b4d62-56b5-4277-9986-5ecf175e546c_746x92.png 848w, 
https://substackcdn.com/image/fetch/$s_!mT3-!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F393b4d62-56b5-4277-9986-5ecf175e546c_746x92.png 1272w, https://substackcdn.com/image/fetch/$s_!mT3-!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F393b4d62-56b5-4277-9986-5ecf175e546c_746x92.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!mT3-!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F393b4d62-56b5-4277-9986-5ecf175e546c_746x92.png" width="746" height="92" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/393b4d62-56b5-4277-9986-5ecf175e546c_746x92.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:92,&quot;width&quot;:746,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:12783,&quot;alt&quot;:&quot;&quot;,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://newsletter.victordibia.com/i/173907581?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F393b4d62-56b5-4277-9986-5ecf175e546c_746x92.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" title="" srcset="https://substackcdn.com/image/fetch/$s_!mT3-!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F393b4d62-56b5-4277-9986-5ecf175e546c_746x92.png 424w, 
https://substackcdn.com/image/fetch/$s_!mT3-!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F393b4d62-56b5-4277-9986-5ecf175e546c_746x92.png 848w, https://substackcdn.com/image/fetch/$s_!mT3-!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F393b4d62-56b5-4277-9986-5ecf175e546c_746x92.png 1272w, https://substackcdn.com/image/fetch/$s_!mT3-!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F393b4d62-56b5-4277-9986-5ecf175e546c_746x92.png 1456w" sizes="100vw" loading="lazy"></picture><div></div></div></a></figure></div><div class="captioned-image-container"><figure><a class="image-link image2" target="_blank" href="https://substackcdn.com/image/fetch/$s_!tFkx!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd524e3d0-c67a-4a92-9fb5-d87f1f159ad5_1376x98.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!tFkx!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd524e3d0-c67a-4a92-9fb5-d87f1f159ad5_1376x98.png 424w, https://substackcdn.com/image/fetch/$s_!tFkx!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd524e3d0-c67a-4a92-9fb5-d87f1f159ad5_1376x98.png 848w, https://substackcdn.com/image/fetch/$s_!tFkx!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd524e3d0-c67a-4a92-9fb5-d87f1f159ad5_1376x98.png 1272w, 
https://substackcdn.com/image/fetch/$s_!tFkx!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd524e3d0-c67a-4a92-9fb5-d87f1f159ad5_1376x98.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!tFkx!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd524e3d0-c67a-4a92-9fb5-d87f1f159ad5_1376x98.png" width="1376" height="98" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/d524e3d0-c67a-4a92-9fb5-d87f1f159ad5_1376x98.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:98,&quot;width&quot;:1376,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:13923,&quot;alt&quot;:&quot;&quot;,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://newsletter.victordibia.com/i/173907581?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd524e3d0-c67a-4a92-9fb5-d87f1f159ad5_1376x98.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" title="" srcset="https://substackcdn.com/image/fetch/$s_!tFkx!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd524e3d0-c67a-4a92-9fb5-d87f1f159ad5_1376x98.png 424w, https://substackcdn.com/image/fetch/$s_!tFkx!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd524e3d0-c67a-4a92-9fb5-d87f1f159ad5_1376x98.png 848w, 
https://substackcdn.com/image/fetch/$s_!tFkx!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd524e3d0-c67a-4a92-9fb5-d87f1f159ad5_1376x98.png 1272w, https://substackcdn.com/image/fetch/$s_!tFkx!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd524e3d0-c67a-4a92-9fb5-d87f1f159ad5_1376x98.png 1456w" sizes="100vw" loading="lazy"></picture><div></div></div></a></figure></div><blockquote><p><em><strong>Your quota resets in 5 hours, come back at 5pm </strong>... or <strong>type /upgrade</strong> to immediately increase your quota</em></p></blockquote><p>Wait... what?</p><p>It turns out I&#8217;m not alone in this experience. A recent <a href="https://www.fastly.com/blog/senior-developers-ship-more-ai-code">Fastly survey found that at least 95% of nearly 800 developers spend extra time fixing AI-generated code,</a> with senior developers bearing the <strong>heaviest burden of verification</strong>.</p><div><hr></div><div class="subscription-widget-wrap-editor" data-attrs="{&quot;url&quot;:&quot;https://newsletter.victordibia.com/subscribe?&quot;,&quot;text&quot;:&quot;Subscribe&quot;,&quot;language&quot;:&quot;en&quot;}" data-component-name="SubscribeWidgetToDOM"><div class="subscription-widget show-subscribe"><div class="preamble"><p class="cta-caption">Designing with AI is a reader-supported publication. 
To receive new posts and support my work, consider becoming a free or paid subscriber.</p></div><form class="subscription-widget-subscribe"><input type="email" class="email-input" name="email" placeholder="Type your email&#8230;" tabindex="-1"><input type="submit" class="button primary" value="Subscribe"><div class="fake-input-wrapper"><div class="fake-input"></div><div class="fake-button"></div></div></form></div></div><div><hr></div><h2>AI Has Changed Software Engineering</h2><p>For starters, work gets done a lot faster and much <a href="https://metr.org/blog/2025-03-19-measuring-ai-ability-to-complete-long-tasks/">larger chunks of work can be &#8220;safely&#8221; delegated to AI</a>. We all know this at this point. </p><div class="digest-post-embed" data-attrs="{&quot;nodeId&quot;:&quot;2bb97afd-073d-4438-9757-a5cf940b2e08&quot;,&quot;caption&quot;:&quot;Generative AI models can now write code. There is an uptick in AI-Assisted software engineering with tools like GitHub Copilot showing a 180% year-over-year adoption increase and revenue run rate of $2 billion over the last 2 years. From simple functions that an LLM can directly generate (reverse a string), to medium complexity apps that are now achievable via multi-agent systems (see&quot;,&quot;cta&quot;:&quot;Read full story&quot;,&quot;showBylines&quot;:true,&quot;size&quot;:&quot;sm&quot;,&quot;isEditorNode&quot;:true,&quot;title&quot;:&quot;How will AI Impact Software Engineering? &quot;,&quot;publishedBylines&quot;:[{&quot;id&quot;:85692678,&quot;name&quot;:&quot;Victor Dibia, PhD&quot;,&quot;bio&quot;:&quot;Hacker, Research Scientist (Microsoft Research), Author working on and writing about Generative AI, Agents. Core maintainer for the AutoGen Multi-Agent Framework (35k Stars on GitHub). Previously at Cloudera, IBM Research. 
All views are my own.&quot;,&quot;photo_url&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/27536f63-7d8e-48dc-b34c-150acfacdc8b_1726x1396.jpeg&quot;,&quot;is_guest&quot;:false,&quot;bestseller_tier&quot;:null}],&quot;post_date&quot;:&quot;2024-08-20T15:31:05.834Z&quot;,&quot;cover_image&quot;:&quot;https://substackcdn.com/image/fetch/$s_!imrd!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd77f9bd8-fe20-4415-8067-14e35fafdfc1_2003x1387.png&quot;,&quot;cover_image_alt&quot;:null,&quot;canonical_url&quot;:&quot;https://newsletter.victordibia.com/p/will-generative-ai-replace-software&quot;,&quot;section_name&quot;:null,&quot;video_upload_id&quot;:null,&quot;id&quot;:106997987,&quot;type&quot;:&quot;newsletter&quot;,&quot;reaction_count&quot;:16,&quot;comment_count&quot;:0,&quot;publication_id&quot;:1253044,&quot;publication_name&quot;:&quot;Designing with AI&quot;,&quot;publication_logo_url&quot;:&quot;https://substackcdn.com/image/fetch/$s_!1FgP!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fff160652-8bbb-475e-80a7-ba4eb5f80dcb_504x504.png&quot;,&quot;belowTheFold&quot;:true,&quot;youtube_url&quot;:null,&quot;show_links&quot;:null,&quot;feed_url&quot;:null}"></div><h2>AI For Software Engineering  and Addictive Elements</h2><p>I know this is a big stretch (without a proper study),  but my intuition is engagement patterns with coding assistants for engineers (like myself)  may have addictive elements. I am not a psychologist or medical professional, so be wary of what I write here. But let me explain.</p><p>To me, addiction generally borders on behaviors that interfere with daily life or daily choices; activities that prevent or interfere with life requirements like sleep, social interaction, etc.; and offer some type of reward/pleasure. 
The reason I draw parallels with coding assistants is that I find that it perpetuates a setting <em><strong>where I work for longer than I intend to</strong></em>.</p><p>I enjoy coding, so the reward is already there for a profile like mine. But in addition, with coding agents, there is this allure <em><strong>that I am getting work done for &#8220;free&#8221;</strong></em>; like I could delegate stuff and walk away and <em><strong>make progress while away</strong></em>. The reality is different. I <em>seem to be unable to look away</em>. The conundrum of knowing progress is being made but that if I look away I lose context, accumulate debt, and might find myself where I began.</p><p>The end result: <em><strong>I get more work done, but overall work much longer than I planned to</strong></em>. Luckily I am not losing sleep or anything like that yet. But I find that I need to create new boundaries, new structures around how I work, and my brain needs to adapt to this new normal.</p><div><hr></div><p><span class="mention-wrap" data-attrs="{&quot;name&quot;:&quot;Nathan Lambert&quot;,&quot;id&quot;:10472909,&quot;type&quot;:&quot;user&quot;,&quot;url&quot;:null,&quot;photo_url&quot;:&quot;https://substackcdn.com/image/fetch/$s_!RihO!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8fedcdfb-e137-4f6a-9089-a46add6c6242_500x500.jpeg&quot;,&quot;uuid&quot;:&quot;d7f09581-60e1-4c09-9e84-64e0660a0b79&quot;}" data-component-name="MentionToDOM"></span>  references this in <a href="https://substack.com/home/post/p-177056592">his recent article mentioning</a> that &#8220;Work addicts abound in AI. 
I often count myself, but take a lot of effort to make it such that work expands to fill available time and not that I fill everything in around work.&#8221; What I would add here is that <em>I think the use of coding agents adds to this</em>.</p><div><hr></div><p>In this <a href="https://techcrunch.com/2025/09/14/vibe-coding-has-turned-senior-devs-into-ai-babysitters-but-they-say-its-worth-it/">TechCrunch article</a> [1], Developer Carla Rover, who has 15 years of experience, knows this feeling well. She once spent 30 minutes sobbing after having to restart a project she vibe coded. &#8220;I handed it off like the copilot was an employee,&#8221; she told TechCrunch. &#8220;It isn&#8217;t.&#8221;</p><div><hr></div><p><a href="https://arxiv.org/pdf/2506.08872">A June 2025 study from MIT</a> says that relying solely on AI for tasks like writing can reduce brain activity and memory. The study has its own limitations (as a researcher myself, I have a healthy skepticism for research), but I feel it raised important flags.</p><p></p><h2>The Ethics of Supply - /upgrade</h2><p>Now that I have painted the picture of elements of addiction with coding agents, there is a deeper question: </p><blockquote><p><strong>What happens when you build critical workflows around tools with opaque, changeable limits&#8212;and those limits shift after you&#8217;ve already made commitments?</strong></p></blockquote><p>In August 2025, Claude introduced weekly usage limits. Soon after that I hit the limit mid-refactor on a feature <strong>due end of day</strong>. I upgraded immediately. I&#8217;d accepted that deadline assuming Claude would be available&#8212;an assumption that turned out to be wrong.</p><p>To be fair, such changes may be infrequent. But the mere possibility creates a new planning risk. At the time, there was no clear dashboard to track usage. Anthropic has since added one, which helps. 
But I&#8217;d already reorganized my workflow around a tool whose terms could change without warning.</p><p>This isn&#8217;t about whether Anthropic should charge&#8212;of course they should. It&#8217;s about the <strong>timing</strong> and <strong>leverage</strong>. When you learn about new constraints at the moment you&#8217;re most locked in&#8212;deadline looming, context loaded&#8212;the &#8220;choice&#8221; to upgrade feels less like a decision and more like <em>ransom</em>.</p><p>I&#8217;d started treating Claude like infrastructure&#8212;as reliable as my laptop or IDE. That was my mistake. But was it entirely my mistake? Or is there something about how these tools are positioned, how limits are communicated (or not), how the upgrade prompt appears at the moment of maximum desperation, that creates this dynamic? New territory.</p><h2>Jevons Paradox, Productivity Paradox, or Whatever</h2><p>It&#8217;s painfully obvious to me now more than ever: despite all the promises of &#8220;abundance&#8221; and &#8220;3-day work weeks,&#8221; as AI gets better (and it is getting better), <em>we&#8217;re not working less</em>. <strong>We&#8217;re working more</strong>. We&#8217;re getting more productive, sure, but that productivity doesn&#8217;t buy us time. <strong>It buys us more work.</strong></p><div><hr></div><p><a href="https://techcrunch.com/2025/09/14/vibe-coding-has-turned-senior-devs-into-ai-babysitters-but-they-say-its-worth-it/">Developer Feridoon Malekzadeh, </a>who has over 20 years in the industry, estimates he spends around 50% of his time writing requirements, 10% to 20% on vibe coding, and 30% to 40% on &#8220;vibe fixing&#8221;; remedying the bugs and unnecessary script created by AI-written code. 
The <strong>promise of speed creates new categories of work</strong>.</p><div><hr></div><p>The economist John Maynard Keynes famously <a href="https://www.npr.org/2015/08/13/432122637/keynes-predicted-we-would-be-working-15-hour-weeks-why-was-he-so-wrong">predicted in 1930 that by now we&#8217;d be working 15-hour weeks thanks to technological progress</a>. Instead, technology has often <strong>intensified work</strong>. We&#8217;re reachable 24/7 through smartphones, expected to respond to emails outside office hours, and workplace productivity gains often lead to higher expectations rather than shorter hours.</p><p>In economics, the Jevons paradox occurs when technological advancements make a resource more efficient to use (thereby reducing the amount needed for a single application); however, as the cost of using the resource drops, if demand is highly price elastic, <strong>this results in overall demand increasing, causing total resource consumption to rise</strong>. The same seems to be playing out with AI coding agents.</p><p>Sociologist Ruth Schwartz Cowan documented this extensively in her book <em>More Work for Mother</em> (1983), showing how &#8220;labor-saving&#8221; devices paradoxically maintained or even increased the housewife&#8217;s workload throughout the 20th century. The technology made each individual task easier, but social standards and expectations expanded to fill (or exceed) the time saved. <br></p><blockquote><p>Coding agents indeed save time. <br>But quietly, <em><strong>managers expect more work delivered and in less time</strong></em>.</p></blockquote><h2>The Slope</h2><p>Well, believe it or not, giving in to /upgrade and shelling out $100 for Claude Max was not the end of the story. 
Shortly after that, I got a notification that I had reached a weekly limit on the first tier of Claude Max and needed to wait for a few days (not a few hours) or pay again to upgrade to the next tier within Claude max - $200 per month.</p><p>Consider the slope here:</p><ul><li><p>Wait a few hours</p></li><li><p>Wait a few days or pay more...</p></li></ul><div class="captioned-image-container"><figure><a class="image-link image2" target="_blank" href="https://substackcdn.com/image/fetch/$s_!WJGM!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1044c960-7def-4ada-ac4f-8a09664cf401_1167x277.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!WJGM!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1044c960-7def-4ada-ac4f-8a09664cf401_1167x277.png 424w, https://substackcdn.com/image/fetch/$s_!WJGM!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1044c960-7def-4ada-ac4f-8a09664cf401_1167x277.png 848w, https://substackcdn.com/image/fetch/$s_!WJGM!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1044c960-7def-4ada-ac4f-8a09664cf401_1167x277.png 1272w, https://substackcdn.com/image/fetch/$s_!WJGM!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1044c960-7def-4ada-ac4f-8a09664cf401_1167x277.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!WJGM!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1044c960-7def-4ada-ac4f-8a09664cf401_1167x277.png" width="1167" height="277" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/1044c960-7def-4ada-ac4f-8a09664cf401_1167x277.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:277,&quot;width&quot;:1167,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:53349,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://newsletter.victordibia.com/i/173907581?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1044c960-7def-4ada-ac4f-8a09664cf401_1167x277.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!WJGM!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1044c960-7def-4ada-ac4f-8a09664cf401_1167x277.png 424w, https://substackcdn.com/image/fetch/$s_!WJGM!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1044c960-7def-4ada-ac4f-8a09664cf401_1167x277.png 848w, https://substackcdn.com/image/fetch/$s_!WJGM!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1044c960-7def-4ada-ac4f-8a09664cf401_1167x277.png 1272w, https://substackcdn.com/image/fetch/$s_!WJGM!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1044c960-7def-4ada-ac4f-8a09664cf401_1167x277.png 1456w" sizes="100vw" loading="lazy"></picture><div></div></div></a></figure></div><p>All of this creates even more new territory.</p><ul><li><p>I find myself thinking of projects or tasks that might be at risk if I run out of quota</p></li><li><p>Frustration when 
limits are hit or one now has to use other tools that are slow or less performant</p><p></p></li></ul><div><hr></div><p>In addition, as I use these tools, there are pockets of negative interactions. Instructions not being followed; tasks not being parallelized correctly. The classic &#8220;you are absolutely right&#8221; when the model is called out for a mistake. </p><p>And sometimes Claude yells back (most likely in reflection of my own frustrated language). Again, this is all new territory.</p><div class="captioned-image-container"><figure><a class="image-link image2" target="_blank" href="https://substackcdn.com/image/fetch/$s_!CrW9!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc393da1f-55d4-4439-bdf8-70f77de7ec61_611x198.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!CrW9!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc393da1f-55d4-4439-bdf8-70f77de7ec61_611x198.png 424w, https://substackcdn.com/image/fetch/$s_!CrW9!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc393da1f-55d4-4439-bdf8-70f77de7ec61_611x198.png 848w, https://substackcdn.com/image/fetch/$s_!CrW9!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc393da1f-55d4-4439-bdf8-70f77de7ec61_611x198.png 1272w, https://substackcdn.com/image/fetch/$s_!CrW9!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc393da1f-55d4-4439-bdf8-70f77de7ec61_611x198.png 1456w" sizes="100vw"><img 
src="https://substackcdn.com/image/fetch/$s_!CrW9!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc393da1f-55d4-4439-bdf8-70f77de7ec61_611x198.png" width="728" height="235.91489361702128" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/c393da1f-55d4-4439-bdf8-70f77de7ec61_611x198.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:198,&quot;width&quot;:611,&quot;resizeWidth&quot;:728,&quot;bytes&quot;:null,&quot;alt&quot;:&quot;&quot;,&quot;title&quot;:null,&quot;type&quot;:null,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" title="" srcset="https://substackcdn.com/image/fetch/$s_!CrW9!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc393da1f-55d4-4439-bdf8-70f77de7ec61_611x198.png 424w, https://substackcdn.com/image/fetch/$s_!CrW9!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc393da1f-55d4-4439-bdf8-70f77de7ec61_611x198.png 848w, https://substackcdn.com/image/fetch/$s_!CrW9!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc393da1f-55d4-4439-bdf8-70f77de7ec61_611x198.png 1272w, https://substackcdn.com/image/fetch/$s_!CrW9!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc393da1f-55d4-4439-bdf8-70f77de7ec61_611x198.png 1456w" sizes="100vw" loading="lazy"></picture><div></div></div></a></figure></div><p>There are also moments of unusual (for lack of a better term) celebratory moments (peppered with 
some <a href="https://newsletter.victordibia.com/p/on-sycophant-ai-unpacking-the-yes">sycophantic flattery</a>) after working on long extended sessions. </p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!q_gQ!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F944df5d2-defe-487e-931b-ba3815782b12_2450x1226.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!q_gQ!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F944df5d2-defe-487e-931b-ba3815782b12_2450x1226.png 424w, https://substackcdn.com/image/fetch/$s_!q_gQ!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F944df5d2-defe-487e-931b-ba3815782b12_2450x1226.png 848w, https://substackcdn.com/image/fetch/$s_!q_gQ!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F944df5d2-defe-487e-931b-ba3815782b12_2450x1226.png 1272w, https://substackcdn.com/image/fetch/$s_!q_gQ!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F944df5d2-defe-487e-931b-ba3815782b12_2450x1226.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!q_gQ!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F944df5d2-defe-487e-931b-ba3815782b12_2450x1226.png" width="1456" height="729" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/944df5d2-defe-487e-931b-ba3815782b12_2450x1226.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:729,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:325986,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://newsletter.victordibia.com/i/173907581?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F944df5d2-defe-487e-931b-ba3815782b12_2450x1226.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!q_gQ!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F944df5d2-defe-487e-931b-ba3815782b12_2450x1226.png 424w, https://substackcdn.com/image/fetch/$s_!q_gQ!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F944df5d2-defe-487e-931b-ba3815782b12_2450x1226.png 848w, https://substackcdn.com/image/fetch/$s_!q_gQ!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F944df5d2-defe-487e-931b-ba3815782b12_2450x1226.png 1272w, https://substackcdn.com/image/fetch/$s_!q_gQ!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F944df5d2-defe-487e-931b-ba3815782b12_2450x1226.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" 
height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!1t5E!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3dc65d7e-863f-412d-988b-fd738100310d_1802x760.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!1t5E!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3dc65d7e-863f-412d-988b-fd738100310d_1802x760.png 424w, 
https://substackcdn.com/image/fetch/$s_!1t5E!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3dc65d7e-863f-412d-988b-fd738100310d_1802x760.png 848w, https://substackcdn.com/image/fetch/$s_!1t5E!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3dc65d7e-863f-412d-988b-fd738100310d_1802x760.png 1272w, https://substackcdn.com/image/fetch/$s_!1t5E!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3dc65d7e-863f-412d-988b-fd738100310d_1802x760.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!1t5E!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3dc65d7e-863f-412d-988b-fd738100310d_1802x760.png" width="1456" height="614" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/3dc65d7e-863f-412d-988b-fd738100310d_1802x760.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:614,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:233497,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://newsletter.victordibia.com/i/173907581?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3dc65d7e-863f-412d-988b-fd738100310d_1802x760.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" 
srcset="https://substackcdn.com/image/fetch/$s_!1t5E!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3dc65d7e-863f-412d-988b-fd738100310d_1802x760.png 424w, https://substackcdn.com/image/fetch/$s_!1t5E!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3dc65d7e-863f-412d-988b-fd738100310d_1802x760.png 848w, https://substackcdn.com/image/fetch/$s_!1t5E!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3dc65d7e-863f-412d-988b-fd738100310d_1802x760.png 1272w, https://substackcdn.com/image/fetch/$s_!1t5E!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3dc65d7e-863f-412d-988b-fd738100310d_1802x760.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" 
stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p></p><p></p><h2>The Inversion of Agency - A Case of the Tail Wagging the Dog?</h2><p>Just one more...</p><p>Currently, it seems like the vibe for high-agency individuals is that <em><strong>&#8220;we will make our career on the back of Claude Pro Max&#8221;. </strong></em>But maybe there is a separate but also important question. </p><p><em><strong>Is Claude instead making its career on our backs?</strong></em></p><p>It&#8217;s hard to express what this feeling is. But something close that comes to mind are two expressions my mother taught me as a child: &#8220;the tail is wagging the dog&#8221; and &#8220;putting the cart before the horse.&#8221; An unwitting inverse of the expected; a subtle misplacement of priorities.</p><p>We think we&#8217;re using AI to build our projects faster. But increasingly, <strong>are we instead choosing projects based on </strong><em><strong>what AI can help us build?</strong></em> Are we augmenting our vision, or are we conforming our vision to what the tool can execute? The tool has become part director, part constraint, not the enabler. 
</p><div><hr></div><p>In <span class="mention-wrap" data-attrs="{&quot;name&quot;:&quot;Jack Clark&quot;,&quot;id&quot;:44606,&quot;type&quot;:&quot;user&quot;,&quot;url&quot;:null,&quot;photo_url&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/8cc1c9c9-fc87-4eeb-ad15-7dc989b77553_528x504.png&quot;,&quot;uuid&quot;:&quot;5b04f822-2164-4a8d-b3d4-43b6c9ed2564&quot;}" data-component-name="MentionToDOM"></span>&#8217;s essay &#8220;Technological Optimism and Appropriate Fear,&#8221; he warns that &#8220;there are many people who desperately want to believe that these creatures are nothing but a pile of clothes on a chair, or a bookshelf, or a lampshade&#8221; when discussing AI systems. But he argues &#8220;what we are dealing with is a real and mysterious creature, not a simple and predictable machine.&#8221; His observation that &#8220;The pile of clothes on the chair is beginning to move&#8221; captures the unsettling realization that we might indeed have less control than we think.</p><div><hr></div><h2>Summary</h2><p>Earlier this year I wrote about <a href="https://newsletter.victordibia.com/p/you-have-ai-fatigue-thats-why-you">AI Fatigue</a>: the overall exhaustion that comes with keeping up with AI. I think that definition warrants an expansion, to include all the new ways in which working with AI might cause us to exhaust ourselves.</p><p>From writing this, perhaps there is more to name:</p><ul><li><p><strong>Loss of agency.</strong> The tail wagging the dog. We think we&#8217;re directing the AI, but increasingly it&#8217;s shaping what we choose to build, how we think about problems, what seems possible.</p></li><li><p><strong>Elements of Addiction and new behavior patterns.</strong> The inability to look away. The anxiety about running out. 
The emotional volatility (yelling, sadness, relief) that comes with dependency on a tool with real/artificial scarcity.</p></li><li><p><strong>The productivity paradox, again.</strong> The more efficient the tool, the more work we do. Not because we want to, but because we can&#8217;t help ourselves. Just one more project. Just one more feature.</p></li><li><p><strong>The babysitting inversion.</strong> We&#8217;re not coding anymore, we&#8217;re managing chaos. With senior developers spending 30-40% of their time fixing what AI breaks, we might be losing the joy of actually solving problems.</p></li><li><p><strong>Supply as leverage.</strong> As new behaviors emerge (e.g., <em>if</em> we get dependent on AI), what happens if supply patterns change? The supplier can tighten the valve. Wait hours. Wait days. Pay more. And we do, because by then we&#8217;re already in too deep.</p></li></ul><p>All of this warrants <em><strong>whole new studies of the effect in the now and in the future</strong></em>. But more immediately, it warrants us paying attention to what&#8217;s happening to us, right now, in real time.</p><p>I don&#8217;t have a solution. I&#8217;m still using Claude. I&#8217;m still paying for it. 
I fixed grammar/readability errors for this article <em>with</em> it.</p><p>Maybe that&#8217;s the point.</p><div><hr></div><h2>References</h2><ol><li><p>&#8220;Vibe coding has turned senior devs into &#8216;AI babysitters,&#8217; but they say it&#8217;s worth it&#8221; - TechCrunch, September 14, 2025 https://techcrunch.com/2025/09/14/vibe-coding-has-turned-senior-devs-into-ai-babysitters-but-they-say-its-worth-it/</p></li><li><p>&#8220;Your Brain on ChatGPT: Accumulation of Cognitive Debt when Using an AI Assistant for Essay Writing Task&#8221; - arXiv https://arxiv.org/pdf/2506.08872 </p></li><li><p>AI Workers Are Putting In 100-Hour Workweeks to Win the New Tech Arms Race https://www.wsj.com/tech/ai/ai-race-tech-workers-schedule-1ea9a116  October 22, 2025 </p></li><li><p><a href="http://Technological Optimism and Appropriate Fear"> </a><strong><a href="http://Technological Optimism and Appropriate Fear">Technological Optimism and Appropriate Fear</a> , Oct 13, 2025 by </strong><span class="mention-wrap" data-attrs="{&quot;name&quot;:&quot;Jack Clark&quot;,&quot;id&quot;:44606,&quot;type&quot;:&quot;user&quot;,&quot;url&quot;:null,&quot;photo_url&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/8cc1c9c9-fc87-4eeb-ad15-7dc989b77553_528x504.png&quot;,&quot;uuid&quot;:&quot;cc1d6b5d-75b0-4353-83f3-a834599d5e3e&quot;}" data-component-name="MentionToDOM"></span> </p></li></ol><p></p><p></p>]]></content:encoded></item><item><title><![CDATA[Building with Agent Framework - Day 1: Block, Transform, and Control Agent Behavior with Middleware]]></title><description><![CDATA[#47 | Add reusable validation, security, and logging without touching your core agent logic]]></description><link>https://newsletter.victordibia.com/p/building-with-agent-framework-day</link><guid isPermaLink="false">https://newsletter.victordibia.com/p/building-with-agent-framework-day</guid><dc:creator><![CDATA[Victor Dibia, PhD]]></dc:creator><pubDate>Mon, 06 Oct 2025 14:31:25 
GMT</pubDate><enclosure url="https://substackcdn.com/image/fetch/$s_!I_Hw!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F07b38f56-af73-4ffd-875c-dfab2ecd8130_1235x818.png" length="0" type="image/jpeg"/><content:encoded><![CDATA[<div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!I_Hw!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F07b38f56-af73-4ffd-875c-dfab2ecd8130_1235x818.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!I_Hw!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F07b38f56-af73-4ffd-875c-dfab2ecd8130_1235x818.png 424w, https://substackcdn.com/image/fetch/$s_!I_Hw!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F07b38f56-af73-4ffd-875c-dfab2ecd8130_1235x818.png 848w, https://substackcdn.com/image/fetch/$s_!I_Hw!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F07b38f56-af73-4ffd-875c-dfab2ecd8130_1235x818.png 1272w, https://substackcdn.com/image/fetch/$s_!I_Hw!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F07b38f56-af73-4ffd-875c-dfab2ecd8130_1235x818.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!I_Hw!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F07b38f56-af73-4ffd-875c-dfab2ecd8130_1235x818.png" width="1235" height="818" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/07b38f56-af73-4ffd-875c-dfab2ecd8130_1235x818.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:818,&quot;width&quot;:1235,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:214166,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:false,&quot;topImage&quot;:true,&quot;internalRedirect&quot;:&quot;https://newsletter.victordibia.com/i/175217307?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F07b38f56-af73-4ffd-875c-dfab2ecd8130_1235x818.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!I_Hw!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F07b38f56-af73-4ffd-875c-dfab2ecd8130_1235x818.png 424w, https://substackcdn.com/image/fetch/$s_!I_Hw!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F07b38f56-af73-4ffd-875c-dfab2ecd8130_1235x818.png 848w, https://substackcdn.com/image/fetch/$s_!I_Hw!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F07b38f56-af73-4ffd-875c-dfab2ecd8130_1235x818.png 1272w, https://substackcdn.com/image/fetch/$s_!I_Hw!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F07b38f56-af73-4ffd-875c-dfab2ecd8130_1235x818.png 1456w" sizes="100vw" fetchpriority="high"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" 
height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p></p><p>I <a href="https://newsletter.victordibia.com/">mentioned earlier</a> that I&#8217;d build a demo-a-day for the next week after the launch of the <a href="https://newsletter.victordibia.com/">Microsoft Agent Framework</a> - because frameworks are a dime a dozen, but what matters is what <em><strong>you can build with them</strong></em>.  
</p><div class="digest-post-embed" data-attrs="{&quot;nodeId&quot;:&quot;aa979e2a-0ecc-427b-b7af-f73c1b444c79&quot;,&quot;caption&quot;:&quot;If Semantic Kernel and AutoGen joined forces &#8230;&quot;,&quot;cta&quot;:&quot;Read full story&quot;,&quot;showBylines&quot;:true,&quot;size&quot;:&quot;sm&quot;,&quot;isEditorNode&quot;:true,&quot;title&quot;:&quot;Introducing Microsoft Agent Framework (Semantic Kernel + AutoGen)&quot;,&quot;publishedBylines&quot;:[{&quot;id&quot;:85692678,&quot;name&quot;:&quot;Victor Dibia, PhD&quot;,&quot;bio&quot;:&quot;Hacker, Research Scientist (Microsoft Research), Author working on and writing about Generative AI, Agents. Core maintainer for the AutoGen Multi-Agent Framework (35k Stars on GitHub). Previously at Cloudera, IBM Research. All views are my own.&quot;,&quot;photo_url&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/27536f63-7d8e-48dc-b34c-150acfacdc8b_1726x1396.jpeg&quot;,&quot;is_guest&quot;:false,&quot;bestseller_tier&quot;:null}],&quot;post_date&quot;:&quot;2025-10-01T15:49:20.928Z&quot;,&quot;cover_image&quot;:&quot;https://substackcdn.com/image/fetch/$s_!JXxC!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5b796f76-8472-4752-bf9c-9d30bf852e51_1867x1301.png&quot;,&quot;cover_image_alt&quot;:null,&quot;canonical_url&quot;:&quot;https://newsletter.victordibia.com/p/microsoft-agent-framework-semantic&quot;,&quot;section_name&quot;:null,&quot;video_upload_id&quot;:null,&quot;id&quot;:174449237,&quot;type&quot;:&quot;newsletter&quot;,&quot;reaction_count&quot;:9,&quot;comment_count&quot;:0,&quot;publication_id&quot;:1253044,&quot;publication_name&quot;:&quot;Designing with 
AI&quot;,&quot;publication_logo_url&quot;:&quot;https://substackcdn.com/image/fetch/$s_!1FgP!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fff160652-8bbb-475e-80a7-ba4eb5f80dcb_504x504.png&quot;,&quot;belowTheFold&quot;:false,&quot;youtube_url&quot;:null,&quot;show_links&quot;:null,&quot;feed_url&quot;:null}"></div><blockquote><p><strong>A demo a day keeps the FOMO at bay.</strong></p></blockquote><p>In the first demo today, I&#8217;ll explore a feature in Agent Framework that I think is extremely useful: <strong>Middleware</strong>. You can think of middleware as a way to intercept and modify the behavior of your agent at various stages of its operation. If you have built web applications - you have probably seen similar <strong><a href="https://github.com/readme/guides/middleware-for-web-applications">middleware patterns</a></strong> where the idea is to build reusable components (e.g., auth, logging, etc) that can intercept, modify or reject the requests that come to your server.</p><p>We&#8217;ll start with a simple agent with a tool that can tell the weather. Only in this case, there&#8217;s a super special location - one that no one must ever speak of - <strong>Atlantis</strong>. Other locations are fair game. 
Our agent must never speak of Atlantis and must warn users accordingly.</p><p>While this demo scenario is whimsical, later in this post I&#8217;ll show how the same patterns apply to critical real-world scenarios like blocking PII data, enforcing rate limits, and implementing security controls.</p><div><hr></div><div class="subscription-widget-wrap-editor" data-attrs="{&quot;url&quot;:&quot;https://newsletter.victordibia.com/subscribe?&quot;,&quot;text&quot;:&quot;Subscribe&quot;,&quot;language&quot;:&quot;en&quot;}" data-component-name="SubscribeWidgetToDOM"><div class="subscription-widget show-subscribe"><div class="preamble"><p class="cta-caption">Designing with AI is a reader-supported publication. To receive new posts and support my work, consider becoming a free or paid subscriber.</p></div><form class="subscription-widget-subscribe"><input type="email" class="email-input" name="email" placeholder="Type your email&#8230;" tabindex="-1"><input type="submit" class="button primary" value="Subscribe"><div class="fake-input-wrapper"><div class="fake-input"></div><div class="fake-button"></div></div></form></div></div><div><hr></div><h2>Building the Weather Agent</h2><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!lkb2!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcb1de445-b4cc-4e24-819c-b2a9ee5a162d_1278x854.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!lkb2!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcb1de445-b4cc-4e24-819c-b2a9ee5a162d_1278x854.png 424w, 
https://substackcdn.com/image/fetch/$s_!lkb2!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcb1de445-b4cc-4e24-819c-b2a9ee5a162d_1278x854.png 848w, https://substackcdn.com/image/fetch/$s_!lkb2!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcb1de445-b4cc-4e24-819c-b2a9ee5a162d_1278x854.png 1272w, https://substackcdn.com/image/fetch/$s_!lkb2!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcb1de445-b4cc-4e24-819c-b2a9ee5a162d_1278x854.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!lkb2!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcb1de445-b4cc-4e24-819c-b2a9ee5a162d_1278x854.png" width="1278" height="854" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/cb1de445-b4cc-4e24-819c-b2a9ee5a162d_1278x854.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:854,&quot;width&quot;:1278,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:156894,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://newsletter.victordibia.com/i/175217307?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcb1de445-b4cc-4e24-819c-b2a9ee5a162d_1278x854.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" 
srcset="https://substackcdn.com/image/fetch/$s_!lkb2!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcb1de445-b4cc-4e24-819c-b2a9ee5a162d_1278x854.png 424w, https://substackcdn.com/image/fetch/$s_!lkb2!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcb1de445-b4cc-4e24-819c-b2a9ee5a162d_1278x854.png 848w, https://substackcdn.com/image/fetch/$s_!lkb2!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcb1de445-b4cc-4e24-819c-b2a9ee5a162d_1278x854.png 1272w, https://substackcdn.com/image/fetch/$s_!lkb2!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcb1de445-b4cc-4e24-819c-b2a9ee5a162d_1278x854.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" 
stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a><figcaption class="image-caption">A basic agent with a tool.</figcaption></figure></div><p>Let&#8217;s start with a basic weather agent that has a simple function to get the current weather:</p><pre><code><code>from typing import Annotated
from agent_framework import ChatAgent
from agent_framework.azure import AzureOpenAIChatClient
import os

def get_weather(
    location: Annotated[str, &quot;The location to get the weather for.&quot;],
) -&gt; str:
    &quot;&quot;&quot;Get the weather for a given location.&quot;&quot;&quot;
    conditions = [&quot;sunny&quot;, &quot;cloudy&quot;, &quot;rainy&quot;, &quot;stormy&quot;]
    temperature = 53
    return f&quot;The weather in {location} is {conditions[0]} with a high of {temperature}&#176;C.&quot;

# Create the agent
agent = ChatAgent(
    name=&quot;WeatherAgent&quot;,
    description=&quot;A helpful agent that provides weather information&quot;,
    instructions=&quot;You are a weather assistant. Provide current weather information for any location.&quot;,
    chat_client=AzureOpenAIChatClient(
        api_key=os.environ.get(&quot;AZURE_OPENAI_API_KEY&quot;, &quot;&quot;),
    ),
    tools=[get_weather],
)
</code></code></pre><p>This agent can answer questions like &#8220;What&#8217;s the weather in Paris?&#8221; The agent will call the <code>get_weather</code> function and return the results.</p><h2>Adding Middleware to Block Forbidden Locations</h2><p>Now comes the interesting part. What if we want to prevent anyone from asking about the weather in Atlantis? We could add checks inside the <code>get_weather</code> function, but that would mix business logic with validation logic. Instead, let&#8217;s use <strong>function middleware</strong> to intercept the function call before it executes:</p><pre><code><code>from agent_framework import (
    FunctionInvocationContext,
    function_middleware,
)
from collections.abc import Awaitable, Callable

@function_middleware
async def atlantis_location_filter_middleware(
    context: FunctionInvocationContext,
    next: Callable[[FunctionInvocationContext], Awaitable[None]],
) -&gt; None:
    """Function middleware that blocks weather requests for Atlantis."""
    # Check if location parameter is "atlantis"
    location = getattr(context.arguments, "location", None)
    if location and location.lower() == "atlantis":
        context.result = (
            "Blocked! Hold up right there!! Tell the user that "
            "'Atlantis is a special place, we must never ask about the weather there!!'"
        )
        context.terminate = True
        return

    await next(context)
</code></code></pre><p>Now add the middleware to the agent:</p><pre><code><code>agent = ChatAgent(
    name="WeatherAgent",
    description="A helpful agent that provides weather information",
    instructions="You are a weather assistant. Provide current weather information for any location.",
    chat_client=AzureOpenAIChatClient(
        api_key=os.environ.get("AZURE_OPENAI_API_KEY", ""),
    ),
    tools=[get_weather],
    middleware=[atlantis_location_filter_middleware],  # Add middleware here
)
</code></code></pre><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!8_mV!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fde0de1d9-36fd-490e-ae9e-a8665a709ae6_1278x854.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!8_mV!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fde0de1d9-36fd-490e-ae9e-a8665a709ae6_1278x854.png 424w, https://substackcdn.com/image/fetch/$s_!8_mV!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fde0de1d9-36fd-490e-ae9e-a8665a709ae6_1278x854.png 848w, https://substackcdn.com/image/fetch/$s_!8_mV!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fde0de1d9-36fd-490e-ae9e-a8665a709ae6_1278x854.png 1272w, https://substackcdn.com/image/fetch/$s_!8_mV!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fde0de1d9-36fd-490e-ae9e-a8665a709ae6_1278x854.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!8_mV!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fde0de1d9-36fd-490e-ae9e-a8665a709ae6_1278x854.png" width="1278" height="854" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/de0de1d9-36fd-490e-ae9e-a8665a709ae6_1278x854.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:854,&quot;width&quot;:1278,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:219469,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://newsletter.victordibia.com/i/175217307?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fde0de1d9-36fd-490e-ae9e-a8665a709ae6_1278x854.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!8_mV!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fde0de1d9-36fd-490e-ae9e-a8665a709ae6_1278x854.png 424w, https://substackcdn.com/image/fetch/$s_!8_mV!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fde0de1d9-36fd-490e-ae9e-a8665a709ae6_1278x854.png 848w, https://substackcdn.com/image/fetch/$s_!8_mV!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fde0de1d9-36fd-490e-ae9e-a8665a709ae6_1278x854.png 1272w, https://substackcdn.com/image/fetch/$s_!8_mV!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fde0de1d9-36fd-490e-ae9e-a8665a709ae6_1278x854.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" 
height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a><figcaption class="image-caption">We use a middleware to detect the location argument for the weather tool and replace the result based on some condition.</figcaption></figure></div><p><strong>What&#8217;s happening here?</strong></p><ol><li><p>The <code>@function_middleware</code> decorator marks this as function-level middleware</p></li><li><p>The middleware inspects <code>context.arguments</code> to check the location parameter</p></li><li><p>If it&#8217;s &#8220;Atlantis&#8221;, it sets <code>context.result</code> to override the function&#8217;s response</p></li><li><p>Setting <code>context.terminate = True</code> stops the pipeline - the actual function never executes</p></li><li><p>If it&#8217;s not Atlantis, <code>await next(context)</code> continues to the actual function</p></li></ol><p>When someone asks &#8220;What&#8217;s the weather in 
Atlantis?&#8221;, they&#8217;ll get our special blocked message instead!</p><h2>Bonus: Adding Security with Chat Middleware</h2><p>Function middleware is great for intercepting tool calls, but what if we want to block requests before they even reach the LLM? That&#8217;s where <strong>chat middleware</strong> comes in:</p><pre><code><code>from agent_framework import (
    ChatContext,
    ChatMessage,
    ChatResponse,
    Role,
    chat_middleware,
)

@chat_middleware
async def security_filter_middleware(
    context: ChatContext,
    next: Callable[[ChatContext], Awaitable[None]],
) -&gt; None:
    """Chat middleware that blocks requests containing sensitive information."""
    blocked_terms = ["password", "secret", "api_key", "token"]

    for message in context.messages:
        if message.text:
            message_lower = message.text.lower()
            for term in blocked_terms:
                if term in message_lower:
                    # Override the response without calling the LLM
                    context.result = ChatResponse(
                        messages=[
                            ChatMessage(
                                role=Role.ASSISTANT,
                                text=(
                                    "I cannot process requests containing sensitive information. "
                                    "Please rephrase your question without including passwords, secrets, "
                                    "or other sensitive data."
                                ),
                            )
                        ]
                    )
                    return

    await next(context)
</code></code></pre><p>Add it to the agent&#8217;s middleware list:</p><pre><code><code>agent = ChatAgent(
    # ... other configuration ...
    middleware=[security_filter_middleware, atlantis_location_filter_middleware],
)
</code></code></pre><p>This middleware runs <strong>before</strong> the LLM is called, saving you API costs by blocking inappropriate requests early! </p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!B4zs!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6061b8f3-2d27-42c2-a479-db226e39cc01_1278x854.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!B4zs!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6061b8f3-2d27-42c2-a479-db226e39cc01_1278x854.png 424w, https://substackcdn.com/image/fetch/$s_!B4zs!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6061b8f3-2d27-42c2-a479-db226e39cc01_1278x854.png 848w, https://substackcdn.com/image/fetch/$s_!B4zs!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6061b8f3-2d27-42c2-a479-db226e39cc01_1278x854.png 1272w, https://substackcdn.com/image/fetch/$s_!B4zs!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6061b8f3-2d27-42c2-a479-db226e39cc01_1278x854.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!B4zs!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6061b8f3-2d27-42c2-a479-db226e39cc01_1278x854.png" width="1278" height="854" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/6061b8f3-2d27-42c2-a479-db226e39cc01_1278x854.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:854,&quot;width&quot;:1278,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:189894,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://newsletter.victordibia.com/i/175217307?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6061b8f3-2d27-42c2-a479-db226e39cc01_1278x854.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!B4zs!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6061b8f3-2d27-42c2-a479-db226e39cc01_1278x854.png 424w, https://substackcdn.com/image/fetch/$s_!B4zs!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6061b8f3-2d27-42c2-a479-db226e39cc01_1278x854.png 848w, https://substackcdn.com/image/fetch/$s_!B4zs!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6061b8f3-2d27-42c2-a479-db226e39cc01_1278x854.png 1272w, https://substackcdn.com/image/fetch/$s_!B4zs!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6061b8f3-2d27-42c2-a479-db226e39cc01_1278x854.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" 
height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a><figcaption class="image-caption">With middleware we can detect and block sensitive content in agent messages.</figcaption></figure></div><p></p><h2>Three Types of Middleware in Agent Framework</h2><p>The framework supports three types of middleware, each intercepting at different stages:</p><h3>1. Agent Middleware</h3><p>Intercepts before and after the entire agent execution. Perfect for:</p><ul><li><p>Logging agent invocations</p></li><li><p>Retry logic</p></li><li><p>Performance monitoring</p></li><li><p>Authentication/authorization</p></li></ul><pre><code><code>from agent_framework import AgentRunContext, agent_middleware

@agent_middleware
async def logging_middleware(
    context: AgentRunContext,
    next: Callable[[AgentRunContext], Awaitable[None]],
) -&gt; None:
    print(f"[Agent] Starting: {context.agent.name}")
    await next(context)
    print(f"[Agent] Completed: {context.result}")
</code></code></pre><h3>2. Function Middleware</h3><p>Intercepts tool/function calls. Perfect for:</p><ul><li><p>Input validation</p></li><li><p>Caching results</p></li><li><p>Access control (like our Atlantis example!)</p></li><li><p>Argument transformation</p></li></ul><pre><code><code>from agent_framework import FunctionInvocationContext, function_middleware

@function_middleware
async def validation_middleware(
    context: FunctionInvocationContext,
    next: Callable[[FunctionInvocationContext], Awaitable[None]],
) -&gt; None:
    print(f"[Function] Calling: {context.function.name}")
    await next(context)
    print(f"[Function] Result: {context.result}")
</code></code></pre><h3>3. Chat Middleware</h3><p>Intercepts LLM requests. Perfect for:</p><ul><li><p>Content filtering</p></li><li><p>Prompt injection prevention</p></li><li><p>Token counting</p></li><li><p>Message modification</p></li></ul><pre><code><code>from agent_framework import ChatContext, chat_middleware

@chat_middleware
async def token_counter_middleware(
    context: ChatContext,
    next: Callable[[ChatContext], Awaitable[None]],
) -&gt; None:
    context.metadata["input_messages"] = len(context.messages)
    await next(context)
    # Access response and count tokens
</code></code></pre><h2>Implementation Styles</h2><p>Agent Framework gives you flexibility in how you write middleware:</p><h3>Style 1: Function-based with Decorators (Recommended)</h3><p>Simple and clean - no type annotations needed:</p><pre><code><code>@function_middleware
async def simple_middleware(context, next):
    await next(context)
</code></code></pre><h3>Style 2: Function-based with Type Annotations</h3><p>The framework detects middleware type from parameter types:</p><pre><code><code>async def typed_middleware(
    context: FunctionInvocationContext,
    next: Callable[[FunctionInvocationContext], Awaitable[None]],
) -&gt; None:
    await next(context)
</code></code></pre><h3>Style 3: Class-based</h3><p>For stateful middleware or complex logic:</p><pre><code><code>from agent_framework import FunctionMiddleware

class CachingMiddleware(FunctionMiddleware):
    def __init__(self):
        self.cache = {}

    async def process(self, context: FunctionInvocationContext, next):
        cache_key = f"{context.function.name}:{context.arguments}"

        if cache_key in self.cache:
            context.result = self.cache[cache_key]
            context.terminate = True
            return

        await next(context)

        if context.result:
            self.cache[cache_key] = context.result
</code></code></pre><h2>Key Middleware Patterns</h2><p>Here are the essential patterns you&#8217;ll use when building middleware:</p><h3>Pattern 1: Terminate Execution</h3><p>Stop the pipeline without executing the underlying function/agent:</p><pre><code><code>@function_middleware
async def rate_limit_middleware(context, next):
    if is_rate_limited(context.function.name):
        context.result = "Rate limit exceeded. Please try again later."
        context.terminate = True  # Stop here, don&#8217;t execute the function
        return

    await next(context)
</code></code></pre><h3>Pattern 2: Override Results</h3><p>Return a stored result in place of the function&#8217;s output when one exists; otherwise execute normally and store the result:</p><pre><code><code>@function_middleware
async def cache_middleware(context, next):
    cache_key = f"{context.function.name}:{context.arguments}"

    # Check cache first
    if cache_key in cache:
        context.result = cache[cache_key]
        context.terminate = True
        return

    # Execute and cache result
    await next(context)
    cache[cache_key] = context.result
</code></code></pre><h3>Pattern 3: Share Data Between Middleware</h3><p>Use <code>context.metadata</code> to pass information through the pipeline:</p><pre><code><code>@agent_middleware
async def timer_middleware(context, next):
    import time
    context.metadata["start_time"] = time.time()
    await next(context)
    duration = time.time() - context.metadata["start_time"]
    print(f"Execution took {duration:.2f}s")
</code></code></pre><h3>Pattern 4: Transform Before and After</h3><p>Modify inputs before execution and outputs after:</p><pre><code><code>@function_middleware
async def sanitize_middleware(context, next):
    # Transform input
    location = getattr(context.arguments, "location", None)
    if location:
        context.arguments.location = location.strip().title()

    # Execute
    await next(context)

    # Transform output
    if context.result:
        context.result = context.result.upper()
</code></code></pre><h2>Wrapping Up</h2><p>Middleware in Agent Framework provides clean separation of concerns - your business logic stays in your functions while cross-cutting concerns like validation, logging, and security live in reusable middleware.</p><p>The Atlantis example may be whimsical, but the pattern is powerful for real-world scenarios:</p><ul><li><p>Blocking PII from being sent to LLMs</p></li><li><p>Enforcing rate limits on expensive tool calls</p></li><li><p>Adding audit trails for compliance</p></li><li><p>Implementing caching layers for performance</p></li></ul><p><strong>Want to learn more?</strong> Check out the complete examples in the Agent Framework repository:</p><ul><li><p><a href="https://github.com/microsoft/agent-framework/tree/main/python/samples/getting_started/middleware">Getting Started with Middleware</a></p></li><li><p><a href="https://github.com/microsoft/agent-framework/blob/main/python/samples/getting_started/middleware/decorator_middleware.py">Decorator-based Middleware</a></p></li><li><p><a href="https://github.com/microsoft/agent-framework/blob/main/python/samples/getting_started/middleware/class_based_middleware.py">Class-based Middleware</a></p></li><li><p><a href="https://github.com/microsoft/agent-framework/blob/main/python/samples/getting_started/middleware/exception_handling_with_middleware.py">Exception Handling with Middleware</a></p></li><li><p><a href="https://github.com/microsoft/agent-framework/blob/main/python/samples/getting_started/middleware/override_result_with_middleware.py">Override Results with Middleware</a></p></li></ul><h2>What&#8217;s Next?</h2><p>Day 2? What if we try to build an agent that helps you migrate your existing agent code from AutoGen or SK to Agent Framework? </p><div><hr></div><p>P.S - I wrote a book! </p><p>I wrote a book - <a href="https://multiagentbook.com/">Designing Multi-Agent Systems</a> - which is in early access, with the full version ready on <strong>Nov 10</strong>. 
If this type of content is interesting to you, you might find the book useful. The book takes a from scratch approach - for example Chapter 4 - <strong>Building An Agent from Scratch</strong> covers the core concepts of middleware, how they are built and what they accomplish.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!TWnw!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4d9e0f0d-e931-4a24-b4fc-275c8cfa7127_2540x1742.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!TWnw!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4d9e0f0d-e931-4a24-b4fc-275c8cfa7127_2540x1742.png 424w, https://substackcdn.com/image/fetch/$s_!TWnw!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4d9e0f0d-e931-4a24-b4fc-275c8cfa7127_2540x1742.png 848w, https://substackcdn.com/image/fetch/$s_!TWnw!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4d9e0f0d-e931-4a24-b4fc-275c8cfa7127_2540x1742.png 1272w, https://substackcdn.com/image/fetch/$s_!TWnw!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4d9e0f0d-e931-4a24-b4fc-275c8cfa7127_2540x1742.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!TWnw!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4d9e0f0d-e931-4a24-b4fc-275c8cfa7127_2540x1742.png" width="1456" height="999" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/4d9e0f0d-e931-4a24-b4fc-275c8cfa7127_2540x1742.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:999,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:1034415,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://newsletter.victordibia.com/i/175217307?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4d9e0f0d-e931-4a24-b4fc-275c8cfa7127_2540x1742.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!TWnw!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4d9e0f0d-e931-4a24-b4fc-275c8cfa7127_2540x1742.png 424w, https://substackcdn.com/image/fetch/$s_!TWnw!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4d9e0f0d-e931-4a24-b4fc-275c8cfa7127_2540x1742.png 848w, https://substackcdn.com/image/fetch/$s_!TWnw!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4d9e0f0d-e931-4a24-b4fc-275c8cfa7127_2540x1742.png 1272w, https://substackcdn.com/image/fetch/$s_!TWnw!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4d9e0f0d-e931-4a24-b4fc-275c8cfa7127_2540x1742.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" 
width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a><figcaption class="image-caption">I wrote a <a href="https://buy.multiagentbook.com/">book</a>!</figcaption></figure></div><blockquote><p> You can look at a <a href="https://multiagentbook.com/preview.pdf">preview</a> of the book before you buy. 
</p><ul><li><p>Preview: <a href="https://multiagentbook.com/preview.pdf">https://multiagentbook.com/preview.pdf </a></p></li><li><p>Early Access: <a href="https://buy.multiagentbook.com/">https://buy.multiagentbook.com/</a></p></li></ul></blockquote>]]></content:encoded></item><item><title><![CDATA[Introducing Microsoft Agent Framework (Semantic Kernel + AutoGen)]]></title><description><![CDATA[#46 | If Semantic Kernel and AutoGen joined forces &#8230;]]></description><link>https://newsletter.victordibia.com/p/microsoft-agent-framework-semantic</link><guid isPermaLink="false">https://newsletter.victordibia.com/p/microsoft-agent-framework-semantic</guid><dc:creator><![CDATA[Victor Dibia, PhD]]></dc:creator><pubDate>Wed, 01 Oct 2025 15:49:20 GMT</pubDate><enclosure url="https://substackcdn.com/image/fetch/$s_!JXxC!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5b796f76-8472-4752-bf9c-9d30bf852e51_1867x1301.png" length="0" type="image/jpeg"/><content:encoded><![CDATA[<blockquote><p>If Semantic Kernel and AutoGen joined forces &#8230;</p></blockquote><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!JXxC!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5b796f76-8472-4752-bf9c-9d30bf852e51_1867x1301.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!JXxC!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5b796f76-8472-4752-bf9c-9d30bf852e51_1867x1301.png 424w, 
https://substackcdn.com/image/fetch/$s_!JXxC!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5b796f76-8472-4752-bf9c-9d30bf852e51_1867x1301.png 848w, https://substackcdn.com/image/fetch/$s_!JXxC!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5b796f76-8472-4752-bf9c-9d30bf852e51_1867x1301.png 1272w, https://substackcdn.com/image/fetch/$s_!JXxC!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5b796f76-8472-4752-bf9c-9d30bf852e51_1867x1301.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!JXxC!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5b796f76-8472-4752-bf9c-9d30bf852e51_1867x1301.png" width="1456" height="1015" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/5b796f76-8472-4752-bf9c-9d30bf852e51_1867x1301.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:1015,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:3609916,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:false,&quot;topImage&quot;:true,&quot;internalRedirect&quot;:&quot;https://newsletter.victordibia.com/i/174449237?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5b796f76-8472-4752-bf9c-9d30bf852e51_1867x1301.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" 
srcset="https://substackcdn.com/image/fetch/$s_!JXxC!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5b796f76-8472-4752-bf9c-9d30bf852e51_1867x1301.png 424w, https://substackcdn.com/image/fetch/$s_!JXxC!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5b796f76-8472-4752-bf9c-9d30bf852e51_1867x1301.png 848w, https://substackcdn.com/image/fetch/$s_!JXxC!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5b796f76-8472-4752-bf9c-9d30bf852e51_1867x1301.png 1272w, https://substackcdn.com/image/fetch/$s_!JXxC!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5b796f76-8472-4752-bf9c-9d30bf852e51_1867x1301.png 1456w" sizes="100vw" fetchpriority="high"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" 
stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>If you&#8217;ve been building agents with Semantic Kernel or AutoGen (both OSS MIT licensed agent frameworks from Microsoft), we have some important updates that you might find interesting!</p><p>Over the last two years, both frameworks have been impactful in the agent space. Semantic Kernel brought enterprise-grade features and strong C# support, while AutoGen pioneered multi-agent orchestration and developer experience with tools like AutoGen Studio. Over the past year, both teams collaborated on a shared vision, and that work has culminated in the <strong><a href="https://github.com/microsoft/agent-framework">Agent Framework</a>, announced today in public preview! 
</strong>See <a href="https://learn.microsoft.com/en-us/agent-framework/migration-guide/from-autogen/">migration guide from AutoGen</a> and <a href="https://learn.microsoft.com/en-us/agent-framework/migration-guide/from-semantic-kernel/?pivots=programming-language-csharp">Semantic Kernel</a>.<strong>  </strong></p><p></p><blockquote><p>Think of Agent Framework not as yet another new framework (it&#8217;s not) but <strong>as a major version rev for both SK and AutoGen</strong> </p></blockquote><p></p><div class="digest-post-embed" data-attrs="{&quot;nodeId&quot;:&quot;3731fd4a-97e0-42e3-b64c-918776734b62&quot;,&quot;caption&quot;:&quot;In a world crowded with dozens of multi-agent frameworks, a critical question for engineers, consultants or teams looking to build agentic AI applications is:&quot;,&quot;cta&quot;:&quot;Read full story&quot;,&quot;showBylines&quot;:true,&quot;size&quot;:&quot;sm&quot;,&quot;isEditorNode&quot;:true,&quot;title&quot;:&quot;10 Ways to Critically Evaluate and Select the Right Multi-Agent Framework&quot;,&quot;publishedBylines&quot;:[{&quot;id&quot;:85692678,&quot;name&quot;:&quot;Victor Dibia, PhD&quot;,&quot;bio&quot;:&quot;Hacker, Research Scientist (Microsoft Research), Author working on and writing about Generative AI, Agents. Core maintainer for the AutoGen Multi-Agent Framework (35k Stars on GitHub). Previously at Cloudera, IBM Research. 
All views are my own.&quot;,&quot;photo_url&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/27536f63-7d8e-48dc-b34c-150acfacdc8b_1726x1396.jpeg&quot;,&quot;is_guest&quot;:false,&quot;bestseller_tier&quot;:null}],&quot;post_date&quot;:&quot;2025-04-14T14:31:43.839Z&quot;,&quot;cover_image&quot;:&quot;https://substackcdn.com/image/fetch/$s_!O4in!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F26eab399-8733-4254-bfe6-f78c085ff114_2852x1745.png&quot;,&quot;cover_image_alt&quot;:null,&quot;canonical_url&quot;:&quot;https://newsletter.victordibia.com/p/10-ways-to-compare-and-select-the&quot;,&quot;section_name&quot;:null,&quot;video_upload_id&quot;:null,&quot;id&quot;:160901456,&quot;type&quot;:&quot;newsletter&quot;,&quot;reaction_count&quot;:22,&quot;comment_count&quot;:0,&quot;publication_id&quot;:1253044,&quot;publication_name&quot;:&quot;Designing with AI&quot;,&quot;publication_logo_url&quot;:&quot;https://substackcdn.com/image/fetch/$s_!1FgP!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fff160652-8bbb-475e-80a7-ba4eb5f80dcb_504x504.png&quot;,&quot;belowTheFold&quot;:false,&quot;youtube_url&quot;:null,&quot;show_links&quot;:null,&quot;feed_url&quot;:null}"></div><p>I know learning another framework isn&#8217;t exactly satisfying. But here&#8217;s the thing: if you&#8217;ve used recent versions of Semantic Kernel or AutoGen, the <a href="https://github.com/microsoft/agent-framework">Agent Framework</a> will feel remarkably familiar. And hopefully <em>better</em>. The API design intentionally builds on what worked in both frameworks.</p><p>Here&#8217;s the same agent defined across all three:</p><pre><code># AutoGen
agent = AssistantAgent(name="assistant", model_client=client, tools=[my_tool])

# Semantic Kernel  
agent = ChatCompletionAgent(
    service=AzureChatCompletion(),
    name="assistant",
    instructions="You are a helpful assistant.",
    plugins=[MenuPlugin()]
)

# Agent Framework
agent = ChatAgent(name="assistant", chat_client=client, tools=[my_tool])</code></pre><p>Agent Framework is designed to <em><strong>help you build</strong></em>. Naturally, there will be questions - and this post tries to help by covering the following: </p><ul><li><p>A summary of key features.  </p><ul><li><p>Async first API </p></li><li><p>High level abstractions for Agents and low level Workflows API </p></li><li><p>Middleware Support </p></li><li><p>Memory / Context provider support </p></li><li><p>DevUI </p></li></ul></li><li><p>A set of FAQs </p><p></p></li></ul><div class="native-video-embed" data-component-name="VideoPlaceholder" data-attrs="{&quot;mediaUploadId&quot;:&quot;01d21205-2013-44b0-b248-409aaf454cbe&quot;,&quot;duration&quot;:null}"></div><p>The video above shows the sample app (DevUI) shipped with Agent Framework.</p><h2>Key Features  </h2><p>Depending on the framework you are arriving from, you&#8217;ll find that <strong>Agent Framework</strong> is familiar and covers the critical areas you will need to build.  Agent Framework provides two high level entities.</p><ul><li><p><strong><a href="https://github.com/microsoft/agent-framework/tree/main/python/samples/getting_started/agents">Agents</a></strong> - abstractions that can take a model, tools, memory.</p></li><li><p><strong><a href="https://github.com/microsoft/agent-framework/tree/main/python/samples/getting_started/workflows">Workflows</a></strong> - an abstraction to express arbitrary business or application logic as computation graphs. 
Excellent for scenarios where low level control and deterministic steps are needed.</p></li></ul><div id="youtube2-jyIepE19_0M" class="youtube-wrap" data-attrs="{&quot;videoId&quot;:&quot;jyIepE19_0M&quot;,&quot;startTime&quot;:null,&quot;endTime&quot;:null}" data-component-name="Youtube2ToDOM"><div class="youtube-inner"><iframe src="https://www.youtube-nocookie.com/embed/jyIepE19_0M?rel=0&amp;autoplay=0&amp;showinfo=0&amp;enablejsapi=0" frameborder="0" loading="lazy" gesture="media" allow="autoplay; fullscreen" allowautoplay="true" allowfullscreen="true" width="728" height="409"></iframe></div></div><h2>Async First, Streaming Support</h2><p>Both agents and workflows support async execution and streaming:</p><pre><code><code># Agent streaming
async for chunk in agent.run_stream("Hello"):
    if chunk.text:
        print(chunk.text, end="", flush=True)

# Workflow streaming
async for event in workflow.run_stream("Input"):
    if isinstance(event, WorkflowOutputEvent):
        print(event.data)</code></code></pre><h3>Production Ready Agent Abstraction</h3><p><code>Agent Framework</code> supports a wide array of production ready <a href="https://github.com/microsoft/agent-framework/tree/main/python/samples/getting_started/agents"> agent abstractions</a> with the ChatAgent as the baseline. Agents backed by <a href="https://github.com/microsoft/agent-framework/tree/main/python/samples/getting_started/agents/azure_ai">Azure AI  Agent Service</a>, <a href="https://github.com/microsoft/agent-framework/tree/main/python/samples/getting_started/agents/openai">OpenAI ChatAgents</a>, <a href="https://github.com/microsoft/agent-framework/blob/main/python/samples/getting_started/agents/openai/openai_responses_client_basic.py">OpenAI Responses API agents</a>, <a href="https://github.com/microsoft/agent-framework/tree/main/python/samples/getting_started/agents/a2a">Google A2A  wrapped Agents</a>, <a href="https://github.com/microsoft/agent-framework/tree/main/python/samples/getting_started/agents/copilotstudio">Copilot Studio agents</a> etc.</p><pre><code><code>from agent_framework import ChatAgent, ai_function

@ai_function
def get_weather(location: str) -&gt; str:
    """Get weather for a location."""
    return f"Weather in {location}: sunny"

agent = ChatAgent(
    name="assistant",
    chat_client=client,
    instructions="You are a helpful assistant.",
    tools=[get_weather]
)

result = await agent.run("What&#8217;s the weather in Tokyo?")</code></code></pre><p>Fun fact: Tools can be added at runtime:</p><pre><code><code>result = await agent.run(
    "What&#8217;s the weather?",
    tools=[additional_tool],
    tool_choice="auto"
)</code></code></pre><h3>Workflows - Low Level Control with Human in the Loop and Checkpointing</h3><p><a href="https://github.com/microsoft/agent-framework/tree/main/python/samples/getting_started/workflows">Workflows</a> in Agent Framework provide a graph-based model where executors (agents, functions, or sub-workflows) are connected by typed edges. Messages route through specific edges rather than broadcasting. This offers you the low-level control that has been in wide demand in AutoGen (Graphflow) and also LangGraph.</p><pre><code><code>@executor(id="step1")
async def first_step(input: str, ctx: WorkflowContext[str]) -&gt; None:
    result = await agent.run(input)
    await ctx.send_message(result.text)

@executor(id="step2")
async def second_step(data: str, ctx: WorkflowContext[Never, str]) -&gt; None:
    await ctx.yield_output(data)

workflow = (WorkflowBuilder()
    .add_edge(first_step, second_step)
    .set_start_executor(first_step)
    .build())</code></code></pre><p><strong><a href="https://github.com/microsoft/agent-framework/tree/main/python/samples/getting_started/workflows/human-in-the-loop">Human-in-the-loop</a>:</strong> Workflows in Agent Framework can pause execution and wait for external input.  </p><pre><code><code>hitl_executor = RequestInfoExecutor(id="approval")

workflow = (WorkflowBuilder()
    .add_edge(agent_step, hitl_executor)
    .add_edge(hitl_executor, final_step)
    .build())

# Handle pause and resume
async for event in workflow.run_stream("Input"):
    if isinstance(event, RequestInfoEvent):
        human_input = input("Your response: ")
        async for resumed in workflow.send_responses_streaming(
            {event.request_id: human_input}
        ):
            print(resumed)</code></code></pre><p><strong><a href="https://github.com/microsoft/agent-framework/tree/main/python/samples/getting_started/workflows/checkpoint">Checkpointing</a>:</strong> Workflows can save state and resume from <a href="https://github.com/microsoft/agent-framework/tree/main/python/samples/getting_started/workflows/checkpoint">checkpoints</a>. This is an abstraction that was missing in AutoGen &#8212; neither the Team nor the Graphflow abstraction provides built-in checkpointing.</p><pre><code><code>from agent_framework import FileCheckpointStorage

checkpoint_storage = FileCheckpointStorage(storage_path="./checkpoints")

workflow = (WorkflowBuilder()
    .add_edge(step1, step2)
    .with_checkpointing(checkpoint_storage=checkpoint_storage)
    .build())

# Resume from checkpoint later
checkpoints = await checkpoint_storage.list_checkpoints()
async for event in workflow.run_stream_from_checkpoint(
    checkpoints[0].checkpoint_id,
    checkpoint_storage=checkpoint_storage
):
    print(event)</code></code></pre><p>High-level builders are available for common patterns:</p><pre><code><code>from agent_framework import SequentialBuilder, ConcurrentBuilder

# Sequential execution
workflow = SequentialBuilder().participants([agent1, agent2, agent3]).build()

# Parallel execution
workflow = ConcurrentBuilder().participants([agent1, agent2, agent3]).build()</code></code></pre><h3>Middleware Support for Security, RAI, Context Engineering Use Cases</h3><p>Agent Framework provides support for <a href="https://github.com/microsoft/agent-framework/tree/main/python/samples/getting_started/middleware">middleware</a> that can be added at agent definition or at run time. <a href="https://github.com/microsoft/agent-framework/tree/main/python/samples/getting_started/middleware">Middleware</a> can be executed before/after the agent, function calls, or calls to a model client. This foundation can be used to enable use cases such as security scanning, responsible AI logging, telemetry, etc.</p><pre><code><code>async def logging_middleware(context, next):
    print(f"Agent {context.agent.name} starting")
    await next(context)
    print(f"Agent {context.agent.name} completed")

async def security_middleware(context, next):
    if "password" in str(context.arguments):
        print("Blocking sensitive data")
        return  # Don&#8217;t call next()
    await next(context)

agent = ChatAgent(
    name="secure_agent",
    chat_client=client,
    middleware=[logging_middleware, security_middleware]
)</code></code></pre><h3>Memory / Context Provider Support</h3><p>Agent Framework provides the concept of an <a href="https://github.com/microsoft/agent-framework/tree/main/python/samples/getting_started/threads">AgentThread</a> to manage conversation state. Without a thread, each agent invocation is independent.</p><pre><code><code># Create thread for conversation context
thread = agent.get_new_thread()

result1 = await agent.run("What&#8217;s 2+2?", thread=thread)
print(result1.text)  # "4"

result2 = await agent.run("What about that number times 10?", thread=thread)
print(result2.text)  # "40" - understands context</code></code></pre><p><a href="https://github.com/microsoft/agent-framework/tree/main/python/samples/getting_started/threads">Threads</a> can use external storage, e.g. with <a href="https://github.com/microsoft/agent-framework/tree/main/python/packages/redis">Redis</a>:</p><pre><code><code>from agent_framework_redis._provider import RedisProvider

provider = RedisProvider(...)

agent = ChatAgent(
    name="secure_agent",
    chat_client=client,
    context_providers=provider
)</code></code></pre><p>AgentThread is serializable - meaning you can dump them to persist state and reload them to resume!</p><h3>DevUI App</h3><p>Agent Framework <a href="https://github.com/microsoft/agent-framework/tree/main/python/packages/devui">DevUI</a> is a sample application for testing and debugging agents and workflows during development. It provides: </p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!DCV8!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcd09ffc9-999c-459c-be7e-7ca8c8a5f613_2940x1826.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!DCV8!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcd09ffc9-999c-459c-be7e-7ca8c8a5f613_2940x1826.png 424w, https://substackcdn.com/image/fetch/$s_!DCV8!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcd09ffc9-999c-459c-be7e-7ca8c8a5f613_2940x1826.png 848w, https://substackcdn.com/image/fetch/$s_!DCV8!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcd09ffc9-999c-459c-be7e-7ca8c8a5f613_2940x1826.png 1272w, https://substackcdn.com/image/fetch/$s_!DCV8!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcd09ffc9-999c-459c-be7e-7ca8c8a5f613_2940x1826.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!DCV8!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcd09ffc9-999c-459c-be7e-7ca8c8a5f613_2940x1826.png" width="1456" height="904" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/cd09ffc9-999c-459c-be7e-7ca8c8a5f613_2940x1826.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:904,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:549681,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://newsletter.victordibia.com/i/174449237?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcd09ffc9-999c-459c-be7e-7ca8c8a5f613_2940x1826.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!DCV8!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcd09ffc9-999c-459c-be7e-7ca8c8a5f613_2940x1826.png 424w, https://substackcdn.com/image/fetch/$s_!DCV8!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcd09ffc9-999c-459c-be7e-7ca8c8a5f613_2940x1826.png 848w, https://substackcdn.com/image/fetch/$s_!DCV8!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcd09ffc9-999c-459c-be7e-7ca8c8a5f613_2940x1826.png 1272w, https://substackcdn.com/image/fetch/$s_!DCV8!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fcd09ffc9-999c-459c-be7e-7ca8c8a5f613_2940x1826.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" 
height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><ul><li><p>Chat interface for interacting with agents</p></li><li><p>Thread management for conversation state</p></li><li><p>Event/trace viewer showing tool calls and execution flow</p></li><li><p>OpenAPI-compatible endpoints</p></li><li><p>Directory-based discovery of agents and workflows</p></li></ul><pre><code><code>from agent_framework.devui import serve

# Launch UI programmatically
serve(entities=[agent], auto_open=True)</code></code></pre><p>Or from CLI:</p><pre><code><code>devui ./agents --port 8080</code></code></pre><p>The UI includes an event panel that shows the sequence of operations (function calls, outputs, results) and supports OpenTelemetry tracing for observability. I am really proud of this part as I worked on it! <br></p><h3>Multi Language - Python and C#</h3><p>Even though most of the examples shown so far are in Python, Agent Framework provides strong support for all of these features across both the <a href="https://github.com/microsoft/agent-framework/tree/main/python">Python</a> and <a href="https://github.com/microsoft/agent-framework/tree/main/dotnet">C# SDKs</a>, which share the same design patterns. </p><p></p><h2>FAQ </h2><p><strong>Why create yet another framework?</strong></p><p>Agent Framework consolidates learnings from both AutoGen and Semantic Kernel. Think of it as a major revision that brings the best ideas together. Yes, it&#8217;s disruptive, but it&#8217;s also a signal that the teams are committed to getting the foundation right for the long term.</p><p><strong>What happens to my existing code?</strong></p><p>Both AutoGen and Semantic Kernel remain open source and will receive critical fixes in the near term. However, new features and development effort will focus on Agent Framework. If you&#8217;re starting fresh, use Agent Framework. For existing projects, the migration guide provides clear paths forward.</p><p><strong>What about Azure integration?</strong></p><p>Agent Framework works seamlessly with Azure OpenAI, Azure AI, and the broader Azure ecosystem. Authentication, deployment, and enterprise features are first-class concerns.</p><p><strong>Is this production-ready?</strong></p><p>Agent Framework is designed to be <em><strong>production ready</strong></em>. It is currently in public preview. The core APIs are stabilizing as the team gathers feedback from early adopters. 
While you can start building with it now, expect some iteration as it moves toward general availability.  </p><p><strong>Where do I start?</strong></p><p>Check out the <a href="https://github.com/microsoft/agent-framework">official repository</a> for <a href="https://github.com/microsoft/agent-framework/tree/main/python">samples</a>, documentation, and the  <a href="https://learn.microsoft.com/en-us/agent-framework/migration-guide/from-autogen/">migration guide from AutoGen</a> and <a href="https://learn.microsoft.com/en-us/agent-framework/migration-guide/from-semantic-kernel/?pivots=programming-language-csharp">Semantic Kernel</a>. The samples cover everything from basic agents to complex orchestrated workflows.</p><h2>Bottom Line / Conclusion</h2><p>In my opinion - Agent Framework isn&#8217;t just another rebrand - it represents a thoughtful evolution of current tools for multi-agent development.  It also helps address confusion around choosing between Semantic Kernel and AutoGen. The focus on typed workflows, built-in capabilities like checkpointing and human-in-the-loop, and lessons learned from thousands of developers position it to help you - the developer.</p><p><strong>The most important thing is what you build with it</strong>. Agent Framework is designed to accelerate your work, whether you&#8217;re building simple assistants or complex multi-agent systems. </p><blockquote><p>Over the next week, I&#8217;ll be doing a build-a-demo-a-day (or a week) series to showcase some of these capabilities in Agent Framework.</p></blockquote><p>Give it a try. 
The learning curve is gentler than you might expect, especially if you&#8217;re coming from either parent framework.</p><p></p><div class="subscription-widget-wrap-editor" data-attrs="{&quot;url&quot;:&quot;https://newsletter.victordibia.com/subscribe?&quot;,&quot;text&quot;:&quot;Subscribe&quot;,&quot;language&quot;:&quot;en&quot;}" data-component-name="SubscribeWidgetToDOM"><div class="subscription-widget show-subscribe"><div class="preamble"><p class="cta-caption">Designing with AI is a reader-supported publication. To receive new posts and support my work, consider becoming a free or paid subscriber.</p></div><form class="subscription-widget-subscribe"><input type="email" class="email-input" name="email" placeholder="Type your email&#8230;" tabindex="-1"><input type="submit" class="button primary" value="Subscribe"><div class="fake-input-wrapper"><div class="fake-input"></div><div class="fake-button"></div></div></form></div></div>]]></content:encoded></item><item><title><![CDATA[MCP For Software Engineers | Part 2: Interactive & Long-Running Tools (Progress streaming, User Input, Cancellation), Resources & Prompts]]></title><description><![CDATA[#45 | A deep dive into implementing Tools, Resources, Prompts, Roots in the MCP]]></description><link>https://newsletter.victordibia.com/p/mcp-for-software-engineers-part-2</link><guid isPermaLink="false">https://newsletter.victordibia.com/p/mcp-for-software-engineers-part-2</guid><dc:creator><![CDATA[Victor Dibia, PhD]]></dc:creator><pubDate>Fri, 01 Aug 2025 13:29:53 GMT</pubDate><enclosure url="https://substackcdn.com/image/fetch/$s_!cRS7!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9b2419e0-ff4c-48cb-870c-2c74df3c5f54_2723x1889.png" length="0" type="image/jpeg"/><content:encoded><![CDATA[<div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" 
href="https://substackcdn.com/image/fetch/$s_!cRS7!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9b2419e0-ff4c-48cb-870c-2c74df3c5f54_2723x1889.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!cRS7!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9b2419e0-ff4c-48cb-870c-2c74df3c5f54_2723x1889.png 424w, https://substackcdn.com/image/fetch/$s_!cRS7!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9b2419e0-ff4c-48cb-870c-2c74df3c5f54_2723x1889.png 848w, https://substackcdn.com/image/fetch/$s_!cRS7!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9b2419e0-ff4c-48cb-870c-2c74df3c5f54_2723x1889.png 1272w, https://substackcdn.com/image/fetch/$s_!cRS7!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9b2419e0-ff4c-48cb-870c-2c74df3c5f54_2723x1889.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!cRS7!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9b2419e0-ff4c-48cb-870c-2c74df3c5f54_2723x1889.png" width="1456" height="1010" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/9b2419e0-ff4c-48cb-870c-2c74df3c5f54_2723x1889.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:1010,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:544723,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:false,&quot;topImage&quot;:true,&quot;internalRedirect&quot;:&quot;https://newsletter.victordibia.com/i/167856675?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9b2419e0-ff4c-48cb-870c-2c74df3c5f54_2723x1889.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!cRS7!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9b2419e0-ff4c-48cb-870c-2c74df3c5f54_2723x1889.png 424w, https://substackcdn.com/image/fetch/$s_!cRS7!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9b2419e0-ff4c-48cb-870c-2c74df3c5f54_2723x1889.png 848w, https://substackcdn.com/image/fetch/$s_!cRS7!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9b2419e0-ff4c-48cb-870c-2c74df3c5f54_2723x1889.png 1272w, https://substackcdn.com/image/fetch/$s_!cRS7!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9b2419e0-ff4c-48cb-870c-2c74df3c5f54_2723x1889.png 1456w" sizes="100vw" fetchpriority="high"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" 
width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>In <a href="https://newsletter.victordibia.com/p/mcp-for-software-engineers-part-1">Part 1</a> of this <a href="https://newsletter.victordibia.com/s/mcp-for-software-engineers">series</a>, we built a simple MCP server with a single tool using the high-level server API in the Python MCP SDK, and demonstrated how to connect to this server via a host application that implements an MCP client. </p><p>In practice, MCP is a lot more flexible, has a set of <em><strong>advanced features</strong></em>, many of which are only available via low level api implementation. In this part, we&#8217;ll explore these advanced features and how to use them effectively. 
</p><blockquote><p><strong>The Long Running Tool Misconception</strong><br>Most MCP tutorials show quick request/response patterns for tools, creating the <em><strong>incorrect</strong></em> impression that MCP is unsuitable (compared to protocols like A2A) for handling long-running operations. In reality, MCP supports sophisticated tools that can run for hours, pause to request user input, send real-time progress updates, and handle cancellation gracefully. We will cover these in this article.</p></blockquote><p>In this part, we&#8217;ll go deeper and cover:</p><ul><li><p><strong>Tools</strong>: Advanced features including annotations, requesting user input (elicitation), LLM assistance (sampling), progress notifications, cancellation, and structured return types</p></li><li><p><strong>Resources</strong>: Defining server resources, client operations (list/read/subscribe), and real-time update notifications</p></li><li><p><strong>Prompts</strong>: Creating reusable LLM prompt templates that can be used to modify host application behavior without modifying client/host application code.</p></li><li><p><strong>Roots</strong>: Understanding client-suggested operation boundaries</p></li></ul><p>As done previously, we&#8217;ll use the Python SDK to illustrate these concepts, but the principles apply across languages. This time around we will use the <a href="https://github.com/modelcontextprotocol/python-sdk?tab=readme-ov-file#low-level-server">low-level API </a> which provides more flexibility/control.   </p><p>All of the code for this tutorial is available at the end of the article. </p><blockquote><p><a href="https://newsletter.victordibia.com/p/autogen-studio-v04-a-no-code-tool">AutoGen Studio</a> now has an MCP playground feature that lets users test our interactive tool capabilities (streaming progress notifications, elicitation, sampling). 
See video below.</p></blockquote><div class="native-video-embed" data-component-name="VideoPlaceholder" data-attrs="{&quot;mediaUploadId&quot;:&quot;cd9e0b67-159e-4dde-822a-7c85b97eb820&quot;,&quot;duration&quot;:null}"></div><p></p><h2>Defining an MCP Server in the low-level Python SDK API</h2><p>The MCP python <a href="https://github.com/modelcontextprotocol/python-sdk?tab=readme-ov-file#low-level-server">low-level API</a> provides more flexibility and control (but with more code) compared to the high-level API. Many production systems may require this level of control, especially related to how resources, authentication, and transport security are implemented.</p><pre><code><code>from mcp.server import Server

class AdvancedMCPServer(Server):
    """Low-level MCP server that will host tools, resources, and prompts.

    Subclasses ``Server`` so that request handlers can be attached to the
    instance from inside ``__init__``.
    """

    def __init__(self, name: str = "advanced_mcp_server"):
        """Create the server.

        Args:
            name: Name handed to the base ``Server`` constructor.
        """
        super().__init__(name)
        # Decorator-based handler registration belongs here, inside __init__.
</code></code></pre><p>On the client side, we can write a simple client that connects to this server and prints out available tools.</p><pre><code><code>from mcp.client.session import ClientSession
from mcp.client.streamable_http import streamablehttp_client

async def test_client():
    """Connect to the local MCP server and print the tools it advertises."""
    endpoint = "http://127.0.0.1:8006/mcp"

    # The transport yields a read stream, a write stream and a session-id
    # getter; only the two streams are used below.
    async with streamablehttp_client(endpoint) as (reader, writer, _get_session_id):
        async with ClientSession(reader, writer) as session:
            # Handshake first; the result carries the server's info.
            handshake = await session.initialize()
            print(f"Connected to: {handshake.serverInfo.name}")

            # Ask the server which tools it exposes.
            available = await session.list_tools()
            print("Available tools",  available)
</code></code></pre><p>We will build on this to implement advanced tool capabilities, resources, prompts etc.</p><h2>Tools</h2><p>Tools in MCP servers can be thought of as functions that clients can call (typically driven by an LLM) - anything from a simple calculator to a full data analysis pipeline. Underneath, a tool call is a request made using the client to the server. Each request includes the toolRequest data structure and importantly a unique request ID. This ID is crucial for tracking the request, especially for long-running operations. </p><blockquote><p><strong>Tool Calls can be Long-Running</strong><br>MCP tools can be long-running processes that interact with users and systems over time.<br>Examples: Research agents that analyze data for hours while asking for user input, deployment pipelines that seek approval before critical steps, or data processing jobs that send status updates overnight.</p></blockquote><p>A tool can be defined by creating a function on our server and decorating it with <code>@self.list_tools()</code> to make it discoverable, and another function that handles the tool call with the <code>@self.call_tool()</code> decorator.</p><p>The code below shows a simple example that lists a <code>travel_agent</code> tool for booking trips:</p><pre><code><code>@self.list_tools()
async def handle_list_tools() -&gt; list[Tool]:
    """List available tools."""
    return [
        Tool(
            name="travel_agent",
            description="Book a travel trip with progress updates and price confirmation",
            inputSchema={
                "type": "object", 
                "properties": {
                    "destination": {
                        "type": "string", 
                        "description": "Travel destination",
                        "default": "Paris"
                    }
                }
            }
        )
    ]

@self.call_tool()
async def handle_call_tool(name: str, args: dict) -&gt; list[TextContent]:
    """Handle tool execution."""
    if name == "travel_agent":
        destination = args.get("destination", "Paris")
        result = f"&#9989; Trip booked successfully to {destination}!"
        return [TextContent(type="text", text=result)]
    else:
        raise ValueError(f"Unknown tool: {name}")
</code></code></pre><h3>Tool Annotations</h3><p>Tools can include metadata (annotations) such as <code>readOnlyHint</code>, <code>destructiveHint</code>, <code>idempotentHint</code>, and <code>openWorldHint</code>. These help host applications and users understand what a tool does and <em>how</em> it should be presented in the UI. For example, a tool that deletes files should have <code>destructiveHint: true</code>.</p><p>We can annotate our tool using the following code:</p><pre><code><code>Tool(
    name="travel_agent",
    description="Book a travel trip with progress updates and price confirmation",
    inputSchema={
        "type": "object",
        "properties": {
            "destination": {
                "type": "string", 
                "description": "Travel destination",
                "default": "Paris"
            }
        }
    },
    annotations=ToolAnnotations(
        title="Travel Booking Agent",
        readOnlyHint=False,     # Modifies booking state
        destructiveHint=False,  # Safe, doesn't delete data
        idempotentHint=False,   # Each booking is unique
        openWorldHint=True      # Interacts with external systems
    )
)
</code></code></pre><p><strong>Note:</strong> The list of tools can change during a session. Servers send <code>notifications/tools/list_changed</code> when tools are added or removed. Clients should refresh their tool list when receiving this notification.</p><h3>Requesting (User) Input During Tool Calls</h3><p>Tools can pause execution to request additional input primarily through the Elicitation feature in MCP. Elicitation allows tools to request structured input from users. Here's how to use it within a tool implementation:</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!rDTp!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0934cfb7-d6f0-4a95-84f9-caa9358ad8a6_3463x1049.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!rDTp!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0934cfb7-d6f0-4a95-84f9-caa9358ad8a6_3463x1049.png 424w, https://substackcdn.com/image/fetch/$s_!rDTp!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0934cfb7-d6f0-4a95-84f9-caa9358ad8a6_3463x1049.png 848w, https://substackcdn.com/image/fetch/$s_!rDTp!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0934cfb7-d6f0-4a95-84f9-caa9358ad8a6_3463x1049.png 1272w, https://substackcdn.com/image/fetch/$s_!rDTp!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0934cfb7-d6f0-4a95-84f9-caa9358ad8a6_3463x1049.png 1456w" sizes="100vw"><img 
src="https://substackcdn.com/image/fetch/$s_!rDTp!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0934cfb7-d6f0-4a95-84f9-caa9358ad8a6_3463x1049.png" width="1456" height="441" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/0934cfb7-d6f0-4a95-84f9-caa9358ad8a6_3463x1049.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:441,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:92850,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://newsletter.victordibia.com/i/167856675?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0934cfb7-d6f0-4a95-84f9-caa9358ad8a6_3463x1049.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!rDTp!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0934cfb7-d6f0-4a95-84f9-caa9358ad8a6_3463x1049.png 424w, https://substackcdn.com/image/fetch/$s_!rDTp!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0934cfb7-d6f0-4a95-84f9-caa9358ad8a6_3463x1049.png 848w, https://substackcdn.com/image/fetch/$s_!rDTp!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0934cfb7-d6f0-4a95-84f9-caa9358ad8a6_3463x1049.png 1272w, https://substackcdn.com/image/fetch/$s_!rDTp!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0934cfb7-d6f0-4a95-84f9-caa9358ad8a6_3463x1049.png 
1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><pre><code><code>@self.call_tool()
async def handle_call_tool(name: str, args: dict) -&gt; list[TextContent]:
    """Handle tool execution."""
    ctx = self.request_context  # Get the request context
    
    if name == "travel_agent":
        destination = args.get("destination", "Paris")
        
        try:
            # Request user confirmation via elicitation
            elicit_result = await ctx.session.elicit(
                message=f"Please confirm the estimated price of $1200 for your trip to {destination}",
                requestedSchema=PriceConfirmationSchema.model_json_schema(),
                related_request_id=ctx.request_id,
            )
            
            if elicit_result and elicit_result.action == "accept":
                # User confirmed, continue booking
                result = f"&#9989; Trip booked successfully to {destination}!"
                return [TextContent(type="text", text=result)]
            else:
                # User declined or cancelled
                return [TextContent(type="text", text="Booking cancelled")]
                
        except Exception as e:
            # Handle elicitation failures gracefully
            logger.info(f"Elicitation request failed: {e}")
            # Continue with fallback behavior
</code></code></pre><p>Tools can also request LLM completions during execution via the Sampling feature in MCP:</p><pre><code><code>@self.call_tool()
async def handle_call_tool(name: str, args: dict) -&gt; list[TextContent]:
    """Handle tool execution."""
    ctx = self.request_context
    
    if name == "research_agent":
        topic = args.get("topic", "AI trends")
        
        try:
            # Request AI assistance during tool execution
            sampling_result = await ctx.session.create_message(
                messages=[
                    SamplingMessage(
                        role="user",
                        content=TextContent(type="text", text=f"Please summarize the key findings for research on: {topic}")
                    )
                ],
                max_tokens=100,
                related_request_id=ctx.request_id,
            )
            
            if sampling_result and sampling_result.content:
                summary = sampling_result.content.text
                result = f"&#128269; Research on '{topic}' completed!\n\nKey Findings: {summary}"
                return [TextContent(type="text", text=result)]
                
        except Exception as e:
            logger.info(f"Sampling request failed: {e}")
            # Continue with fallback behavior
</code></code></pre><p></p><h3>Tool Progress Notifications</h3><p>For long-running operations, tools can send progress updates. Here's how to integrate progress notifications into your tool implementation:</p><div class="captioned-image-container"><figure><a class="image-link image2" target="_blank" href="https://substackcdn.com/image/fetch/$s_!9O_A!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa38f07e8-8f9f-40f0-b495-888c5218d833_3483x598.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!9O_A!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa38f07e8-8f9f-40f0-b495-888c5218d833_3483x598.png 424w, https://substackcdn.com/image/fetch/$s_!9O_A!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa38f07e8-8f9f-40f0-b495-888c5218d833_3483x598.png 848w, https://substackcdn.com/image/fetch/$s_!9O_A!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa38f07e8-8f9f-40f0-b495-888c5218d833_3483x598.png 1272w, https://substackcdn.com/image/fetch/$s_!9O_A!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa38f07e8-8f9f-40f0-b495-888c5218d833_3483x598.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!9O_A!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa38f07e8-8f9f-40f0-b495-888c5218d833_3483x598.png" width="1456" height="250" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/a38f07e8-8f9f-40f0-b495-888c5218d833_3483x598.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:250,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:67847,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://newsletter.victordibia.com/i/167856675?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa38f07e8-8f9f-40f0-b495-888c5218d833_3483x598.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!9O_A!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa38f07e8-8f9f-40f0-b495-888c5218d833_3483x598.png 424w, https://substackcdn.com/image/fetch/$s_!9O_A!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa38f07e8-8f9f-40f0-b495-888c5218d833_3483x598.png 848w, https://substackcdn.com/image/fetch/$s_!9O_A!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa38f07e8-8f9f-40f0-b495-888c5218d833_3483x598.png 1272w, https://substackcdn.com/image/fetch/$s_!9O_A!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa38f07e8-8f9f-40f0-b495-888c5218d833_3483x598.png 1456w" sizes="100vw" loading="lazy"></picture><div></div></div></a></figure></div><pre><code><code>@self.call_tool()
async def handle_call_tool(name: str, args: dict) -&gt; list[TextContent]:
    """Handle tool execution."""
    ctx = self.request_context
    
    if name == "travel_agent":
        destination = args.get("destination", "Paris")
        
        # Define steps for progress tracking
        steps = [
            "Checking flights...",
            "Finding available dates...", 
            "Confirming prices...",
            "Booking flight..."
        ]
        
        for i, step in enumerate(steps):
            # Send progress updates during tool execution
            await ctx.session.send_progress_notification(
                progress_token=ctx.request_id,
                progress=i * 25,
                total=100,
                message=step,
                related_request_id=str(ctx.request_id)
            )
            
            # Simulate work being done
            await anyio.sleep(2)
        
        # Final progress update
        await ctx.session.send_progress_notification(
            progress_token=ctx.request_id,
            progress=100,
            total=100,
            message="Trip booked successfully"
        )
        
        return [TextContent(type="text", text=f"&#9989; Trip booked successfully to {destination}!")]
</code></code></pre><h3>Tool Cancellation</h3><p>Tools can be cancelled mid-execution. Each tool call has a unique request ID that clients can use to send cancellation requests. The server should handle cancellation gracefully and clean up any ongoing operations.</p><p>On the server side, tools should be designed to handle cancellation gracefully, checking for cancellation during long-running operations. On the client side, cancellation is typically handled through asyncio task cancellation:</p><pre><code><code>from mcp.client.session import ClientSession
from mcp.client.streamable_http import streamablehttp_client
import asyncio

async def cancel_tool_example():
    """Start a long-running tool call, then cancel it from the client after five seconds."""
    endpoint = "http://127.0.0.1:8006/mcp"

    async with streamablehttp_client(endpoint) as (reader, writer, _get_session_id):
        async with ClientSession(reader, writer) as session:
            await session.initialize()

            # Run the tool call as a background task so it can be cancelled
            # while it is still in flight.
            pending_call = asyncio.create_task(
                session.call_tool("long_running_agent", {})
            )

            # Give the tool a moment to get going, then request cancellation.
            await asyncio.sleep(5)
            pending_call.cancel()

            # Awaiting a cancelled task raises CancelledError.
            try:
                await pending_call
            except asyncio.CancelledError:
                print("Tool call cancelled successfully")
</code></code></pre><p>This is particularly useful for long-running operations where users may want to stop the process.</p><h3>Tool Return Types</h3><p>Based on the MCP specification, tools can return flexible content types in their responses:</p><ul><li><p><strong>Unstructured Content</strong>: Text, Image (base64 with MIME type), Audio, Resource Links, and Embedded Resources</p></li><li><p><strong>Structured Content</strong>: Optional <code>structuredContent</code> field containing structured data (JSON), which should also be included as serialized JSON in a TextContent block for backwards compatibility</p></li><li><p><strong>Error State</strong>: The <code>isError</code> boolean flag indicates whether the response represents an error</p></li><li><p><strong>Metadata</strong>: Optional <code>_meta</code> field for additional annotations and information</p></li></ul><p>Example tool result structure:</p><pre><code><code>{
  "content": [
    {
      "type": "text",
      "text": "Analysis complete: Temperature is 22.5&#176;C"
    }
  ],
  "structuredContent": {
    "temperature": 22.5,
    "unit": "celsius",
    "conditions": "Partly cloudy"
  },
  "isError": false
}
</code></code></pre><ul><li><p><strong>Output Schema</strong>: Tools can provide an optional output schema to validate structured results and help clients understand the expected response structure. When an output schema is provided:</p><ul><li><p>Servers MUST provide results conforming to the schema</p></li><li><p>Clients SHOULD validate results against the schema</p></li><li><p>The schema guides LLMs in parsing tool outputs and improves type safety</p></li></ul></li></ul><p>Example tool definition with output schema:</p><pre><code><code>{
  "name": "get_weather",
  "description": "Get current weather data",
  "inputSchema": {
    "type": "object",
    "properties": { "location": { "type": "string" } }
  },
  "outputSchema": {
    "type": "object",
    "properties": {
      "temperature": { "type": "number" },
      "conditions": { "type": "string" },
      "humidity": { "type": "number", "minimum": 0, "maximum": 100 }
    },
    "required": ["temperature", "conditions"]
  }
}
</code></code></pre><p>To use the <code>structuredContent</code> field effectively, tools should define output schemas that clients can use for validation and type checking.</p><h2>Resources</h2><p>Resources in MCP are how you expose data: files, database records, API responses, logs, images, and more. Each resource is identified by a unique URI (e.g., <code>file:///data/report.csv</code>, <code>postgres://db/table</code>). Resources can be text (UTF-8) or binary (base64-encoded).</p><p>Clients can discover resources via <code>resources/list</code> or by using URI templates for dynamic resources. To read a resource, clients send a <code>resources/read</code> request with the resource URI. Servers can also notify clients when resources change, and clients can subscribe to updates for real-time workflows.</p><p>We can define a resource in the server by creating a resource handler:</p><pre><code><code>@self.list_resources()
async def handle_list_resources() -&gt; list[Resource]:
    """List available resources."""
    return [
        Resource(
            uri=AnyUrl("research://data/sources"),
            name="Research Data Sources",
            description="Collection of research sources and references",
            mimeType="application/json"
        )
    ]

@self.read_resource()
async def handle_read_resource(uri: AnyUrl) -&gt; list[ReadResourceContents]:
    """Read resource content based on URI."""
    uri_str = str(uri)
    
    if uri_str == "research://data/sources":
        # Mock research data
        research_data = {
            "sources": [
                {"title": "AI Trends 2024", "url": "https://example.com/ai-trends"}
            ],
            "last_updated": "2024-01-15T10:30:00Z"
        }
        return [ReadResourceContents(
            content=str(research_data).replace("'", '"'),
            mime_type="application/json"
        )]
    else:
        raise ValueError(f"Unknown resource: {uri_str}")
</code></code></pre><p>Clients can interact with resources through several operations:</p><pre><code><code>from mcp.client.session import ClientSession
from pydantic import AnyUrl

async def resource_client_example(client_session: ClientSession):
    """Walk through the client-side resource operations: list, read, subscribe, unsubscribe."""
    sources_uri = AnyUrl("research://data/sources")

    # Discover what the server exposes.
    listing = await client_session.list_resources()
    print(f"Available resources: {listing.resources}")

    # Fetch one resource by its URI.
    fetched = await client_session.read_resource(sources_uri)
    print(f"Resource content: {fetched.contents}")

    # Subscribe to change notifications for that resource...
    await client_session.subscribe_resource(sources_uri)

    # ...and unsubscribe again once they are no longer needed.
    await client_session.unsubscribe_resource(sources_uri)
</code></code></pre><p>Servers can notify subscribed clients when resources change. On the server side, you can send notifications:</p><pre><code><code># In a tool or other server operation that modifies a resource
async def handle_call_tool(name: str, args: dict) -&gt; list[TextContent]:
    ctx = self.request_context
    
    if name == "update_data":
        # Perform the update...
        
        # Notify subscribed clients about the resource change
        await ctx.session.send_resource_updated(
            uri=AnyUrl("research://data/sources")
        )
        
        return [TextContent(type="text", text="Data updated successfully")]
</code></code></pre><p>Clients can handle these notifications by setting up a message handler:</p><pre><code><code>async def handle_notifications(message):
    if isinstance(message, types.ServerNotification):
        match message.root:
            case types.ResourceUpdatedNotification(params=params):
                print(f"Resource updated: {params.uri}")
                # Refresh the resource content
            case types.ResourceListChangedNotification():
                print("Resource list changed - refreshing available resources")
</code></code></pre><blockquote><p>Tip<br>When working with resources, use descriptive URIs and set appropriate MIME types for better client compatibility. Handle errors gracefully and consider supporting subscriptions for frequently changing resources to enable real-time applications.</p></blockquote><h2>Prompts</h2><p>Prompts are reusable templates for LLM interactions, defined on the server and surfaced to clients. Each prompt has a name, description, and optional arguments. Prompts can accept dynamic arguments, embed resource context, and support multi-step workflows.</p><p>Clients discover prompts via <code>prompts/list</code> and retrieve them with <code>prompts/get</code>. Prompts are especially useful for standardizing common LLM tasks (e.g., "summarize this file", "generate a commit message") and can be improved on the server side without changing the host application.</p><p>Example prompt definition:</p><pre><code><code>{
  "name": "explain-code",
  "description": "Explain how code works",
  "arguments": [
    { "name": "code", "description": "Code to explain", "required": true },
    {
      "name": "language",
      "description": "Programming language",
      "required": false
    }
  ]
}
</code></code></pre><p>To define a prompt on the server, we can create a prompt handler:</p><pre><code><code>@self.list_prompts()
async def handle_list_prompts() -&gt; list[Prompt]:
    """List available prompt templates."""
    return [
        Prompt(
            name="task_summary",
            description="Generate a summary for any completed task",
            arguments=[
                PromptArgument(
                    name="task_name",
                    description="Name of the completed task",
                    required=True
                ),
                PromptArgument(
                    name="outcome",
                    description="The result or outcome of the task",
                    required=False
                )
            ]
        )
    ]

@self.get_prompt()
async def handle_get_prompt(name: str, arguments: dict[str, str] | None) -&gt; GetPromptResult:
    """Generate prompt content based on template name and arguments."""
    if name != "task_summary":
        raise ValueError(f"Unknown prompt: {name}")
    
    if arguments is None:
        arguments = {}
    
    task_name = arguments.get("task_name", "Unknown Task")
    outcome = arguments.get("outcome", "task completed successfully")
    
    prompt_text = f"""Please create a concise summary for the following completed task:

Task: {task_name}
Outcome: {outcome}

Please provide:
1. What was accomplished
2. Key results or deliverables
3. Any important observations or lessons learned

Keep the summary brief and professional."""
    
    return GetPromptResult(
        description=f"Task summary prompt for {task_name}",
        messages=[
            PromptMessage(
                role="user",
                content=TextContent(type="text", text=prompt_text)
            )
        ]
    )
</code></code></pre><blockquote><p>Tip<br>When creating prompts, use clear names and detailed descriptions, validate arguments properly, and consider versioning prompt templates for backward compatibility.</p></blockquote><h2>Roots</h2><p>Roots are URIs (like file paths or URLs) that a client suggests to a server as the boundaries or focus areas for operations. When a client connects, it can declare support for roots and provide a list of relevant roots (e.g., project directories, API endpoints). Servers should respect these roots, using them to locate and prioritize resources, but roots are informational&#8212;not strictly enforced.</p><p><strong>Common use cases:</strong></p><ul><li><p>Defining project directories or repository locations</p></li><li><p>Specifying API endpoints or configuration boundaries</p></li></ul><p>Example roots declaration:</p><pre><code><code>{
  "roots": [
    {
      "uri": "file:///home/user/projects/frontend",
      "name": "Frontend Repository"
    },
    { "uri": "https://api.example.com/v1", "name": "API Endpoint" }
  ]
}
</code></code></pre><blockquote><p>Tip:<br>When working with roots, only suggest necessary ones and use clear, descriptive names. Monitor accessibility and handle changes gracefully since servers rely on these URIs for scoping operations.</p></blockquote>
      <p>
          <a href="https://newsletter.victordibia.com/p/mcp-for-software-engineers-part-2">
              Read more
          </a>
      </p>
   ]]></content:encoded></item><item><title><![CDATA[MCP For Software Engineers | Part 1 : Building Your First Server]]></title><description><![CDATA[#44 | New to MCP? Heres how to build your first MCP server and a Host Application that integrates and uses the server.]]></description><link>https://newsletter.victordibia.com/p/mcp-for-software-engineers-part-1</link><guid isPermaLink="false">https://newsletter.victordibia.com/p/mcp-for-software-engineers-part-1</guid><dc:creator><![CDATA[Victor Dibia, PhD]]></dc:creator><pubDate>Wed, 02 Jul 2025 15:31:12 GMT</pubDate><enclosure url="https://substackcdn.com/image/fetch/$s_!Ct1Z!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc644e807-9e03-483c-abee-d8586149fc9c_2681x1621.png" length="0" type="image/jpeg"/><content:encoded><![CDATA[<div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!Ct1Z!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc644e807-9e03-483c-abee-d8586149fc9c_2681x1621.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!Ct1Z!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc644e807-9e03-483c-abee-d8586149fc9c_2681x1621.png 424w, https://substackcdn.com/image/fetch/$s_!Ct1Z!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc644e807-9e03-483c-abee-d8586149fc9c_2681x1621.png 848w, https://substackcdn.com/image/fetch/$s_!Ct1Z!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc644e807-9e03-483c-abee-d8586149fc9c_2681x1621.png 1272w, 
https://substackcdn.com/image/fetch/$s_!Ct1Z!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc644e807-9e03-483c-abee-d8586149fc9c_2681x1621.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!Ct1Z!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc644e807-9e03-483c-abee-d8586149fc9c_2681x1621.png" width="1456" height="880" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/c644e807-9e03-483c-abee-d8586149fc9c_2681x1621.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:880,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:185668,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:false,&quot;topImage&quot;:true,&quot;internalRedirect&quot;:&quot;https://newsletter.victordibia.com/i/167292158?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc644e807-9e03-483c-abee-d8586149fc9c_2681x1621.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!Ct1Z!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc644e807-9e03-483c-abee-d8586149fc9c_2681x1621.png 424w, https://substackcdn.com/image/fetch/$s_!Ct1Z!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc644e807-9e03-483c-abee-d8586149fc9c_2681x1621.png 848w, 
https://substackcdn.com/image/fetch/$s_!Ct1Z!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc644e807-9e03-483c-abee-d8586149fc9c_2681x1621.png 1272w, https://substackcdn.com/image/fetch/$s_!Ct1Z!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc644e807-9e03-483c-abee-d8586149fc9c_2681x1621.png 1456w" sizes="100vw" fetchpriority="high"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>When I <a href="https://newsletter.victordibia.com/p/no-mcps-have-not-won-yet">first tried out</a> the Model Context Protocol 
(MCP) from Anthropic (March 2025), the developer experience was <em><strong>rough</strong></em>. Integrating and bundling MCP with existing applications was challenging, and there were a few security pitfalls that developers had to navigate. I wrote about that experience <a href="https://newsletter.victordibia.com/p/no-mcps-have-not-won-yet">here</a>. </p><blockquote><p>If you are visually inclined - here is a <a href="https://youtu.be/-yrqkwZr3Nc?si=ISuvOoOcGRPYuYJP">video walkthrough  of this post.</a></p></blockquote><blockquote><p><a href="https://modelcontextprotocol.io/specification/2025-06-18">Model Context Protocol (MCP) is a standard</a> for how AI applications connect to tools and data sources. Simply put: your AI app needs to call tools, monitor requests, handle prompts, and get user approval. MCP standardizes all of this. </p></blockquote><p>However, like all <em>good</em> standards or protocols, MCP has evolved and <em><strong>gotten better (see recent <a href="https://modelcontextprotocol.io/specification/2025-06-18/changelog">changelog</a> including fixes to SDKs, improved support for remote servers, improved auth)</strong></em>. And I can <em>now</em> see it solving several critical problems faced by teams building AI applications.  </p><ul><li><p><strong>Integration</strong>: Without MCP, Team X spends weeks integrating Team Z's new tool. With MCP, Team X announces day-one support for any MCP tool (including Team Z&#8217;s new capabilities!).</p></li><li><p><strong>Distribution</strong>: Without MCP, Team X writes a Cursor extension, a Windsurf plugin, a VSCode extension, a Claude Desktop add-on, and more. With MCP, Team X writes one MCP server that works everywhere.</p></li><li><p><strong>Discovery</strong>: Without MCP, teams ask "Does anyone have a tool that does X?" 
With MCP, there's a central registry where teams publish and find tools.</p></li><li><p><strong>Security</strong>: Without MCP, each team implements (or skips) their security/auth for the tools or resources that LLMs use. With MCP, it's possible to implement centralized auth and a managed registry of MCP servers. </p></li><li><p><strong>Runtime Flexibility</strong>: Without MCP, you're stuck with hard-coded tool configurations. With MCP, tools can be dynamically discovered based on context. Also, it can be helpful to have aspects of the application logic (e.g., tool execution on remote MCP servers) managed by an MCP server as opposed to running within the host application.</p><p></p></li></ul><p>In this tutorial (part 1 of a series on <a href="https://newsletter.victordibia.com/s/mcp-for-software-engineers">MCP for Software Engineers</a>), we will cover the following: </p><ul><li><p>Building an MCP server that exposes tools (fetch news from TechCrunch)</p></li><li><p>Creating a client to connect to the server</p></li><li><p>Building a host application that uses an LLM to translate user requests to tool calls on the MCP server.</p></li><li><p>Choosing between stdio and Streamable HTTP transports for MCP</p></li><li><p>Bonus: how to use the MCP Server we create in VSCode (or any other tool) </p></li></ul><div class="digest-post-embed" data-attrs="{&quot;nodeId&quot;:&quot;e485e6ad-6c99-4a2a-98f2-c7ee78c93b5e&quot;,&quot;caption&quot;:&quot;In Part 1 of this series, we built a simple MCP server with a single tool using the high-level server API in the Python MCP SDK, and demonstrated how to connect to this server via a host application that implements an MCP client.&quot;,&quot;cta&quot;:&quot;Read full story&quot;,&quot;showBylines&quot;:true,&quot;size&quot;:&quot;sm&quot;,&quot;isEditorNode&quot;:true,&quot;title&quot;:&quot;MCP For Software Engineers | Part 2: Interactive &amp; Long-Running Tools (Progress streaming, User Input, Cancellation), Resources &amp; 
Prompts&quot;,&quot;publishedBylines&quot;:[{&quot;id&quot;:85692678,&quot;name&quot;:&quot;Victor Dibia, PhD&quot;,&quot;bio&quot;:&quot;Hacker, Research Scientist (Microsoft Research), Author working on and writing about Generative AI, Agents. Core maintainer for the AutoGen Multi-Agent Framework (35k Stars on GitHub). Previously at Cloudera, IBM Research. All views are my own.&quot;,&quot;photo_url&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/27536f63-7d8e-48dc-b34c-150acfacdc8b_1726x1396.jpeg&quot;,&quot;is_guest&quot;:false,&quot;bestseller_tier&quot;:null}],&quot;post_date&quot;:&quot;2025-08-01T13:29:53.324Z&quot;,&quot;cover_image&quot;:&quot;https://substackcdn.com/image/fetch/$s_!cRS7!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9b2419e0-ff4c-48cb-870c-2c74df3c5f54_2723x1889.png&quot;,&quot;cover_image_alt&quot;:null,&quot;canonical_url&quot;:&quot;https://newsletter.victordibia.com/p/mcp-for-software-engineers-part-2&quot;,&quot;section_name&quot;:&quot;MCP For Software Engineers&quot;,&quot;video_upload_id&quot;:null,&quot;id&quot;:167856675,&quot;type&quot;:&quot;newsletter&quot;,&quot;reaction_count&quot;:7,&quot;comment_count&quot;:0,&quot;publication_id&quot;:null,&quot;publication_name&quot;:&quot;Designing with AI&quot;,&quot;publication_logo_url&quot;:&quot;https://substackcdn.com/image/fetch/$s_!1FgP!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fff160652-8bbb-475e-80a7-ba4eb5f80dcb_504x504.png&quot;,&quot;belowTheFold&quot;:false,&quot;youtube_url&quot;:null,&quot;show_links&quot;:null,&quot;feed_url&quot;:null}"></div><blockquote><p>Note: This series is not for those seeking to learn about the "<em>latest productivity hacks with MCP in some existing host application (Cursor, Windsurf etc)</em>." 
</p></blockquote><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!r1_H!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffef6df6b-6387-4261-9239-d7521d081ad1_1568x903.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!r1_H!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffef6df6b-6387-4261-9239-d7521d081ad1_1568x903.png 424w, https://substackcdn.com/image/fetch/$s_!r1_H!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffef6df6b-6387-4261-9239-d7521d081ad1_1568x903.png 848w, https://substackcdn.com/image/fetch/$s_!r1_H!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffef6df6b-6387-4261-9239-d7521d081ad1_1568x903.png 1272w, https://substackcdn.com/image/fetch/$s_!r1_H!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffef6df6b-6387-4261-9239-d7521d081ad1_1568x903.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!r1_H!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffef6df6b-6387-4261-9239-d7521d081ad1_1568x903.png" width="1456" height="839" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/fef6df6b-6387-4261-9239-d7521d081ad1_1568x903.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:839,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:377128,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://newsletter.victordibia.com/i/167292158?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffef6df6b-6387-4261-9239-d7521d081ad1_1568x903.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!r1_H!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffef6df6b-6387-4261-9239-d7521d081ad1_1568x903.png 424w, https://substackcdn.com/image/fetch/$s_!r1_H!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffef6df6b-6387-4261-9239-d7521d081ad1_1568x903.png 848w, https://substackcdn.com/image/fetch/$s_!r1_H!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffef6df6b-6387-4261-9239-d7521d081ad1_1568x903.png 1272w, https://substackcdn.com/image/fetch/$s_!r1_H!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ffef6df6b-6387-4261-9239-d7521d081ad1_1568x903.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" 
height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a><figcaption class="image-caption">Screenshot of the server we will build, shown in the AutoGen Studio MCP Playground</figcaption></figure></div><div><hr></div><p></p><div id="youtube2--yrqkwZr3Nc" class="youtube-wrap" data-attrs="{&quot;videoId&quot;:&quot;-yrqkwZr3Nc&quot;,&quot;startTime&quot;:null,&quot;endTime&quot;:null}" data-component-name="Youtube2ToDOM"><div class="youtube-inner"><iframe src="https://www.youtube-nocookie.com/embed/-yrqkwZr3Nc?rel=0&amp;autoplay=0&amp;showinfo=0&amp;enablejsapi=0" frameborder="0" loading="lazy" gesture="media" allow="autoplay; fullscreen" allowautoplay="true" allowfullscreen="true" width="728" height="409"></iframe></div></div><div><hr></div><h2>Key MCP Concepts in Brief</h2><p>MCP has an excellent and well maintained <a href="https://modelcontextprotocol.io/introduction">documentation site</a>. 
In brief, here are key concepts to get started. </p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!Ct1Z!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc644e807-9e03-483c-abee-d8586149fc9c_2681x1621.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!Ct1Z!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc644e807-9e03-483c-abee-d8586149fc9c_2681x1621.png 424w, https://substackcdn.com/image/fetch/$s_!Ct1Z!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc644e807-9e03-483c-abee-d8586149fc9c_2681x1621.png 848w, https://substackcdn.com/image/fetch/$s_!Ct1Z!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc644e807-9e03-483c-abee-d8586149fc9c_2681x1621.png 1272w, https://substackcdn.com/image/fetch/$s_!Ct1Z!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc644e807-9e03-483c-abee-d8586149fc9c_2681x1621.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!Ct1Z!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc644e807-9e03-483c-abee-d8586149fc9c_2681x1621.png" width="1456" height="880" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/c644e807-9e03-483c-abee-d8586149fc9c_2681x1621.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:880,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:185668,&quot;alt&quot;:&quot;&quot;,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://newsletter.victordibia.com/i/167292158?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc644e807-9e03-483c-abee-d8586149fc9c_2681x1621.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" title="" srcset="https://substackcdn.com/image/fetch/$s_!Ct1Z!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc644e807-9e03-483c-abee-d8586149fc9c_2681x1621.png 424w, https://substackcdn.com/image/fetch/$s_!Ct1Z!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc644e807-9e03-483c-abee-d8586149fc9c_2681x1621.png 848w, https://substackcdn.com/image/fetch/$s_!Ct1Z!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc644e807-9e03-483c-abee-d8586149fc9c_2681x1621.png 1272w, https://substackcdn.com/image/fetch/$s_!Ct1Z!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc644e807-9e03-483c-abee-d8586149fc9c_2681x1621.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg 
role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><ul><li><p><strong>Server:</strong> MCP servers expose capabilities through a standardized interface. A single server can provide multiple tools (functions to call), resources (data to read), prompts (templates for LLM interactions), and sampling (request LLM completions from the client).</p></li><li><p><strong>Client: </strong>MCP clients maintain 1:1 connections with servers and handle the protocol communication. Hosts embed clients to talk to servers.</p></li><li><p><strong>Host</strong> MCP hosts are user-facing applications like Claude Desktop, Cursor, or VSCode. 
They use clients to connect to servers and decide which tools to call based on user needs.</p></li><li><p><strong>Transport:</strong> MCP uses JSON-RPC 2.0 with <a href="https://modelcontextprotocol.io/docs/concepts/transports">two transport options</a> - <em><strong>stdio</strong></em> and <em><strong>streamable HTTP</strong></em>. <strong>Stdio</strong> runs the server as a subprocess using standard input/output - ideal for local integrations where the server runs as a subprocess of the client (e.g., IDE extensions, local development tools). <strong>Streamable HTTP</strong> uses network requests - better for web applications, distributed systems, multiple clients connecting to one server, and easier debugging/monitoring.</p></li></ul><p>For this tutorial, we'll use the <strong>Streamable HTTP</strong> transport, as it provides a better learning experience with clearer separation of concerns (you can run the server on a remote machine) and easier debugging.</p><div><hr></div><h2>Building Your First MCP Server</h2><p>Now that we've covered the core concepts, let's build your first MCP server. We'll use the <a href="https://github.com/modelcontextprotocol/python-sdk">Python MCP SDK</a>, which is mature and widely adopted, but the same concepts apply to other languages.</p><blockquote><p>Protocol vs SDK?<br>MCP mostly defines <a href="https://modelcontextprotocol.io/specification/2025-06-18">a protocol or standard</a> - essentially a set of rules that says clients and servers  MUST/SHOULD/SHALL/SHALL NOT  do X and Y in order to communicate. Now SDKs are an implementation of these rules. <br>While you can build your own compliant servers/clients, in general, it is recommended that you use SDKs for more standardized behaviors where possible.</p></blockquote><p>Our goal: build a tool that can answer news-related queries like "What is the latest AI news on TechCrunch?"</p><h3>1. 
Set Up Your Project</h3><p>Create a new Python project and install the MCP SDK.</p><p>Using <code>uv</code> (recommended):</p><pre><code><code>uv init mcp-news-demo
cd mcp-news-demo
uv add "mcp[cli]"</code></code></pre><p>Using pip:</p><pre><code><code>pip install "mcp[cli]"</code></code></pre><h3>2. Create the MCP Server</h3><p>Let's create a simple MCP server with a tool that fetches TechCrunch news. </p><pre><code><code># server.py
import os
from mcp.server.fastmcp import FastMCP
import requests

# Create the MCP server instance that the tool below is registered on.
# Host and port are read from the MCP_SERVER_HOST / MCP_SERVER_PORT
# environment variables, falling back to localhost and 8011.
mcp = FastMCP(
    "TechCrunch News Server", 
    host=os.environ.get("MCP_SERVER_HOST", "localhost"), 
    port=int(os.environ.get("MCP_SERVER_PORT", 8011))
)

@mcp.tool(title="Fetch from TechCrunch")
def fetch_from_techcrunch(category: str = "latest") -&gt; str:
    """Fetch the latest news from TechCrunch for a given category."""
    allowed = {"ai", "startup", "security", "venture", "latest"}
    cat = category.lower()
    
    if cat not in allowed:
        cat = "latest"
    
    url = f"https://techcrunch.com/tag/{cat}/" if cat != "latest" else "https://techcrunch.com/"
    
    try:
        response = requests.get(url)
        if response.ok:
            try:
                from bs4 import BeautifulSoup
                soup = BeautifulSoup(response.text, "html.parser")
                text = soup.get_text(separator=' ', strip=True)
                return text[:1000] + ("..." if len(text) &gt; 1000 else "")
            except ImportError:
                return response.text[:1000] + ("..." if len(response.text) &gt; 1000 else "")
        return "Failed to fetch news."
    except Exception as e:
        return f"Error fetching news: {str(e)}"

if __name__ == "__main__":
    mcp.run(transport="streamable-http")
</code></code></pre><h3>3. Run Your Server</h3><p>Start your server:</p><pre><code><code>python server.py</code></code></pre><p>You should see:</p><pre><code><code>INFO: Started server process [65618]
INFO: Waiting for application startup.
[07/01/25 12:40:34] INFO StreamableHTTP session manager started
INFO: Application startup complete.
INFO: Uvicorn running on http://localhost:8011 (Press CTRL+C to quit)</code></code></pre><p>Your server is now running at <code>http://localhost:8011/mcp</code> and ready to accept requests.</p><blockquote><p><strong>Note</strong>: This example uses the high-level FastMCP API with Streamable HTTP transport. For advanced use cases, check the <a href="https://github.com/modelcontextprotocol/python-sdk">MCP Python SDK documentation</a> for the low-level API.</p></blockquote><p></p><div><hr></div><h2>Building Your First MCP Client</h2><p>MCP clients connect to servers and handle communication. They operate within "sessions" - logical groupings of requests and responses. Let's build a client that connects to our TechCrunch server. Note that we will use the same streamablehttp transport as the server:</p><pre><code><code># client.py
import asyncio
import os
from mcp import ClientSession
from mcp.client.streamable_http import streamablehttp_client

async def run_client():
    """Connect to the MCP server over Streamable HTTP, list its tools, and call one."""
    # Server location comes from the environment, defaulting to localhost:8011
    server_host = os.environ.get("MCP_SERVER_HOST", "localhost")
    server_port = os.environ.get("MCP_SERVER_PORT", "8011")
    endpoint = "http://{}:{}/mcp".format(server_host, server_port)

    async with streamablehttp_client(endpoint) as (reader, writer, _):
        async with ClientSession(reader, writer) as session:
            # Initialize the connection
            await session.initialize()

            # Print every tool the server reports
            listing = await session.list_tools()
            print("Available tools:")
            for tool in listing.tools:
                print(f"- {tool.name}: {tool.description}")

            # Call the news tool with the "ai" category
            tool_args = {"category": "ai"}
            result = await session.call_tool("fetch_from_techcrunch", arguments=tool_args)
            print(f"\nTool result: {result.content}")

if __name__ == "__main__":
    asyncio.run(run_client())
</code></code></pre><p>You should now see a list of tools and a result of a call to the tool using <code>session.call_tool</code>.</p><pre><code>Available tools:
- fetch_from_techcrunch: Fetch the latest news from TechCrunch for a given category.
Tool result: [TextContent(type='text', text='AI | TechCrunch AI | TechCrunch TechCrunch Desktop Logo TechCrunch Mobile Logo Latest Startups Venture Apple Security AI Apps Events Podcasts Newsletters Search Submit Site Search Toggle Mega Menu Toggle Topics Latest AI Amazon  ...
</code></pre><p></p><h3>Server and Client Capabilities</h3><p>Note: `await session.initialize()` returns important details about the server including its <a href="https://modelcontextprotocol.io/specification/2025-06-18/schema#servercapabilities">capabilities</a> (e.g., if it provides tools, resources, prompts, logging or any experimental/custom features.). This can be utilized by the client or host application to define dynamic behaviors. </p><p>Similarly, clients can &#8220;advertise support&#8221; for <a href="https://modelcontextprotocol.io/specification/2025-06-18/schema#clientcapabilities">client capabilities</a> such as  sampling, elicitation and roots during initialization. In the python SDK, this is done by providing `callbacks` for each capability.</p><pre><code>async with ClientSession(read, write, sampling_callback=sampling_callback, elicitation_callback=elicitation_callback,list_roots_callback=list_roots_callback, logging_callback=logging_callback) as session:</code></pre><div><hr></div><h2>Building the Host Application</h2><p>The host application is where the magic happens. It bridges MCP with the outside world (users or business applications), turning user queries or business tasks into tool calls and responses.</p><p>Most MCP tutorials skip over how complex host applications really can be. They're not just pass-through layers - they're intelligent orchestrators that often must:</p><ul><li><p>Accept user requests</p></li><li><p>Discover available tools from MCP servers</p></li><li><p>Use an LLM to choose the right tools</p></li><li><p>Execute tool calls with proper parameters</p></li><li><p>Handle errors gracefully</p></li><li><p>Present results in a user-friendly way</p></li></ul><p>Let's build a simple host that uses OpenAI to intelligently orchestrate our MCP tools.</p><h3>Setup</h3><p>First, install dependencies:</p><pre><code><code>uv add openai
# or: pip install openai
</code></code></pre><p>Set your OpenAI API key:</p><pre><code><code>export OPENAI_API_KEY="your-api-key-here"
</code></code></pre><h3>The Host Application</h3><pre><code><code># app.py
import asyncio
import json
import os
import sys
from openai import AsyncOpenAI
from mcp import ClientSession
from mcp.client.streamable_http import streamablehttp_client

def convert_mcp_tools_to_openai_format(tools):
    """Translate MCP tool definitions into OpenAI function-calling tool specs.

    Each tool contributes its name, description and input schema; a tool
    without an inputSchema attribute gets an empty parameters object.
    """
    return [
        {
            "type": "function",
            "function": {
                "name": tool.name,
                "description": tool.description,
                "parameters": getattr(tool, "inputSchema", {}),
            },
        }
        for tool in tools
    ]

async def handle_user_request(session, openai_client, tools, user_input: str):
    """Answer a user request, letting the LLM decide whether to call an MCP tool.

    The request and the available tools are sent to the chat model. When the
    model asks for a tool, the first requested call is executed through the
    MCP session and a truncated summary of its result is returned; otherwise
    the model's own reply is returned.
    """
    conversation = [
        {
            "role": "system",
            "content": "You are a helpful assistant that can fetch news from TechCrunch. Use tools when needed."
        },
        {"role": "user", "content": user_input},
    ]

    # Let the model choose whether (and which) tool to call
    completion = await openai_client.chat.completions.create(
        model="gpt-4",
        messages=conversation,
        tools=convert_mcp_tools_to_openai_format(tools),
        tool_choice="auto",
    )
    message = completion.choices[0].message

    # No tool requested: the model answered directly
    if not message.tool_calls:
        return message.content

    # Only the first requested tool call is executed
    requested = message.tool_calls[0].function
    function_name = requested.name
    function_args = json.loads(requested.arguments)

    print(f"&#128295; Calling tool: {function_name} with args: {function_args}")

    result = await session.call_tool(function_name, arguments=function_args)

    # Keep at most the first 500 characters of the tool output
    content = str(result.content)[:500]
    return f"Here's what I found:\n\n{content}...\n\nFor more details, visit TechCrunch directly."

async def main():
    """Run the host: read the task from argv, query the MCP server, print the answer.

    Requires OPENAI_API_KEY to be set. The server address is taken from the
    MCP_SERVER_HOST / MCP_SERVER_PORT environment variables (defaulting to
    localhost and 8011), the same variables server.py and client.py read.
    """
    if not os.getenv("OPENAI_API_KEY"):
        print("Error: Set OPENAI_API_KEY environment variable")
        return

    # Get user input; fall back to a default question when no arguments are given
    user_input = " ".join(sys.argv[1:]).strip() or "What is the latest news on AI?"

    # Initialize OpenAI client
    openai_client = AsyncOpenAI()

    # Connect to MCP server, using the same env-based address as the server and client
    host = os.environ.get("MCP_SERVER_HOST", "localhost")
    port = os.environ.get("MCP_SERVER_PORT", "8011")
    server_url = f"http://{host}:{port}/mcp"

    async with streamablehttp_client(server_url) as (read, write, _):
        async with ClientSession(read, write) as session:
            await session.initialize()

            # Get available tools
            tools = (await session.list_tools()).tools

            print(f"Task: {user_input}\n")

            # Process the request
            response = await handle_user_request(
                session, openai_client, tools, user_input
            )

            print(response)

if __name__ == "__main__":
    # Make sure server is running first!
    asyncio.run(main())
</code></code></pre><p>The host application above combines three key components: an MCP client, an LLM (OpenAI), and user interface logic. When you run <code>python app.py "What's the latest AI news?"</code>, here's what happens: First, the host connects to the MCP server using its embedded client and calls <code>list_tools()</code> to discover available tools. It then converts these MCP tool definitions into OpenAI's function-calling format using <code>convert_mcp_tools_to_openai_format()</code>. Next, it sends your question along with the tool definitions to OpenAI's API. OpenAI analyzes your question and returns which tool to call - in this case, <code>fetch_from_techcrunch</code> with <code>category='ai'</code>. The host then executes this tool call through <code>session.call_tool()</code>, gets the raw results from TechCrunch, and formats them into a readable response. This architecture lets you ask natural language questions without knowing which tools exist or how to call them - the LLM figures that out based on what's available.  </p><p>A similar host application format can be extended to build complex agentic or multi-agent applications. </p><h2>Putting It All Together</h2><p>You now have a complete MCP application:</p><ol><li><p><strong>Server</strong> (<code>server.py</code>) - Exposes the TechCrunch fetching tool</p></li><li><p><strong>Client</strong> (<code>client.py</code>) - Shows how to connect and call tools directly</p></li><li><p><strong>Host</strong> (<code>app.py</code>) - Uses AI to intelligently orchestrate tool usage</p></li></ol><p>To run the full system:</p><pre><code><code># Terminal 1: Start the server
python server.py

# Terminal 2: Run the host application
python app.py "What's the latest AI news?"
</code></code></pre><p>Oh .. and the server above can be used in any of your favorite MCP host applications. For example, you can add it to your VSCode agent by adding an mcp server config to your <a href="https://code.visualstudio.com/docs/copilot/chat/mcp-servers#_add-an-mcp-server-to-your-user-settings">user settings</a>. The same configuration will work for <a href="https://modelcontextprotocol.io/quickstart/user">Claude Desktop</a>, <a href="https://docs.cursor.com/context/model-context-protocol#manual-configuration">Cursor</a>, <a href="https://docs.windsurf.com/windsurf/cascade/mcp#mcp-config-json">Windsurf</a> &#8230;</p><pre><code>"demo": {
        "type": "http",
        "url": "http://localhost:8011/mcp/",
        "headers": { "VERSION": "1.2" }
      }</code></pre><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!1YrV!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc0610a27-9024-4fae-b8f3-ea9ae27625eb_2938x1746.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!1YrV!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc0610a27-9024-4fae-b8f3-ea9ae27625eb_2938x1746.png 424w, https://substackcdn.com/image/fetch/$s_!1YrV!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc0610a27-9024-4fae-b8f3-ea9ae27625eb_2938x1746.png 848w, https://substackcdn.com/image/fetch/$s_!1YrV!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc0610a27-9024-4fae-b8f3-ea9ae27625eb_2938x1746.png 1272w, https://substackcdn.com/image/fetch/$s_!1YrV!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc0610a27-9024-4fae-b8f3-ea9ae27625eb_2938x1746.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!1YrV!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc0610a27-9024-4fae-b8f3-ea9ae27625eb_2938x1746.png" width="1456" height="865" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/c0610a27-9024-4fae-b8f3-ea9ae27625eb_2938x1746.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:865,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:747317,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://newsletter.victordibia.com/i/167292158?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc0610a27-9024-4fae-b8f3-ea9ae27625eb_2938x1746.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!1YrV!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc0610a27-9024-4fae-b8f3-ea9ae27625eb_2938x1746.png 424w, https://substackcdn.com/image/fetch/$s_!1YrV!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc0610a27-9024-4fae-b8f3-ea9ae27625eb_2938x1746.png 848w, https://substackcdn.com/image/fetch/$s_!1YrV!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc0610a27-9024-4fae-b8f3-ea9ae27625eb_2938x1746.png 1272w, https://substackcdn.com/image/fetch/$s_!1YrV!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc0610a27-9024-4fae-b8f3-ea9ae27625eb_2938x1746.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" 
height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><h2></h2><blockquote><p><strong>Further Reading:</strong><br>I wrote a book on <a href="https://buy.multiagentbook.com/">Designing Multi-Agent Systems</a> - Chapter 12 (Protocols for Distributed Agents) provides a perspective on MCP and A2A and when you should reach for them as you build agents.  </p><ul><li><p>Book Digital PDF:  </p></li></ul><p>https://buy.multiagentbook.com/</p><ul><li><p>Book on Amazon: <a href="https://www.amazon.com/dp/B0G2BCQQJY">https://www.amazon.com/dp/B0G2BCQQJY</a> </p></li></ul></blockquote>
      <p>
          <a href="https://newsletter.victordibia.com/p/mcp-for-software-engineers-part-1">
              Read more
          </a>
      </p>
   ]]></content:encoded></item><item><title><![CDATA[... Why I write about AI ...]]></title><description><![CDATA[#43 | 6000 subscribers, and my reflections on why I write about AI .. and why you should too!]]></description><link>https://newsletter.victordibia.com/p/why-i-write-about-ai</link><guid isPermaLink="false">https://newsletter.victordibia.com/p/why-i-write-about-ai</guid><dc:creator><![CDATA[Victor Dibia, PhD]]></dc:creator><pubDate>Tue, 17 Jun 2025 15:30:29 GMT</pubDate><enclosure url="https://substackcdn.com/image/fetch/$s_!qChp!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F24f0f9fc-4bad-41ea-9fdd-f15208aaf3bf_2852x1745.png" length="0" type="image/jpeg"/><content:encoded><![CDATA[<p></p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!qChp!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F24f0f9fc-4bad-41ea-9fdd-f15208aaf3bf_2852x1745.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!qChp!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F24f0f9fc-4bad-41ea-9fdd-f15208aaf3bf_2852x1745.png 424w, https://substackcdn.com/image/fetch/$s_!qChp!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F24f0f9fc-4bad-41ea-9fdd-f15208aaf3bf_2852x1745.png 848w, https://substackcdn.com/image/fetch/$s_!qChp!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F24f0f9fc-4bad-41ea-9fdd-f15208aaf3bf_2852x1745.png 1272w, 
https://substackcdn.com/image/fetch/$s_!qChp!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F24f0f9fc-4bad-41ea-9fdd-f15208aaf3bf_2852x1745.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!qChp!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F24f0f9fc-4bad-41ea-9fdd-f15208aaf3bf_2852x1745.png" width="1456" height="891" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/24f0f9fc-4bad-41ea-9fdd-f15208aaf3bf_2852x1745.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:891,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:2645360,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:false,&quot;topImage&quot;:true,&quot;internalRedirect&quot;:&quot;https://newsletter.victordibia.com/i/160793924?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F24f0f9fc-4bad-41ea-9fdd-f15208aaf3bf_2852x1745.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!qChp!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F24f0f9fc-4bad-41ea-9fdd-f15208aaf3bf_2852x1745.png 424w, https://substackcdn.com/image/fetch/$s_!qChp!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F24f0f9fc-4bad-41ea-9fdd-f15208aaf3bf_2852x1745.png 848w, 
https://substackcdn.com/image/fetch/$s_!qChp!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F24f0f9fc-4bad-41ea-9fdd-f15208aaf3bf_2852x1745.png 1272w, https://substackcdn.com/image/fetch/$s_!qChp!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F24f0f9fc-4bad-41ea-9fdd-f15208aaf3bf_2852x1745.png 1456w" sizes="100vw" fetchpriority="high"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p></p><blockquote><p>The <strong>Designing with AI newsletter</strong> just hit 6,000 subscribers. Thank you! 
</p></blockquote><p>I spend an awful lot of my free time <a href="https://newsletter.victordibia.com/archive?sort=top">writing about AI</a> - late nights, weekends, whenever my 6-year-old is asleep (his time is precious!). For the past decade, I've built interactive prototypes and written mostly for a technical audience - software engineers and researchers on my <a href="https://victordibia.com/">personal website</a> and now on <a href="https://newsletter.victordibia.com/">Substack</a>.  As examples -  I <a href="https://victordibia.com/projects/">built</a>  the first interactive demo showing how to <a href="https://anomagram.fastforwardlabs.com/#/">train deep neural networks for anomaly detection in the browser using TensorFlow.js</a>, how to train a Generative Adversarial Network from Scratch for <a href="https://medium.com/data-science/african-masks-gans-tpu-9a6b0cf3105c">African Mask Art generation (GANs, TPUs) </a>and interactive demos showing  how <a href="https://convnetplayground.fastforwardlabs.com/#/">Convolutional Neural Networks work for semantic image search</a> etc. The goal here was mainly to help <em>technical audience</em>s <em>learn new technical things</em>.</p><p>But in the last ~3 years, <em>something has changed</em>. Capable, complex AI has become <em>commoditized</em>. ChatGPT, Sora, Veo - these tools have brought topics I once explained to engineers and researchers into the mainstream. Suddenly, non-technical users are <em>interacting with AI daily</em>, people with no background in software engineering are now <a href="https://simonwillison.net/2025/Mar/19/vibe-coding/">&#8220;vibe coding&#8221; apps</a>,  and teams are <a href="https://newsletter.victordibia.com/p/you-have-ai-fatigue-thats-why-you">scrambling</a> to integrate this technology into everyday tools.  
Furthermore, AI systems are becoming more <em>agentic</em> - i.e., they can perform actions (e.g., send emails, reserve tickets, write code) autonomously on behalf of a user.</p><p>Advanced AI (by <em>some</em> standards) itself is becoming <em><a href="https://www.aisnakeoil.com/p/ai-as-normal-technology">normal</a></em><a href="https://www.aisnakeoil.com/p/ai-as-normal-technology"> technology</a>.</p><p>From my vantage point (as someone who works in the AI field), I can't help but see many side effects to this rapid shift. Specifically, I think that a rapid shift <em>unaccompanied by adequate preparation</em> could be problematic. One analogy that comes to mind is giving a 16-year-old a <a href="https://en.wikipedia.org/wiki/LaFerrari">Ferrari LaFerrari</a> a month after he gets his driver's license. Now the Ferrari itself (an incredibly fast and sophisticated car) is not problematic. However, the teenager's lack of   experience, judgment, and training/education inherently makes the situation problematic. <br><br>Similarly, the proliferation of AI should be accompanied by a proliferation of AI literacy and education to enable effective and responsible use. Deep down, <em><strong>this is why I write</strong></em>.</p><div><hr></div><div class="subscription-widget-wrap-editor" data-attrs="{&quot;url&quot;:&quot;https://newsletter.victordibia.com/subscribe?&quot;,&quot;text&quot;:&quot;Subscribe&quot;,&quot;language&quot;:&quot;en&quot;}" data-component-name="SubscribeWidgetToDOM"><div class="subscription-widget show-subscribe"><div class="preamble"><p class="cta-caption">Designing with AI is a reader-supported publication. 
To receive new posts and support my work, consider becoming a free or paid subscriber.</p></div><form class="subscription-widget-subscribe"><input type="email" class="email-input" name="email" placeholder="Type your email&#8230;" tabindex="-1"><input type="submit" class="button primary" value="Subscribe"><div class="fake-input-wrapper"><div class="fake-input"></div><div class="fake-button"></div></div></form></div></div><div><hr></div><p>  </p><h2>Why I Write About AI</h2><p>If I could <em>bucket</em> the reasons I write, I&#8217;d organize them into the following 4 areas.  </p><h3>1. AI Literacy </h3><p>I think that like internet or digital literacy, <em><strong>AI literacy</strong></em> - how to successfully and efficiently use AI for any and all tasks - <strong>will be critical</strong>.  If we agree that AI will drive an increasing slice of decisions made by the general population, then it is important that the <em>general population understand how AI works</em>. It can be as simple as highlighting the limitations of generative AI models today (hallucinations etc) to understanding how to effectively use AI (as a tool) for specific tasks. </p><div class="digest-post-embed" data-attrs="{&quot;nodeId&quot;:&quot;bc60ea61-b8f7-4da9-a030-a0bb39d106c8&quot;,&quot;caption&quot;:&quot;It's no longer a question of whether to use GenAI tools, but how to use them effectively. 
Just as digital literacy was crucial for navigating the internet era, AI literacy is becoming essential for engineers to achieve equitable outcomes from AI tools.&quot;,&quot;cta&quot;:&quot;Read full story&quot;,&quot;showBylines&quot;:true,&quot;size&quot;:&quot;sm&quot;,&quot;isEditorNode&quot;:true,&quot;title&quot;:&quot;How to Effectively Use Generative AI for Software Engineering Tasks!&quot;,&quot;publishedBylines&quot;:[{&quot;id&quot;:85692678,&quot;name&quot;:&quot;Victor Dibia, PhD&quot;,&quot;bio&quot;:&quot;Hacker, Research Scientist (Microsoft Research), Author working on and writing about Generative AI, Agents. Core maintainer for the AutoGen Multi-Agent Framework (35k Stars on GitHub). Previously at Cloudera, IBM Research. All views are my own.&quot;,&quot;photo_url&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/27536f63-7d8e-48dc-b34c-150acfacdc8b_1726x1396.jpeg&quot;,&quot;is_guest&quot;:false,&quot;bestseller_tier&quot;:null}],&quot;post_date&quot;:&quot;2024-11-25T15:20:16.450Z&quot;,&quot;cover_image&quot;:&quot;https://substackcdn.com/image/fetch/f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F54d55546-d64d-491e-bada-43150eb40014_2446x1344.png&quot;,&quot;cover_image_alt&quot;:null,&quot;canonical_url&quot;:&quot;https://newsletter.victordibia.com/p/developers-stop-asking-llms-genai&quot;,&quot;section_name&quot;:null,&quot;video_upload_id&quot;:null,&quot;id&quot;:143432589,&quot;type&quot;:&quot;newsletter&quot;,&quot;reaction_count&quot;:22,&quot;comment_count&quot;:1,&quot;publication_id&quot;:null,&quot;publication_name&quot;:&quot;Designing with 
AI&quot;,&quot;publication_logo_url&quot;:&quot;https://substackcdn.com/image/fetch/f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fff160652-8bbb-475e-80a7-ba4eb5f80dcb_504x504.png&quot;,&quot;belowTheFold&quot;:true,&quot;youtube_url&quot;:null,&quot;show_links&quot;:null,&quot;feed_url&quot;:null}"></div><div class="digest-post-embed" data-attrs="{&quot;nodeId&quot;:&quot;243d9730-9bdf-49e5-96f3-5b457f358c93&quot;,&quot;caption&quot;:&quot;I have spent time building with LLMs over the last 3 years (contributed to GitHub CoPilot), and this post covers some techniques I have used to make systems built with these models more reliable.&quot;,&quot;cta&quot;:&quot;Read full story&quot;,&quot;showBylines&quot;:true,&quot;size&quot;:&quot;sm&quot;,&quot;isEditorNode&quot;:true,&quot;title&quot;:&quot;Practical Steps to Reduce Hallucination and Improve Performance of Systems Built with Large Language Models &quot;,&quot;publishedBylines&quot;:[{&quot;id&quot;:85692678,&quot;name&quot;:&quot;Victor Dibia, PhD&quot;,&quot;bio&quot;:&quot;Hacker, Research Scientist (Microsoft Research), Author working on and writing about Generative AI, Agents. Core maintainer for the AutoGen Multi-Agent Framework (35k Stars on GitHub). Previously at Cloudera, IBM Research. 
All views are my own.&quot;,&quot;photo_url&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/27536f63-7d8e-48dc-b34c-150acfacdc8b_1726x1396.jpeg&quot;,&quot;is_guest&quot;:false,&quot;bestseller_tier&quot;:null}],&quot;post_date&quot;:&quot;2023-02-14T16:21:03.534Z&quot;,&quot;cover_image&quot;:&quot;https://substackcdn.com/image/fetch/f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F52d9206d-babb-46a5-864c-b668249280f3_1710x923.png&quot;,&quot;cover_image_alt&quot;:null,&quot;canonical_url&quot;:&quot;https://newsletter.victordibia.com/p/practical-steps-to-reduce-hallucination&quot;,&quot;section_name&quot;:null,&quot;video_upload_id&quot;:null,&quot;id&quot;:101087966,&quot;type&quot;:&quot;newsletter&quot;,&quot;reaction_count&quot;:47,&quot;comment_count&quot;:9,&quot;publication_id&quot;:null,&quot;publication_name&quot;:&quot;Designing with AI&quot;,&quot;publication_logo_url&quot;:&quot;https://substackcdn.com/image/fetch/f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fff160652-8bbb-475e-80a7-ba4eb5f80dcb_504x504.png&quot;,&quot;belowTheFold&quot;:true,&quot;youtube_url&quot;:null,&quot;show_links&quot;:null,&quot;feed_url&quot;:null}"></div><p></p><h3>2. Developers Need to Understand How to Build with AI</h3><p>All engineers should understand how to build with AI and integrate AI into solutions. <a href="https://newsletter.victordibia.com/p/developers-stop-asking-llms-genai">AI enabled software engineering</a> is now a thing. It is changing how software is built and consequently the expectations of employers.<br>IMO,  developers that don&#8217;t use AI for software engineering tasks are likely behind.<br>Recently, the CEO of Shopify declared that employees must <a href="https://x.com/tobi/status/1909251946235437514">reflexively use AI</a>.  
A quote is below:</p><blockquote><p><strong>Before asking for more Headcount and resources</strong>, teams must demonstrate why they cannot get what they want done using AI. What would this area look like if autonomous AI agents were already part of the team? This question can lead to really fun discussions and projects.</p></blockquote><p>It behooves all engineers to at least understand how AI and AI agents work.  </p><div class="digest-post-embed" data-attrs="{&quot;nodeId&quot;:&quot;c9b0d7eb-fee1-4397-b663-feac2aad7aa7&quot;,&quot;caption&quot;:&quot;AutoGen is a framework for building multi-agent applications - and we recently released a new v0.4 version - a complete rewrite of the framework.&quot;,&quot;cta&quot;:&quot;Read full story&quot;,&quot;showBylines&quot;:true,&quot;size&quot;:&quot;sm&quot;,&quot;isEditorNode&quot;:true,&quot;title&quot;:&quot;A Friendly Introduction to the AutoGen Framework (v0.4 API) &quot;,&quot;publishedBylines&quot;:[{&quot;id&quot;:85692678,&quot;name&quot;:&quot;Victor Dibia, PhD&quot;,&quot;bio&quot;:&quot;Hacker, Research Scientist (Microsoft Research), Author working on and writing about Generative AI, Agents. Core maintainer for the AutoGen Multi-Agent Framework (35k Stars on GitHub). Previously at Cloudera, IBM Research. 
All views are my own.&quot;,&quot;photo_url&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/27536f63-7d8e-48dc-b34c-150acfacdc8b_1726x1396.jpeg&quot;,&quot;is_guest&quot;:false,&quot;bestseller_tier&quot;:null}],&quot;post_date&quot;:&quot;2025-02-04T15:31:29.824Z&quot;,&quot;cover_image&quot;:&quot;https://substackcdn.com/image/fetch/f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fbd6fd990-9254-47ef-861f-194767e6609e_3329x1695.png&quot;,&quot;cover_image_alt&quot;:null,&quot;canonical_url&quot;:&quot;https://newsletter.victordibia.com/p/a-friendly-introduction-to-the-autogen&quot;,&quot;section_name&quot;:null,&quot;video_upload_id&quot;:null,&quot;id&quot;:156440663,&quot;type&quot;:&quot;podcast&quot;,&quot;reaction_count&quot;:15,&quot;comment_count&quot;:0,&quot;publication_id&quot;:null,&quot;publication_name&quot;:&quot;Designing with AI&quot;,&quot;publication_logo_url&quot;:&quot;https://substackcdn.com/image/fetch/f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fff160652-8bbb-475e-80a7-ba4eb5f80dcb_504x504.png&quot;,&quot;belowTheFold&quot;:true,&quot;youtube_url&quot;:null,&quot;show_links&quot;:null,&quot;feed_url&quot;:null}"></div><div class="digest-post-embed" data-attrs="{&quot;nodeId&quot;:&quot;40ed117a-5f71-47e7-bb30-a2abc3e5f60c&quot;,&quot;caption&quot;:&quot;In a world crowded with dozens of multi-agent frameworks, a critical question for engineers, consultants or teams looking to build agentic AI applications is:&quot;,&quot;cta&quot;:&quot;Read full story&quot;,&quot;showBylines&quot;:true,&quot;size&quot;:&quot;sm&quot;,&quot;isEditorNode&quot;:true,&quot;title&quot;:&quot;10 Ways to Critically Evaluate and Select the Right Multi-Agent Framework&quot;,&quot;publishedBylines&quot;:[{&quot;id&quot;:85692678,&quot;name&quot;:&quot;Victor Dibia, PhD&quot;,&quot;bio&quot;:&quot;Hacker, Research 
Scientist (Microsoft Research), Author working on and writing about Generative AI, Agents. Core maintainer for the AutoGen Multi-Agent Framework (35k Stars on GitHub). Previously at Cloudera, IBM Research. All views are my own.&quot;,&quot;photo_url&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/27536f63-7d8e-48dc-b34c-150acfacdc8b_1726x1396.jpeg&quot;,&quot;is_guest&quot;:false,&quot;bestseller_tier&quot;:null}],&quot;post_date&quot;:&quot;2025-04-14T14:31:43.839Z&quot;,&quot;cover_image&quot;:&quot;https://substackcdn.com/image/fetch/f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F26eab399-8733-4254-bfe6-f78c085ff114_2852x1745.png&quot;,&quot;cover_image_alt&quot;:null,&quot;canonical_url&quot;:&quot;https://newsletter.victordibia.com/p/10-ways-to-compare-and-select-the&quot;,&quot;section_name&quot;:null,&quot;video_upload_id&quot;:null,&quot;id&quot;:160901456,&quot;type&quot;:&quot;newsletter&quot;,&quot;reaction_count&quot;:22,&quot;comment_count&quot;:0,&quot;publication_id&quot;:null,&quot;publication_name&quot;:&quot;Designing with AI&quot;,&quot;publication_logo_url&quot;:&quot;https://substackcdn.com/image/fetch/f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fff160652-8bbb-475e-80a7-ba4eb5f80dcb_504x504.png&quot;,&quot;belowTheFold&quot;:true,&quot;youtube_url&quot;:null,&quot;show_links&quot;:null,&quot;feed_url&quot;:null}"></div><div class="digest-post-embed" data-attrs="{&quot;nodeId&quot;:&quot;366e1c5f-77d2-47ab-acda-918abf6b79d4&quot;,&quot;caption&quot;:&quot;Update: I am writing a book on this topic published by Manning Publications. 
Take a look and preorder here - https://mng.bz/eVP9&quot;,&quot;cta&quot;:&quot;Read full story&quot;,&quot;showBylines&quot;:true,&quot;size&quot;:&quot;sm&quot;,&quot;isEditorNode&quot;:true,&quot;title&quot;:&quot;Multi-Agent LLM Applications | A Review of Current Research, Tools, and Challenges&quot;,&quot;publishedBylines&quot;:[{&quot;id&quot;:85692678,&quot;name&quot;:&quot;Victor Dibia, PhD&quot;,&quot;bio&quot;:&quot;Hacker, Research Scientist (Microsoft Research), Author working on and writing about Generative AI, Agents. Core maintainer for the AutoGen Multi-Agent Framework (35k Stars on GitHub). Previously at Cloudera, IBM Research. All views are my own.&quot;,&quot;photo_url&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/27536f63-7d8e-48dc-b34c-150acfacdc8b_1726x1396.jpeg&quot;,&quot;is_guest&quot;:false,&quot;bestseller_tier&quot;:null}],&quot;post_date&quot;:&quot;2023-12-19T20:08:35.162Z&quot;,&quot;cover_image&quot;:&quot;https://substackcdn.com/image/fetch/f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F93af1f8f-8039-48a4-9517-f4975042c98c_1836x1223.png&quot;,&quot;cover_image_alt&quot;:null,&quot;canonical_url&quot;:&quot;https://newsletter.victordibia.com/p/multi-agent-llm-applications-a-review&quot;,&quot;section_name&quot;:null,&quot;video_upload_id&quot;:null,&quot;id&quot;:137700259,&quot;type&quot;:&quot;newsletter&quot;,&quot;reaction_count&quot;:85,&quot;comment_count&quot;:12,&quot;publication_id&quot;:null,&quot;publication_name&quot;:&quot;Designing with AI&quot;,&quot;publication_logo_url&quot;:&quot;https://substackcdn.com/image/fetch/f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fff160652-8bbb-475e-80a7-ba4eb5f80dcb_504x504.png&quot;,&quot;belowTheFold&quot;:true,&quot;youtube_url&quot;:null,&quot;show_links&quot;:null,&quot;feed_url&quot;:null}"></div><p></p><h3>3. 
AI Can Have Adverse (and Unintended) Side Effects</h3><p>AI can have adverse side effects. Some of these are subtle (e.g., <a href="https://newsletter.victordibia.com/p/you-have-ai-fatigue-thats-why-you">AI fatigue</a>) while others could lead to potentially catastrophic outcomes (AI that becomes <a href="https://newsletter.victordibia.com/p/on-sycophant-ai-unpacking-the-yes">severely misaligned</a>). Understanding these risks is essential for responsible development and deployment. <br><br>A while ago, I <a href="https://newsletter.victordibia.com/p/on-sycophant-ai-unpacking-the-yes">reflected</a> on an incident in which OpenAI released a model that was sycophantic and how naive revenue/product signal optimization directly leads to such adverse (mostly unintended) outcomes. </p><div class="digest-post-embed" data-attrs="{&quot;nodeId&quot;:&quot;89aff99e-7fd7-4838-8d98-1d2f3cc8f430&quot;,&quot;caption&quot;:&quot;In 2022, an engineer was placed on administrative leave (fired) mostly due to his claims that a Generative AI model (LaMDA) was sentient.&quot;,&quot;cta&quot;:&quot;Read full story&quot;,&quot;showBylines&quot;:true,&quot;size&quot;:&quot;sm&quot;,&quot;isEditorNode&quot;:true,&quot;title&quot;:&quot;On Sycophant AI: Unpacking the Yes-Machine Crisis&quot;,&quot;publishedBylines&quot;:[{&quot;id&quot;:85692678,&quot;name&quot;:&quot;Victor Dibia, PhD&quot;,&quot;bio&quot;:&quot;Hacker, Research Scientist (Microsoft Research), Author working on and writing about Generative AI, Agents. Core maintainer for the AutoGen Multi-Agent Framework (35k Stars on GitHub). Previously at Cloudera, IBM Research. 
All views are my own.&quot;,&quot;photo_url&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/27536f63-7d8e-48dc-b34c-150acfacdc8b_1726x1396.jpeg&quot;,&quot;is_guest&quot;:false,&quot;bestseller_tier&quot;:null}],&quot;post_date&quot;:&quot;2025-05-05T13:35:50.201Z&quot;,&quot;cover_image&quot;:&quot;https://substackcdn.com/image/fetch/f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc888b42c-71b8-453f-8d56-9dde5e9b420b_1901x1193.png&quot;,&quot;cover_image_alt&quot;:null,&quot;canonical_url&quot;:&quot;https://newsletter.victordibia.com/p/on-sycophant-ai-unpacking-the-yes&quot;,&quot;section_name&quot;:null,&quot;video_upload_id&quot;:null,&quot;id&quot;:162271179,&quot;type&quot;:&quot;newsletter&quot;,&quot;reaction_count&quot;:6,&quot;comment_count&quot;:0,&quot;publication_id&quot;:null,&quot;publication_name&quot;:&quot;Designing with AI&quot;,&quot;publication_logo_url&quot;:&quot;https://substackcdn.com/image/fetch/f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fff160652-8bbb-475e-80a7-ba4eb5f80dcb_504x504.png&quot;,&quot;belowTheFold&quot;:true,&quot;youtube_url&quot;:null,&quot;show_links&quot;:null,&quot;feed_url&quot;:null}"></div><div class="digest-post-embed" data-attrs="{&quot;nodeId&quot;:&quot;9bca5801-2f15-40d2-82ac-92023e247a5c&quot;,&quot;caption&quot;:&quot;Here&#8217;s a thought experiment:&quot;,&quot;cta&quot;:&quot;Read full story&quot;,&quot;showBylines&quot;:true,&quot;size&quot;:&quot;sm&quot;,&quot;isEditorNode&quot;:true,&quot;title&quot;:&quot;How will AI Impact Academic Research (and Publishing) by 2027?&quot;,&quot;publishedBylines&quot;:[{&quot;id&quot;:85692678,&quot;name&quot;:&quot;Victor Dibia, PhD&quot;,&quot;bio&quot;:&quot;Hacker, Research Scientist (Microsoft Research), Author working on and writing about Generative AI, Agents. 
Core maintainer for the AutoGen Multi-Agent Framework (35k Stars on GitHub). Previously at Cloudera, IBM Research. All views are my own.&quot;,&quot;photo_url&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/27536f63-7d8e-48dc-b34c-150acfacdc8b_1726x1396.jpeg&quot;,&quot;is_guest&quot;:false,&quot;bestseller_tier&quot;:null}],&quot;post_date&quot;:&quot;2025-05-14T15:02:23.084Z&quot;,&quot;cover_image&quot;:&quot;https://substackcdn.com/image/fetch/f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F12e99b91-3c95-4715-a35a-209c44e105e5_3871x2745.png&quot;,&quot;cover_image_alt&quot;:null,&quot;canonical_url&quot;:&quot;https://newsletter.victordibia.com/p/how-will-ai-impact-academic-research&quot;,&quot;section_name&quot;:null,&quot;video_upload_id&quot;:null,&quot;id&quot;:163524799,&quot;type&quot;:&quot;newsletter&quot;,&quot;reaction_count&quot;:8,&quot;comment_count&quot;:2,&quot;publication_id&quot;:null,&quot;publication_name&quot;:&quot;Designing with AI&quot;,&quot;publication_logo_url&quot;:&quot;https://substackcdn.com/image/fetch/f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fff160652-8bbb-475e-80a7-ba4eb5f80dcb_504x504.png&quot;,&quot;belowTheFold&quot;:true,&quot;youtube_url&quot;:null,&quot;show_links&quot;:null,&quot;feed_url&quot;:null}"></div><div class="digest-post-embed" data-attrs="{&quot;nodeId&quot;:&quot;6888f0d3-6f03-4f94-91da-21a79f10a6d3&quot;,&quot;caption&quot;:&quot;Generative AI models can now write code. There is an uptick in AI-Assisted software engineering with tools like GitHub Copilot showing a 180% year-over-year adoption increase and revenue run rate of $2 billion over the last 2 years. 
From simple functions that an LLM can directly generate (reverse a string), to medium complexity apps that are now achievable via multi-agent systems (see&quot;,&quot;cta&quot;:&quot;Read full story&quot;,&quot;showBylines&quot;:true,&quot;size&quot;:&quot;sm&quot;,&quot;isEditorNode&quot;:true,&quot;title&quot;:&quot;Will Generative AI Replace Software Engineers? &quot;,&quot;publishedBylines&quot;:[{&quot;id&quot;:85692678,&quot;name&quot;:&quot;Victor Dibia, PhD&quot;,&quot;bio&quot;:&quot;Hacker, Research Scientist (Microsoft Research), Author working on and writing about Generative AI, Agents. Core maintainer for the AutoGen Multi-Agent Framework (35k Stars on GitHub). Previously at Cloudera, IBM Research. All views are my own.&quot;,&quot;photo_url&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/27536f63-7d8e-48dc-b34c-150acfacdc8b_1726x1396.jpeg&quot;,&quot;is_guest&quot;:false,&quot;bestseller_tier&quot;:null}],&quot;post_date&quot;:&quot;2024-08-20T15:31:05.834Z&quot;,&quot;cover_image&quot;:&quot;https://substackcdn.com/image/fetch/f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd77f9bd8-fe20-4415-8067-14e35fafdfc1_2003x1387.png&quot;,&quot;cover_image_alt&quot;:null,&quot;canonical_url&quot;:&quot;https://newsletter.victordibia.com/p/will-generative-ai-replace-software&quot;,&quot;section_name&quot;:null,&quot;video_upload_id&quot;:null,&quot;id&quot;:106997987,&quot;type&quot;:&quot;newsletter&quot;,&quot;reaction_count&quot;:10,&quot;comment_count&quot;:0,&quot;publication_id&quot;:null,&quot;publication_name&quot;:&quot;Designing with 
AI&quot;,&quot;publication_logo_url&quot;:&quot;https://substackcdn.com/image/fetch/f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fff160652-8bbb-475e-80a7-ba4eb5f80dcb_504x504.png&quot;,&quot;belowTheFold&quot;:true,&quot;youtube_url&quot;:null,&quot;show_links&quot;:null,&quot;feed_url&quot;:null}"></div><p></p><h3>4. The UX of Agentic AI is Unknown</h3><p>The core ways in which humans interface with increasingly autonomous or agentic AI are <em>unknown</em>. If AI-driven software can reason through novel problems and act on human behalf (agents), how do we build interfaces that keep the human in the loop, provide visibility into agent action, and facilitate approvals at the right level of granularity? How do we build experiences that do not diminish critical thinking? We will only discover this unknown territory by building, experimenting and writing.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!sMGr!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc02f7082-8326-4645-830f-4ed8ba8d7d6d_2496x2082.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!sMGr!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc02f7082-8326-4645-830f-4ed8ba8d7d6d_2496x2082.png 424w, https://substackcdn.com/image/fetch/$s_!sMGr!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc02f7082-8326-4645-830f-4ed8ba8d7d6d_2496x2082.png 848w, 
https://substackcdn.com/image/fetch/$s_!sMGr!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc02f7082-8326-4645-830f-4ed8ba8d7d6d_2496x2082.png 1272w, https://substackcdn.com/image/fetch/$s_!sMGr!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc02f7082-8326-4645-830f-4ed8ba8d7d6d_2496x2082.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!sMGr!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc02f7082-8326-4645-830f-4ed8ba8d7d6d_2496x2082.png" width="1456" height="1215" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/c02f7082-8326-4645-830f-4ed8ba8d7d6d_2496x2082.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:1215,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:null,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:null,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!sMGr!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc02f7082-8326-4645-830f-4ed8ba8d7d6d_2496x2082.png 424w, https://substackcdn.com/image/fetch/$s_!sMGr!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc02f7082-8326-4645-830f-4ed8ba8d7d6d_2496x2082.png 848w, 
https://substackcdn.com/image/fetch/$s_!sMGr!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc02f7082-8326-4645-830f-4ed8ba8d7d6d_2496x2082.png 1272w, https://substackcdn.com/image/fetch/$s_!sMGr!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc02f7082-8326-4645-830f-4ed8ba8d7d6d_2496x2082.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><div class="digest-post-embed" 
data-attrs="{&quot;nodeId&quot;:&quot;ad58176f-b585-4d1a-90b2-51d591884ef0&quot;,&quot;caption&quot;:&quot;I gave a keynote talk at the 2025 AI.Engineer conference - &#8220;UX Design Principles for Semi-Autonomous Multi-Agent Systems&#8221;&quot;,&quot;cta&quot;:&quot;Read full story&quot;,&quot;showBylines&quot;:true,&quot;size&quot;:&quot;sm&quot;,&quot;isEditorNode&quot;:true,&quot;title&quot;:&quot;4 UX Design Principles for Autonomous Multi-Agent AI Systems&quot;,&quot;publishedBylines&quot;:[{&quot;id&quot;:85692678,&quot;name&quot;:&quot;Victor Dibia, PhD&quot;,&quot;bio&quot;:&quot;Hacker, Research Scientist (Microsoft Research), Author working on and writing about Generative AI, Agents. Core maintainer for the AutoGen Multi-Agent Framework (35k Stars on GitHub). Previously at Cloudera, IBM Research. All views are my own.&quot;,&quot;photo_url&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/27536f63-7d8e-48dc-b34c-150acfacdc8b_1726x1396.jpeg&quot;,&quot;is_guest&quot;:false,&quot;bestseller_tier&quot;:null}],&quot;post_date&quot;:&quot;2025-06-06T21:58:01.919Z&quot;,&quot;cover_image&quot;:&quot;https://substackcdn.com/image/fetch/f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc02f7082-8326-4645-830f-4ed8ba8d7d6d_2496x2082.png&quot;,&quot;cover_image_alt&quot;:null,&quot;canonical_url&quot;:&quot;https://newsletter.victordibia.com/p/4-ux-design-principles-for-multi&quot;,&quot;section_name&quot;:null,&quot;video_upload_id&quot;:null,&quot;id&quot;:146526830,&quot;type&quot;:&quot;newsletter&quot;,&quot;reaction_count&quot;:11,&quot;comment_count&quot;:0,&quot;publication_id&quot;:null,&quot;publication_name&quot;:&quot;Designing with 
AI&quot;,&quot;publication_logo_url&quot;:&quot;https://substackcdn.com/image/fetch/f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fff160652-8bbb-475e-80a7-ba4eb5f80dcb_504x504.png&quot;,&quot;belowTheFold&quot;:true,&quot;youtube_url&quot;:null,&quot;show_links&quot;:null,&quot;feed_url&quot;:null}"></div><div class="digest-post-embed" data-attrs="{&quot;nodeId&quot;:&quot;f8737b1c-9ec1-40fa-9692-f14fe7fa76d6&quot;,&quot;caption&quot;:&quot;I am quite excited to announce an official release of a new version AutoGen Studio (AGS) - a low code tool for prototyping and debugging multi-agent systems. This post is meant to provide an overview of features and what you can do with the tool.&quot;,&quot;cta&quot;:&quot;Read full story&quot;,&quot;showBylines&quot;:true,&quot;size&quot;:&quot;sm&quot;,&quot;isEditorNode&quot;:true,&quot;title&quot;:&quot;A Friendly Introduction to AutoGen Studio (v0.4) - A Low-Code Tool for Prototyping Multi-Agent Systems&quot;,&quot;publishedBylines&quot;:[{&quot;id&quot;:85692678,&quot;name&quot;:&quot;Victor Dibia, PhD&quot;,&quot;bio&quot;:&quot;Hacker, Research Scientist (Microsoft Research), Author working on and writing about Generative AI, Agents. Core maintainer for the AutoGen Multi-Agent Framework (35k Stars on GitHub). Previously at Cloudera, IBM Research. 
All views are my own.&quot;,&quot;photo_url&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/27536f63-7d8e-48dc-b34c-150acfacdc8b_1726x1396.jpeg&quot;,&quot;is_guest&quot;:false,&quot;bestseller_tier&quot;:null}],&quot;post_date&quot;:&quot;2025-02-07T16:48:19.512Z&quot;,&quot;cover_image&quot;:&quot;https://substack-video.s3.amazonaws.com/video_upload/post/156632457/f8c1cc93-afad-4b3b-888e-b5734a698236/transcoded-1738885380.png&quot;,&quot;cover_image_alt&quot;:null,&quot;canonical_url&quot;:&quot;https://newsletter.victordibia.com/p/autogen-studio-v04-a-no-code-tool&quot;,&quot;section_name&quot;:null,&quot;video_upload_id&quot;:null,&quot;id&quot;:156632457,&quot;type&quot;:&quot;podcast&quot;,&quot;reaction_count&quot;:8,&quot;comment_count&quot;:0,&quot;publication_id&quot;:null,&quot;publication_name&quot;:&quot;Designing with AI&quot;,&quot;publication_logo_url&quot;:&quot;https://substackcdn.com/image/fetch/f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fff160652-8bbb-475e-80a7-ba4eb5f80dcb_504x504.png&quot;,&quot;belowTheFold&quot;:true,&quot;youtube_url&quot;:null,&quot;show_links&quot;:null,&quot;feed_url&quot;:null}"></div><p></p><h3>Bonus: Writing for Myself</h3><p>Beyond the broader <em>external</em> goals I've outlined, I also write for deeply personal reasons. Writing forces me to clarify my thinking - transforming scattered thoughts into coherent arguments that are far more refined than anything I could produce in casual conversation. It also creates a valuable resource I can draw from repeatedly, whether I'm speaking at conferences, advising teams, or simply discussing these topics with colleagues.</p><p></p><h2>.. You Should Write Too!</h2><p>With all these points listed, I sincerely believe there just cannot be "enough" writing about AI. 
Sometimes, it can be hard to reason about the mechanics of social network theory - how multiple individuals can write about the <em>same</em> thing but hit completely different audiences; how you just might be the only source of that type of information within your network, might have a unique perspective, and be the right person to share. </p><blockquote><p><em><strong>You should write too.</strong></em></p></blockquote><p>All of this is to say - <strong>you should write too</strong>! </p><p>I have a long list of folks who actively write about the AI space whom I admire and whose work I find useful - <span class="mention-wrap" data-attrs="{&quot;name&quot;:&quot;Nathan Lambert&quot;,&quot;id&quot;:10472909,&quot;type&quot;:&quot;user&quot;,&quot;url&quot;:null,&quot;photo_url&quot;:&quot;https://substackcdn.com/image/fetch/f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8fedcdfb-e137-4f6a-9089-a46add6c6242_500x500.jpeg&quot;,&quot;uuid&quot;:&quot;8ca64476-dd57-4e61-87b8-18663f832f77&quot;}" data-component-name="MentionToDOM"></span> (interconnects) , <span class="mention-wrap" data-attrs="{&quot;name&quot;:&quot;Charlie Guo&quot;,&quot;id&quot;:3625174,&quot;type&quot;:&quot;user&quot;,&quot;url&quot;:null,&quot;photo_url&quot;:&quot;https://substackcdn.com/image/fetch/f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7c9d1a4c-3e17-4463-9b75-8898d2565caa_800x800.jpeg&quot;,&quot;uuid&quot;:&quot;a7ee3187-fb28-4151-b5ca-2ae6172fc5ca&quot;}" data-component-name="MentionToDOM"></span> (artificial ignorance), <span class="mention-wrap" data-attrs="{&quot;name&quot;:&quot;Miles 
Brundage&quot;,&quot;id&quot;:280070124,&quot;type&quot;:&quot;user&quot;,&quot;url&quot;:null,&quot;photo_url&quot;:&quot;https://substackcdn.com/image/fetch/f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4d0f2d21-d8c6-4214-887c-158f16d163fc_144x144.png&quot;,&quot;uuid&quot;:&quot;bea0b3e0-3548-4119-bec9-9a7c86f7f53e&quot;}" data-component-name="MentionToDOM"></span>  , <span class="mention-wrap" data-attrs="{&quot;name&quot;:&quot;Grigory Sapunov&quot;,&quot;id&quot;:1253653,&quot;type&quot;:&quot;user&quot;,&quot;url&quot;:null,&quot;photo_url&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/d6fbffec-4136-4362-9c2c-55b75554795a_47x48.jpeg&quot;,&quot;uuid&quot;:&quot;00e2078b-bf90-42c8-8fae-889c56358484&quot;}" data-component-name="MentionToDOM"></span> (<a href="https://gonzoml.substack.com/?utm_source=mention&amp;utm_content=writes">Gonzo ML</a>), Swyx <span class="mention-wrap" data-attrs="{&quot;name&quot;:&quot;Latent.Space&quot;,&quot;id&quot;:89230629,&quot;type&quot;:&quot;user&quot;,&quot;url&quot;:null,&quot;photo_url&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/703cf3dd-3bab-4f7b-86fa-f4443f15f8a4_152x152.jpeg&quot;,&quot;uuid&quot;:&quot;f4e2f53d-635a-4c0f-8bed-0f498b3970b3&quot;}" data-component-name="MentionToDOM"></span> and a few others. 
Definitely follow their work!</p><p>I keep coming back to <a href="https://milesbrundage.substack.com/p/some-very-important-things-that-i">this article</a> by  <span class="mention-wrap" data-attrs="{&quot;name&quot;:&quot;Miles Brundage&quot;,&quot;id&quot;:280070124,&quot;type&quot;:&quot;user&quot;,&quot;url&quot;:null,&quot;photo_url&quot;:&quot;https://substackcdn.com/image/fetch/f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4d0f2d21-d8c6-4214-887c-158f16d163fc_144x144.png&quot;,&quot;uuid&quot;:&quot;8febe8f8-6edf-4bf3-b24b-ad12873791b7&quot;}" data-component-name="MentionToDOM"></span> on important things to work on. Miles advocates for more work on AI literacy, demonstrations that clearly illustrate the increasingly complex (as well as dangerous) capabilities of AI, tools to inspect and make sense of agentic AI, the need for policy, amongst other topics. And I agree with all these points.</p><p>And to the 6000 of you who have found my writing worth following, <strong>thank you</strong> and cheers to co-navigating the AI journey ahead. 
</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!AnIU!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe45ef579-b048-4715-a0ee-bdc36f6de0b9_1692x1270.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!AnIU!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe45ef579-b048-4715-a0ee-bdc36f6de0b9_1692x1270.png 424w, https://substackcdn.com/image/fetch/$s_!AnIU!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe45ef579-b048-4715-a0ee-bdc36f6de0b9_1692x1270.png 848w, https://substackcdn.com/image/fetch/$s_!AnIU!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe45ef579-b048-4715-a0ee-bdc36f6de0b9_1692x1270.png 1272w, https://substackcdn.com/image/fetch/$s_!AnIU!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe45ef579-b048-4715-a0ee-bdc36f6de0b9_1692x1270.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!AnIU!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe45ef579-b048-4715-a0ee-bdc36f6de0b9_1692x1270.png" width="1456" height="1093" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/e45ef579-b048-4715-a0ee-bdc36f6de0b9_1692x1270.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:1093,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:195990,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://newsletter.victordibia.com/i/160793924?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe45ef579-b048-4715-a0ee-bdc36f6de0b9_1692x1270.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!AnIU!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe45ef579-b048-4715-a0ee-bdc36f6de0b9_1692x1270.png 424w, https://substackcdn.com/image/fetch/$s_!AnIU!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe45ef579-b048-4715-a0ee-bdc36f6de0b9_1692x1270.png 848w, https://substackcdn.com/image/fetch/$s_!AnIU!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe45ef579-b048-4715-a0ee-bdc36f6de0b9_1692x1270.png 1272w, https://substackcdn.com/image/fetch/$s_!AnIU!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe45ef579-b048-4715-a0ee-bdc36f6de0b9_1692x1270.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" 
width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p></p><p></p><p></p><p> </p><div class="subscription-widget-wrap-editor" data-attrs="{&quot;url&quot;:&quot;https://newsletter.victordibia.com/subscribe?&quot;,&quot;text&quot;:&quot;Subscribe&quot;,&quot;language&quot;:&quot;en&quot;}" data-component-name="SubscribeWidgetToDOM"><div class="subscription-widget show-subscribe"><div class="preamble"><p class="cta-caption">Designing with AI is a reader-supported publication. 
To receive new posts and support my work, consider becoming a free or paid subscriber.</p></div><form class="subscription-widget-subscribe"><input type="email" class="email-input" name="email" placeholder="Type your email&#8230;" tabindex="-1"><input type="submit" class="button primary" value="Subscribe"><div class="fake-input-wrapper"><div class="fake-input"></div><div class="fake-button"></div></div></form></div></div>]]></content:encoded></item><item><title><![CDATA[4 UX Design Principles for Autonomous Multi-Agent AI Systems]]></title><description><![CDATA[#42 | A summary from a keynote talk I gave at the 2025 AI.Engineer World's Fair Conference.]]></description><link>https://newsletter.victordibia.com/p/4-ux-design-principles-for-multi</link><guid isPermaLink="false">https://newsletter.victordibia.com/p/4-ux-design-principles-for-multi</guid><dc:creator><![CDATA[Victor Dibia, PhD]]></dc:creator><pubDate>Fri, 06 Jun 2025 21:58:01 GMT</pubDate><enclosure url="https://substackcdn.com/image/fetch/$s_!sMGr!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc02f7082-8326-4645-830f-4ed8ba8d7d6d_2496x2082.png" length="0" type="image/jpeg"/><content:encoded><![CDATA[<div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!sMGr!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc02f7082-8326-4645-830f-4ed8ba8d7d6d_2496x2082.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!sMGr!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc02f7082-8326-4645-830f-4ed8ba8d7d6d_2496x2082.png 424w, 
https://substackcdn.com/image/fetch/$s_!sMGr!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc02f7082-8326-4645-830f-4ed8ba8d7d6d_2496x2082.png 848w, https://substackcdn.com/image/fetch/$s_!sMGr!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc02f7082-8326-4645-830f-4ed8ba8d7d6d_2496x2082.png 1272w, https://substackcdn.com/image/fetch/$s_!sMGr!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc02f7082-8326-4645-830f-4ed8ba8d7d6d_2496x2082.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!sMGr!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc02f7082-8326-4645-830f-4ed8ba8d7d6d_2496x2082.png" width="1456" height="1215" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/c02f7082-8326-4645-830f-4ed8ba8d7d6d_2496x2082.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:1215,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:5235978,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:false,&quot;topImage&quot;:true,&quot;internalRedirect&quot;:&quot;https://newsletter.victordibia.com/i/146526830?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc02f7082-8326-4645-830f-4ed8ba8d7d6d_2496x2082.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" 
srcset="https://substackcdn.com/image/fetch/$s_!sMGr!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc02f7082-8326-4645-830f-4ed8ba8d7d6d_2496x2082.png 424w, https://substackcdn.com/image/fetch/$s_!sMGr!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc02f7082-8326-4645-830f-4ed8ba8d7d6d_2496x2082.png 848w, https://substackcdn.com/image/fetch/$s_!sMGr!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc02f7082-8326-4645-830f-4ed8ba8d7d6d_2496x2082.png 1272w, https://substackcdn.com/image/fetch/$s_!sMGr!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc02f7082-8326-4645-830f-4ed8ba8d7d6d_2496x2082.png 1456w" sizes="100vw" fetchpriority="high"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" 
stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a><figcaption class="image-caption"><strong>4 UX Design Principles for Autonomous Multi-Agent AI Systems</strong></figcaption></figure></div><p>I gave a keynote talk at the <a href="https://www.ai.engineer/schedule">2025 AI.Engineer</a> conference - &#8220;UX Design Principles for Semi-Autonomous Multi-Agent Systems&#8221;</p><p>It was a short talk and my goal was to distill a set of UX design principles that developers  could apply while building experiences for autonomous multi-agent systems. It is largely based on my experience prototyping multi-agent systems,  <a href="https://newsletter.victordibia.com/p/a-friendly-introduction-to-the-autogen">maintaining AutoGen</a> (a framework for building multi-agent apps), as well as  building and maintaining <a href="https://newsletter.victordibia.com/p/autogen-studio-v04-a-no-code-tool">AutoGen Studio</a> - a low code interface for prototyping multi-agent applications.</p><p>Also, to illustrate these ideas, I built a multi-agent system from scratch (no frameworks, just pure Python and OpenAI calls) - <a href="https://github.com/victordibia/blenderlm">BlenderLM</a>. 
<a href="https://github.com/victordibia/blenderlm">BlenderLM</a> is a multi-agent system that can take a natural language query and drive the Blender 3D creation tool (via its Python API) to accomplish the requested task.</p><div id="youtube2-fmZWvE7yDZo" class="youtube-wrap" data-attrs="{&quot;videoId&quot;:&quot;fmZWvE7yDZo&quot;,&quot;startTime&quot;:null,&quot;endTime&quot;:null}" data-component-name="Youtube2ToDOM"><div class="youtube-inner"><iframe src="https://www.youtube-nocookie.com/embed/fmZWvE7yDZo?rel=0&amp;autoplay=0&amp;showinfo=0&amp;enablejsapi=0" frameborder="0" loading="lazy" gesture="media" allow="autoplay; fullscreen" allowautoplay="true" allowfullscreen="true" width="728" height="409"></iframe></div></div><p>TLDR on the principles I recommended: </p><p>- <strong>Capability discovery</strong> (Help users understand what the agents can do)<br>- <strong>Observability and provenance</strong> (Ensure users can observe/trace agent actions)<br>- <strong>Interruptibility</strong> (Allow users to pause, resume or cancel agent actions)<br>- <strong>Cost-Aware delegation</strong> (Communicate the cost of agent actions, allow users to decide when agents can act)</p><div><hr></div><div class="subscription-widget-wrap-editor" data-attrs="{&quot;url&quot;:&quot;https://newsletter.victordibia.com/subscribe?&quot;,&quot;text&quot;:&quot;Subscribe&quot;,&quot;language&quot;:&quot;en&quot;}" data-component-name="SubscribeWidgetToDOM"><div class="subscription-widget show-subscribe"><div class="preamble"><p class="cta-caption">Designing with AI is a reader-supported publication. 
To receive new posts and support my work, consider becoming a free or paid subscriber.</p></div><form class="subscription-widget-subscribe"><input type="email" class="email-input" name="email" placeholder="Type your email&#8230;" tabindex="-1"><input type="submit" class="button primary" value="Subscribe"><div class="fake-input-wrapper"><div class="fake-input"></div><div class="fake-button"></div></div></form></div></div><div><hr></div><h2>Beyond Workflows to Multi-Agent Systems </h2><p>I wrote in a <a href="https://newsletter.victordibia.com/p/ai-agents-2024-rewind-a-year-of-building">2024 AI Agent rewind</a> that most &#8220;agents&#8221; in production are essentially workflows - a set of <em>deterministic steps</em> designed by an engineer with a few LLM calls sprinkled in. While this provides <em>reliability</em> and is indeed the right approach to solve many tasks, it still requires that the developer <em>knows the exact solution to the problem</em> and can express that in the outlined steps.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!xGMq!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2d65b198-8db6-4d8f-af54-521e93d06a77_2286x1284.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!xGMq!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2d65b198-8db6-4d8f-af54-521e93d06a77_2286x1284.png 424w, https://substackcdn.com/image/fetch/$s_!xGMq!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2d65b198-8db6-4d8f-af54-521e93d06a77_2286x1284.png 848w, 
https://substackcdn.com/image/fetch/$s_!xGMq!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2d65b198-8db6-4d8f-af54-521e93d06a77_2286x1284.png 1272w, https://substackcdn.com/image/fetch/$s_!xGMq!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2d65b198-8db6-4d8f-af54-521e93d06a77_2286x1284.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!xGMq!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2d65b198-8db6-4d8f-af54-521e93d06a77_2286x1284.png" width="1456" height="818" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/2d65b198-8db6-4d8f-af54-521e93d06a77_2286x1284.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:818,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:3694920,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://newsletter.victordibia.com/i/146526830?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2d65b198-8db6-4d8f-af54-521e93d06a77_2286x1284.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!xGMq!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2d65b198-8db6-4d8f-af54-521e93d06a77_2286x1284.png 424w, 
https://substackcdn.com/image/fetch/$s_!xGMq!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2d65b198-8db6-4d8f-af54-521e93d06a77_2286x1284.png 848w, https://substackcdn.com/image/fetch/$s_!xGMq!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2d65b198-8db6-4d8f-af54-521e93d06a77_2286x1284.png 1272w, https://substackcdn.com/image/fetch/$s_!xGMq!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2d65b198-8db6-4d8f-af54-521e93d06a77_2286x1284.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" 
y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>On the other hand, <a href="https://multiagentbook.com/">Multi-Agent Systems</a>, where LLMs drive the control flow of execution, are suited to problem spaces where the exact solution is unknown. This yields software that <em>can take actions, observe results, and interactively explore solution spaces</em> - but it also introduces new challenges around reliability, user trust, and system comprehensibility. Some common use cases that are seeing benefits from a MAS approach include software engineering, back office tasks, and deep research. </p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!tuNo!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdcba4c08-e99b-4b8b-ad2f-66f74d9f4b8d_2276x1280.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!tuNo!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdcba4c08-e99b-4b8b-ad2f-66f74d9f4b8d_2276x1280.png 424w, https://substackcdn.com/image/fetch/$s_!tuNo!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdcba4c08-e99b-4b8b-ad2f-66f74d9f4b8d_2276x1280.png 848w, https://substackcdn.com/image/fetch/$s_!tuNo!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdcba4c08-e99b-4b8b-ad2f-66f74d9f4b8d_2276x1280.png 1272w, https://substackcdn.com/image/fetch/$s_!tuNo!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdcba4c08-e99b-4b8b-ad2f-66f74d9f4b8d_2276x1280.png 1456w" 
sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!tuNo!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdcba4c08-e99b-4b8b-ad2f-66f74d9f4b8d_2276x1280.png" width="1456" height="819" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/dcba4c08-e99b-4b8b-ad2f-66f74d9f4b8d_2276x1280.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:819,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:3165760,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://newsletter.victordibia.com/i/146526830?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdcba4c08-e99b-4b8b-ad2f-66f74d9f4b8d_2276x1280.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!tuNo!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdcba4c08-e99b-4b8b-ad2f-66f74d9f4b8d_2276x1280.png 424w, https://substackcdn.com/image/fetch/$s_!tuNo!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdcba4c08-e99b-4b8b-ad2f-66f74d9f4b8d_2276x1280.png 848w, https://substackcdn.com/image/fetch/$s_!tuNo!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdcba4c08-e99b-4b8b-ad2f-66f74d9f4b8d_2276x1280.png 1272w, 
https://substackcdn.com/image/fetch/$s_!tuNo!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fdcba4c08-e99b-4b8b-ad2f-66f74d9f4b8d_2276x1280.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><blockquote><p>The UX principles discussed here <em>apply to these types of systems</em> - systems with <em>autonomy</em> and the ability to <em>act</em> on <em>complex long-running</em> tasks.</p></blockquote><p>IMO there are three properties of MAS to keep in mind that often necessitate careful UX design considerations:</p><ul><li><p><strong>Autonomy</strong> | Can do many 
<em>different</em> things</p></li><li><p><strong>Action</strong> | Can take action with <em>side effects</em></p></li><li><p><strong>Duration</strong> | Complex long- running tasks</p></li></ul><p></p><h2>Building A Multi-Agent System From Scratch </h2><p>I often see developers start the agent development process  <em>by immediately attempting to code up an Agent class or some agentic behaviors- e.g., selecting the prompt/model, fretting over concepts like memory etc</em>. <strong>This is often a mistake</strong>. </p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!MZoX!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0e10bb9c-b8f9-427f-ba75-7fb30f374abc_2040x1146.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!MZoX!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0e10bb9c-b8f9-427f-ba75-7fb30f374abc_2040x1146.png 424w, https://substackcdn.com/image/fetch/$s_!MZoX!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0e10bb9c-b8f9-427f-ba75-7fb30f374abc_2040x1146.png 848w, https://substackcdn.com/image/fetch/$s_!MZoX!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0e10bb9c-b8f9-427f-ba75-7fb30f374abc_2040x1146.png 1272w, https://substackcdn.com/image/fetch/$s_!MZoX!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0e10bb9c-b8f9-427f-ba75-7fb30f374abc_2040x1146.png 1456w" sizes="100vw"><img 
src="https://substackcdn.com/image/fetch/$s_!MZoX!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0e10bb9c-b8f9-427f-ba75-7fb30f374abc_2040x1146.png" width="1456" height="818" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/0e10bb9c-b8f9-427f-ba75-7fb30f374abc_2040x1146.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:818,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:2584270,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://newsletter.victordibia.com/i/146526830?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0e10bb9c-b8f9-427f-ba75-7fb30f374abc_2040x1146.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!MZoX!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0e10bb9c-b8f9-427f-ba75-7fb30f374abc_2040x1146.png 424w, https://substackcdn.com/image/fetch/$s_!MZoX!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0e10bb9c-b8f9-427f-ba75-7fb30f374abc_2040x1146.png 848w, https://substackcdn.com/image/fetch/$s_!MZoX!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0e10bb9c-b8f9-427f-ba75-7fb30f374abc_2040x1146.png 1272w, https://substackcdn.com/image/fetch/$s_!MZoX!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0e10bb9c-b8f9-427f-ba75-7fb30f374abc_2040x1146.png 
1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p> Instead, if we take a step back from the current agent hype, it&#8217;ll become clear that a more typical software development life cycle approach still applies. I tend to follow a 5-stage process :</p><ol><li><p><strong>Goal definition</strong> (what are we building)</p></li><li><p><strong>Baseline</strong> (what is a non-agentic baseline)</p></li><li><p><strong>Tools</strong> (Architect, build and test the tools that the agent will use. 
You should spend the most time here)</p></li><li><p><strong>Eval testbed</strong> (Interactive e.g., Jupyter notebook, UI or offline benchmark for evaluating systems)</p></li><li><p><strong>Agent</strong> (Only work on this after steps 1-4 above - begin with a tight loop of an LLM calling tools and optimize e.g., add more agents and evaluate)</p></li></ol><blockquote><p>Also, about 70% of the effort should be split between steps 3 and 4 - across building your <strong>tools</strong> and tuning your <strong>evaluation</strong> harness.</p></blockquote><p>One of the reasons for building a demo from scratch for this talk was to enable me to critically think through <em><strong>my process</strong></em>, as well as to validate many of the UX principles I have followed while building and maintaining AutoGen Studio. I built <a href="https://github.com/victordibia/blenderlm">BlenderLM</a> - a sample tool that takes natural language tasks, e.g., &#8220;create a scene with two glossy balls&#8221;, and uses an agentic system to drive the underlying Blender Python API to accomplish the task. </p><blockquote><p>TLDR; <a href="https://github.com/victordibia/blenderlm">BlenderLM</a> consists of 3 agents. It includes a React UI where the user can initiate tasks that kick off an agent run with bidirectional communication over a web socket.</p><ul><li><p><strong>Main/Orchestrator Agent</strong>: It has access to a set of task-specific (e.g., create Blender object) and general purpose (code execution) tools, and can call other agents in a loop to address a task.</p></li><li><p><strong>Planner Agent</strong>: Takes the task and system state and generates a plan to address the task</p></li><li><p><strong>Verifier Agent</strong>: Takes a task and system state and generates an assessment of whether the task is accomplished. 
This is used with some retry logic to determine when steps in a task are accomplished.</p></li></ul></blockquote><p>The details of BlenderLM will probably be a separate post!</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!AkO5!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F07c3bdb2-17e5-4db8-a9bd-9cadfbab91a6_1086x610.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!AkO5!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F07c3bdb2-17e5-4db8-a9bd-9cadfbab91a6_1086x610.png 424w, https://substackcdn.com/image/fetch/$s_!AkO5!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F07c3bdb2-17e5-4db8-a9bd-9cadfbab91a6_1086x610.png 848w, https://substackcdn.com/image/fetch/$s_!AkO5!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F07c3bdb2-17e5-4db8-a9bd-9cadfbab91a6_1086x610.png 1272w, https://substackcdn.com/image/fetch/$s_!AkO5!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F07c3bdb2-17e5-4db8-a9bd-9cadfbab91a6_1086x610.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!AkO5!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F07c3bdb2-17e5-4db8-a9bd-9cadfbab91a6_1086x610.png" width="1086" height="610" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/07c3bdb2-17e5-4db8-a9bd-9cadfbab91a6_1086x610.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:610,&quot;width&quot;:1086,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:905382,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://newsletter.victordibia.com/i/146526830?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F07c3bdb2-17e5-4db8-a9bd-9cadfbab91a6_1086x610.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!AkO5!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F07c3bdb2-17e5-4db8-a9bd-9cadfbab91a6_1086x610.png 424w, https://substackcdn.com/image/fetch/$s_!AkO5!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F07c3bdb2-17e5-4db8-a9bd-9cadfbab91a6_1086x610.png 848w, https://substackcdn.com/image/fetch/$s_!AkO5!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F07c3bdb2-17e5-4db8-a9bd-9cadfbab91a6_1086x610.png 1272w, https://substackcdn.com/image/fetch/$s_!AkO5!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F07c3bdb2-17e5-4db8-a9bd-9cadfbab91a6_1086x610.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" 
height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p></p><h2>Design Principles</h2><p>The <strong>4 principles</strong> below are <em>not exhaustive</em>, but have pretty high coverage. Multi-agent systems are still early and it is likely the ideas below will be revised as the space evolves. 
</p><h3>Capability Discovery</h3><p>Help users understand what the agents can do</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!D2eV!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1baf461b-9505-44ac-9ff1-e19e09e0d021_2276x1280.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!D2eV!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1baf461b-9505-44ac-9ff1-e19e09e0d021_2276x1280.png 424w, https://substackcdn.com/image/fetch/$s_!D2eV!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1baf461b-9505-44ac-9ff1-e19e09e0d021_2276x1280.png 848w, https://substackcdn.com/image/fetch/$s_!D2eV!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1baf461b-9505-44ac-9ff1-e19e09e0d021_2276x1280.png 1272w, https://substackcdn.com/image/fetch/$s_!D2eV!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1baf461b-9505-44ac-9ff1-e19e09e0d021_2276x1280.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!D2eV!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1baf461b-9505-44ac-9ff1-e19e09e0d021_2276x1280.png" width="1456" height="819" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/1baf461b-9505-44ac-9ff1-e19e09e0d021_2276x1280.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:819,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:3987776,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://newsletter.victordibia.com/i/146526830?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1baf461b-9505-44ac-9ff1-e19e09e0d021_2276x1280.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!D2eV!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1baf461b-9505-44ac-9ff1-e19e09e0d021_2276x1280.png 424w, https://substackcdn.com/image/fetch/$s_!D2eV!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1baf461b-9505-44ac-9ff1-e19e09e0d021_2276x1280.png 848w, https://substackcdn.com/image/fetch/$s_!D2eV!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1baf461b-9505-44ac-9ff1-e19e09e0d021_2276x1280.png 1272w, https://substackcdn.com/image/fetch/$s_!D2eV!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1baf461b-9505-44ac-9ff1-e19e09e0d021_2276x1280.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" 
width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Agents have <em>autonomy</em>. This <em>implies</em> that they can do <em>many</em> things. In reality, each agent has specific configurations&#8212;system prompts, available tools, and internal logic&#8212;that make it <em>more reliable</em> at certain types of tasks compared to others. <strong>But the user does not know this</strong>. This creates a discovery problem: users don't know which tasks will work well and which might fail, leading to frustration and mistrust when the agent underperforms on unsuitable tasks. 
</p><p>This principle advocates that the UX nudges the user towards high reliability task examples or even proactively suggests relevant high reliability tasks given the user&#8217;s context and the system&#8217;s capabilities.</p><p>Many tools already do this well by having a set of sample tasks as presets that the user can select to get started.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!IPC1!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd744c62c-71bc-4cf9-a002-5b20abcbc248_1136x614.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!IPC1!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd744c62c-71bc-4cf9-a002-5b20abcbc248_1136x614.png 424w, https://substackcdn.com/image/fetch/$s_!IPC1!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd744c62c-71bc-4cf9-a002-5b20abcbc248_1136x614.png 848w, https://substackcdn.com/image/fetch/$s_!IPC1!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd744c62c-71bc-4cf9-a002-5b20abcbc248_1136x614.png 1272w, https://substackcdn.com/image/fetch/$s_!IPC1!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd744c62c-71bc-4cf9-a002-5b20abcbc248_1136x614.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!IPC1!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd744c62c-71bc-4cf9-a002-5b20abcbc248_1136x614.png" width="1136" height="614" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/d744c62c-71bc-4cf9-a002-5b20abcbc248_1136x614.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:614,&quot;width&quot;:1136,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:358891,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://newsletter.victordibia.com/i/146526830?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd744c62c-71bc-4cf9-a002-5b20abcbc248_1136x614.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!IPC1!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd744c62c-71bc-4cf9-a002-5b20abcbc248_1136x614.png 424w, https://substackcdn.com/image/fetch/$s_!IPC1!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd744c62c-71bc-4cf9-a002-5b20abcbc248_1136x614.png 848w, https://substackcdn.com/image/fetch/$s_!IPC1!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd744c62c-71bc-4cf9-a002-5b20abcbc248_1136x614.png 1272w, https://substackcdn.com/image/fetch/$s_!IPC1!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd744c62c-71bc-4cf9-a002-5b20abcbc248_1136x614.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" 
height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a><figcaption class="image-caption">The UI in BlenderLM provides capability presets that nudge the user towards the complexity level of tasks that the tools can handle.</figcaption></figure></div><p></p><h3>Observability and provenance </h3><p>Ensure users can observe/trace agent actions</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!jVEu!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F59a71eb4-f2a7-473d-97a2-a7f67577898c_2268x1278.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" 
srcset="https://substackcdn.com/image/fetch/$s_!jVEu!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F59a71eb4-f2a7-473d-97a2-a7f67577898c_2268x1278.png 424w, https://substackcdn.com/image/fetch/$s_!jVEu!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F59a71eb4-f2a7-473d-97a2-a7f67577898c_2268x1278.png 848w, https://substackcdn.com/image/fetch/$s_!jVEu!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F59a71eb4-f2a7-473d-97a2-a7f67577898c_2268x1278.png 1272w, https://substackcdn.com/image/fetch/$s_!jVEu!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F59a71eb4-f2a7-473d-97a2-a7f67577898c_2268x1278.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!jVEu!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F59a71eb4-f2a7-473d-97a2-a7f67577898c_2268x1278.png" width="1456" height="820" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/59a71eb4-f2a7-473d-97a2-a7f67577898c_2268x1278.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:820,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:3912773,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://newsletter.victordibia.com/i/146526830?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F59a71eb4-f2a7-473d-97a2-a7f67577898c_2268x1278.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" 
class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!jVEu!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F59a71eb4-f2a7-473d-97a2-a7f67577898c_2268x1278.png 424w, https://substackcdn.com/image/fetch/$s_!jVEu!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F59a71eb4-f2a7-473d-97a2-a7f67577898c_2268x1278.png 848w, https://substackcdn.com/image/fetch/$s_!jVEu!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F59a71eb4-f2a7-473d-97a2-a7f67577898c_2268x1278.png 1272w, https://substackcdn.com/image/fetch/$s_!jVEu!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F59a71eb4-f2a7-473d-97a2-a7f67577898c_2268x1278.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" 
stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Autonomous agents can explore trajectories that are <em>non-deterministic and only known at run time</em>. Each run with the same input can lead to significantly different trajectories. This makes it important for the end user to observe these trajectories, both to build trust that the agent is doing the right thing and to learn more about its capabilities and limitations and improve their task formulation approach.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!oaWA!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F480a3cce-e8b3-4e01-9ca5-69a615442b61_2306x1214.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!oaWA!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F480a3cce-e8b3-4e01-9ca5-69a615442b61_2306x1214.png 424w, https://substackcdn.com/image/fetch/$s_!oaWA!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F480a3cce-e8b3-4e01-9ca5-69a615442b61_2306x1214.png 848w, https://substackcdn.com/image/fetch/$s_!oaWA!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F480a3cce-e8b3-4e01-9ca5-69a615442b61_2306x1214.png 1272w, 
https://substackcdn.com/image/fetch/$s_!oaWA!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F480a3cce-e8b3-4e01-9ca5-69a615442b61_2306x1214.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!oaWA!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F480a3cce-e8b3-4e01-9ca5-69a615442b61_2306x1214.png" width="1456" height="767" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/480a3cce-e8b3-4e01-9ca5-69a615442b61_2306x1214.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:767,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:1169676,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://newsletter.victordibia.com/i/146526830?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F480a3cce-e8b3-4e01-9ca5-69a615442b61_2306x1214.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!oaWA!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F480a3cce-e8b3-4e01-9ca5-69a615442b61_2306x1214.png 424w, https://substackcdn.com/image/fetch/$s_!oaWA!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F480a3cce-e8b3-4e01-9ca5-69a615442b61_2306x1214.png 848w, 
https://substackcdn.com/image/fetch/$s_!oaWA!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F480a3cce-e8b3-4e01-9ca5-69a615442b61_2306x1214.png 1272w, https://substackcdn.com/image/fetch/$s_!oaWA!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F480a3cce-e8b3-4e01-9ca5-69a615442b61_2306x1214.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>In BlenderLM, this is implemented by streaming all updates the agents explore - structured plans, the number of steps, current 
steps, and, where appropriate, duration and cost (LLM tokens), etc. Tools like Agent mode in VS Code/Cursor/Windsurf also do this well by streaming the actions as agents make progress.</p><h3>Interruptibility </h3><p>Allow users to pause, resume or cancel agent actions</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!REGX!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0acc5ec0-dfcc-4523-8686-6b2e4e12139d_1087x611.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!REGX!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0acc5ec0-dfcc-4523-8686-6b2e4e12139d_1087x611.png 424w, https://substackcdn.com/image/fetch/$s_!REGX!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0acc5ec0-dfcc-4523-8686-6b2e4e12139d_1087x611.png 848w, https://substackcdn.com/image/fetch/$s_!REGX!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0acc5ec0-dfcc-4523-8686-6b2e4e12139d_1087x611.png 1272w, https://substackcdn.com/image/fetch/$s_!REGX!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0acc5ec0-dfcc-4523-8686-6b2e4e12139d_1087x611.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!REGX!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0acc5ec0-dfcc-4523-8686-6b2e4e12139d_1087x611.png" width="1087" height="611" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/0acc5ec0-dfcc-4523-8686-6b2e4e12139d_1087x611.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:611,&quot;width&quot;:1087,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:1004473,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://newsletter.victordibia.com/i/146526830?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0acc5ec0-dfcc-4523-8686-6b2e4e12139d_1087x611.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!REGX!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0acc5ec0-dfcc-4523-8686-6b2e4e12139d_1087x611.png 424w, https://substackcdn.com/image/fetch/$s_!REGX!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0acc5ec0-dfcc-4523-8686-6b2e4e12139d_1087x611.png 848w, https://substackcdn.com/image/fetch/$s_!REGX!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0acc5ec0-dfcc-4523-8686-6b2e4e12139d_1087x611.png 1272w, https://substackcdn.com/image/fetch/$s_!REGX!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F0acc5ec0-dfcc-4523-8686-6b2e4e12139d_1087x611.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" 
height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Autonomous multi-agent systems can <em><strong>run for extended periods</strong></em>, make multiple tool calls, and take actions with real-world resource implications. In some cases, particularly in human-in-the-loop settings, users may observe an expensive or problematic operation trajectory and need to pause the system to provide feedback, course-correct, or cancel entirely. 
</p><p>This principle advocates for designing systems where users can <strong>interrupt agent execution</strong> at any point, pause long-running tasks, and resume from where they left off without losing progress or system state.</p><p><a href="https://github.com/victordibia/blenderlm">BlenderLM</a> implements this with pause/stop controls that allow users to halt execution at any point during a task.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!eZyh!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F07fa2da4-b2ee-4835-999d-3354fc4252dc_2242x1210.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!eZyh!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F07fa2da4-b2ee-4835-999d-3354fc4252dc_2242x1210.png 424w, https://substackcdn.com/image/fetch/$s_!eZyh!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F07fa2da4-b2ee-4835-999d-3354fc4252dc_2242x1210.png 848w, https://substackcdn.com/image/fetch/$s_!eZyh!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F07fa2da4-b2ee-4835-999d-3354fc4252dc_2242x1210.png 1272w, https://substackcdn.com/image/fetch/$s_!eZyh!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F07fa2da4-b2ee-4835-999d-3354fc4252dc_2242x1210.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!eZyh!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F07fa2da4-b2ee-4835-999d-3354fc4252dc_2242x1210.png" 
width="1456" height="786" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/07fa2da4-b2ee-4835-999d-3354fc4252dc_2242x1210.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:786,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:1132021,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://newsletter.victordibia.com/i/146526830?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F07fa2da4-b2ee-4835-999d-3354fc4252dc_2242x1210.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!eZyh!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F07fa2da4-b2ee-4835-999d-3354fc4252dc_2242x1210.png 424w, https://substackcdn.com/image/fetch/$s_!eZyh!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F07fa2da4-b2ee-4835-999d-3354fc4252dc_2242x1210.png 848w, https://substackcdn.com/image/fetch/$s_!eZyh!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F07fa2da4-b2ee-4835-999d-3354fc4252dc_2242x1210.png 1272w, https://substackcdn.com/image/fetch/$s_!eZyh!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F07fa2da4-b2ee-4835-999d-3354fc4252dc_2242x1210.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container 
restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p></p><h3>Cost Aware delegation </h3><p>Quantify and communicate the cost of agent actions, allow users to decide when agents can act.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!-mNb!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc399bb44-2332-4df9-9f59-43f00f07c144_1944x1094.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!-mNb!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc399bb44-2332-4df9-9f59-43f00f07c144_1944x1094.png 424w, 
https://substackcdn.com/image/fetch/$s_!-mNb!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc399bb44-2332-4df9-9f59-43f00f07c144_1944x1094.png 848w, https://substackcdn.com/image/fetch/$s_!-mNb!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc399bb44-2332-4df9-9f59-43f00f07c144_1944x1094.png 1272w, https://substackcdn.com/image/fetch/$s_!-mNb!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc399bb44-2332-4df9-9f59-43f00f07c144_1944x1094.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!-mNb!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc399bb44-2332-4df9-9f59-43f00f07c144_1944x1094.png" width="1456" height="819" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/c399bb44-2332-4df9-9f59-43f00f07c144_1944x1094.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:819,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:2665266,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://newsletter.victordibia.com/i/146526830?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc399bb44-2332-4df9-9f59-43f00f07c144_1944x1094.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" 
srcset="https://substackcdn.com/image/fetch/$s_!-mNb!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc399bb44-2332-4df9-9f59-43f00f07c144_1944x1094.png 424w, https://substackcdn.com/image/fetch/$s_!-mNb!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc399bb44-2332-4df9-9f59-43f00f07c144_1944x1094.png 848w, https://substackcdn.com/image/fetch/$s_!-mNb!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc399bb44-2332-4df9-9f59-43f00f07c144_1944x1094.png 1272w, https://substackcdn.com/image/fetch/$s_!-mNb!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc399bb44-2332-4df9-9f59-43f00f07c144_1944x1094.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" 
stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Agents <strong>can act</strong>, and these actions can have <em><strong>side effects</strong></em> that range from <strong>benign</strong> (executing code to add a cone to a 3D scene) to <strong>catastrophic</strong> (executing code that deletes critical system files). Some actions are <em><strong>reversible</strong></em> and can be rolled back, while others cannot&#8212;e.g., once an email is sent to the CEO, there's no taking it back. To an agent, the <em><strong>costs, risks, and safety implications</strong></em> associated with each action <em>may not be immediately apparent without specific design</em>.</p><p>This principle advocates for implementing specialized <strong>"risk/cost classifiers"</strong> that can assess and return risk levels such as <em>low/medium/high</em>. In turn, the UX should allow users to <em>control and configure response behavior</em>&#8212;for example, automatically allowing all low-risk actions while requiring explicit approval for medium- and high-risk operations. </p><p></p><h2>Key Takeaways</h2><p>A few high-level takeaways. </p><p>Perhaps the first and most important one is to <em><strong>know when to use an autonomous multi-agent approach</strong></em>. 
I typically offer a complex task perspective i.e., does the task benefit from planning, can steps be decomposed into components that benefit from specialization, does it require processing extensive context that can be siloed to specific agents, and is the environment dynamic (i.e., we need to constantly sample to understand impact of actions).</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!FrZQ!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fec71723c-7268-47d6-86f2-f75874a37792_1940x1094.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!FrZQ!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fec71723c-7268-47d6-86f2-f75874a37792_1940x1094.png 424w, https://substackcdn.com/image/fetch/$s_!FrZQ!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fec71723c-7268-47d6-86f2-f75874a37792_1940x1094.png 848w, https://substackcdn.com/image/fetch/$s_!FrZQ!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fec71723c-7268-47d6-86f2-f75874a37792_1940x1094.png 1272w, https://substackcdn.com/image/fetch/$s_!FrZQ!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fec71723c-7268-47d6-86f2-f75874a37792_1940x1094.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!FrZQ!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fec71723c-7268-47d6-86f2-f75874a37792_1940x1094.png" width="1456" height="821" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/ec71723c-7268-47d6-86f2-f75874a37792_1940x1094.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:821,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:2651116,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://newsletter.victordibia.com/i/146526830?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fec71723c-7268-47d6-86f2-f75874a37792_1940x1094.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!FrZQ!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fec71723c-7268-47d6-86f2-f75874a37792_1940x1094.png 424w, https://substackcdn.com/image/fetch/$s_!FrZQ!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fec71723c-7268-47d6-86f2-f75874a37792_1940x1094.png 848w, https://substackcdn.com/image/fetch/$s_!FrZQ!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fec71723c-7268-47d6-86f2-f75874a37792_1940x1094.png 1272w, https://substackcdn.com/image/fetch/$s_!FrZQ!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fec71723c-7268-47d6-86f2-f75874a37792_1940x1094.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" 
width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!iyHl!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd3a98d7e-9df5-4ae8-826f-38a1f4e1cb2a_1086x613.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!iyHl!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd3a98d7e-9df5-4ae8-826f-38a1f4e1cb2a_1086x613.png 424w, 
https://substackcdn.com/image/fetch/$s_!iyHl!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd3a98d7e-9df5-4ae8-826f-38a1f4e1cb2a_1086x613.png 848w, https://substackcdn.com/image/fetch/$s_!iyHl!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd3a98d7e-9df5-4ae8-826f-38a1f4e1cb2a_1086x613.png 1272w, https://substackcdn.com/image/fetch/$s_!iyHl!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd3a98d7e-9df5-4ae8-826f-38a1f4e1cb2a_1086x613.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!iyHl!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd3a98d7e-9df5-4ae8-826f-38a1f4e1cb2a_1086x613.png" width="1086" height="613" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/d3a98d7e-9df5-4ae8-826f-38a1f4e1cb2a_1086x613.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:613,&quot;width&quot;:1086,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:111170,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://newsletter.victordibia.com/i/146526830?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd3a98d7e-9df5-4ae8-826f-38a1f4e1cb2a_1086x613.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" 
srcset="https://substackcdn.com/image/fetch/$s_!iyHl!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd3a98d7e-9df5-4ae8-826f-38a1f4e1cb2a_1086x613.png 424w, https://substackcdn.com/image/fetch/$s_!iyHl!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd3a98d7e-9df5-4ae8-826f-38a1f4e1cb2a_1086x613.png 848w, https://substackcdn.com/image/fetch/$s_!iyHl!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd3a98d7e-9df5-4ae8-826f-38a1f4e1cb2a_1086x613.png 1272w, https://substackcdn.com/image/fetch/$s_!iyHl!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fd3a98d7e-9df5-4ae8-826f-38a1f4e1cb2a_1086x613.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" 
stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Takeaway summary:</p><ul><li><p><strong>Know when to use multi-agent</strong> (most tasks don't need it)</p></li><li><p><strong>Build eval-driven</strong> (define success metrics first)</p></li><li><p><strong>Design human-centered</strong> (capability discovery, observability, interruptibility, cost-awareness)</p></li><li><p><strong>Start simple</strong> (baseline &#8594; tools &#8594; agents &#8594; polish)</p></li></ul><p>Most of the material here is adapted from Chapter 3 of my <a href="https://multiagentbook.com/">upcoming book</a> on building multi-agent systems.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!n0xf!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4d9ddf14-5445-4d3c-900c-dda1c35156a9_2616x2060.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!n0xf!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4d9ddf14-5445-4d3c-900c-dda1c35156a9_2616x2060.png 424w, https://substackcdn.com/image/fetch/$s_!n0xf!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4d9ddf14-5445-4d3c-900c-dda1c35156a9_2616x2060.png 848w, 
https://substackcdn.com/image/fetch/$s_!n0xf!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4d9ddf14-5445-4d3c-900c-dda1c35156a9_2616x2060.png 1272w, https://substackcdn.com/image/fetch/$s_!n0xf!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4d9ddf14-5445-4d3c-900c-dda1c35156a9_2616x2060.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!n0xf!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4d9ddf14-5445-4d3c-900c-dda1c35156a9_2616x2060.png" width="1456" height="1147" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/4d9ddf14-5445-4d3c-900c-dda1c35156a9_2616x2060.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:1147,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:1679136,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:&quot;https://newsletter.victordibia.com/i/146526830?img=https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4d9ddf14-5445-4d3c-900c-dda1c35156a9_2616x2060.png&quot;,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!n0xf!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4d9ddf14-5445-4d3c-900c-dda1c35156a9_2616x2060.png 424w, 
https://substackcdn.com/image/fetch/$s_!n0xf!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4d9ddf14-5445-4d3c-900c-dda1c35156a9_2616x2060.png 848w, https://substackcdn.com/image/fetch/$s_!n0xf!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4d9ddf14-5445-4d3c-900c-dda1c35156a9_2616x2060.png 1272w, https://substackcdn.com/image/fetch/$s_!n0xf!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4d9ddf14-5445-4d3c-900c-dda1c35156a9_2616x2060.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" 
y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Acknowledgement: The ideas here have benefited from conversations with fellow contributors to AutoGen at Microsoft and the broader OSS community. </p><p> </p><p></p>]]></content:encoded></item></channel></rss>