Quickstart

Build a typed, tool-using browser agent with Steel and the Vercel AI SDK v6 ToolLoopAgent. The agent opens a Steel session, navigates and extracts, and ends with a typed final tool whose input is the structured result.

Scroll to the bottom to see a full example!

Requirements

  • Steel API key

  • Anthropic API key

  • Node.js 20+

Step 1: Project Setup

Create a new TypeScript project and basic script:

Terminal
mkdir steel-ai-sdk && \
cd steel-ai-sdk && \
npm init -y && \
npm install -D typescript @types/node ts-node && \
npx tsc --init && \
npm pkg set scripts.start="ts-node index.ts" && \
touch index.ts .env

Step 2: Install Dependencies

Terminal
$
npm install ai @ai-sdk/anthropic steel-sdk playwright zod dotenv

Step 3: Environment Variables

Create a .env file with your API keys:

ENV
.env
1
STEEL_API_KEY=your-steel-api-key-here
2
ANTHROPIC_API_KEY=your-anthropic-api-key-here

Step 4: Define Steel tools

Each tool is a typed tool() with a Zod input schema. Browser state (the Steel session + Playwright page) lives in a closure so every tool call sees the same page.

Typescript
index.ts
1
import * as dotenv from "dotenv";
2
import Steel from "steel-sdk";
3
import { anthropic } from "@ai-sdk/anthropic";
4
import { ToolLoopAgent, tool, stepCountIs, hasToolCall } from "ai";
5
import { chromium, type Browser, type Page } from "playwright";
6
import { z } from "zod";
7
8
// Load variables from .env into process.env before anything reads them.
dotenv.config();

// Use the value from the environment, or a placeholder string when it is
// unset or empty.
const STEEL_API_KEY = process.env.STEEL_API_KEY || "your-steel-api-key-here";
// Not referenced anywhere else in this listing.
const ANTHROPIC_API_KEY =
  process.env.ANTHROPIC_API_KEY || "your-anthropic-api-key-here";

const steel = new Steel({ steelAPIKey: STEEL_API_KEY });

// Browser state shared by every tool. All three start as null and are
// assigned by the openSession tool.
type SteelSession = Awaited<ReturnType<typeof steel.sessions.create>>;

let session: SteelSession | null = null;
let browser: Browser | null = null;
let page: Page | null = null;
18
19
/**
 * openSession — creates the Steel session, connects Playwright to it over
 * CDP, and stores the session, browser and page in the module-level state
 * that the other tools read.
 *
 * The call is idempotent: whatever already exists (session, browser, page)
 * is reused, so a repeated or retried call never creates a second session
 * that the cleanup code would not know about.
 *
 * Returns the session id and the live viewer URL. The viewer URL is also
 * printed once so it is visible in the console.
 */
const openSession = tool({
  description:
    "Open a Steel cloud browser session. Call this exactly once, before anything else.",
  inputSchema: z.object({}),
  execute: async () => {
    const isFirstOpen = session === null;

    // Store the session before connecting, so it can still be released if
    // the CDP connection below throws.
    const active = session ?? (await steel.sessions.create({}));
    session = active;

    if (!page) {
      const connected =
        browser ??
        (await chromium.connectOverCDP(
          `${active.websocketUrl}&apiKey=${STEEL_API_KEY}`
        ));
      browser = connected;

      // Prefer the context and page the remote browser already has; create
      // them only when missing.
      const ctx = connected.contexts()[0] ?? (await connected.newContext());
      page = ctx.pages()[0] ?? (await ctx.newPage());
    }

    if (isFirstOpen) {
      console.log(` live view: ${active.sessionViewerUrl}`);
    }

    return { sessionId: active.id, liveViewUrl: active.sessionViewerUrl };
  },
});
33
34
/**
 * navigate — loads a URL in the shared page and reports where it landed.
 *
 * Throws if openSession has not set up the page yet. Returns the page URL
 * after navigation together with the document title.
 */
const navigate = tool({
  description:
    "Navigate the open session to a URL and wait for the page to load.",
  inputSchema: z.object({ url: z.string().url() }),
  async execute({ url: target }) {
    const current = page;
    if (current === null) {
      throw new Error("openSession must be called first.");
    }

    await current.goto(target, {
      waitUntil: "domcontentloaded",
      timeout: 45_000,
    });

    // Read the URL first, then the title — same order as before.
    const landedUrl = current.url();
    const title = await current.title();
    return { url: landedUrl, title };
  },
});
44
45
/**
 * snapshot — returns the current page's URL, title, visible text (cut to
 * maxChars) and a list of links ({ text, href }), all gathered in a single
 * page.evaluate call.
 *
 * maxLinks caps the number of links *returned*: anchors without text or
 * href are discarded before the cap is applied, so they do not use up the
 * budget.
 *
 * Throws if openSession has not set up the page yet.
 */
const snapshot = tool({
  description:
    "Return a readable snapshot of the current page: title, URL, visible text (capped), and a list of links with their text and href. Call this BEFORE extract so you never have to guess CSS selectors.",
  inputSchema: z.object({
    maxChars: z.number().int().positive().max(10_000).default(4_000),
    maxLinks: z.number().int().positive().max(200).default(50),
  }),
  execute: async ({ maxChars, maxLinks }) => {
    if (!page) throw new Error("openSession must be called first.");

    type PageSnapshot = {
      url: string;
      title: string;
      text: string;
      links: { text: string; href: string }[];
    };

    // The callback runs inside the page, so it must be self-contained: it
    // only sees its own argument and the page's globals.
    const result: PageSnapshot = await page.evaluate(
      ({ maxChars, maxLinks }: { maxChars: number; maxLinks: number }) => {
        // A document without a <body> yields an empty text.
        const text = (document.body?.innerText ?? "").slice(0, maxChars);
        const links = Array.from(document.querySelectorAll("a[href]"))
          .map((a) => {
            const anchor = a as HTMLAnchorElement;
            const t = (anchor.innerText || anchor.textContent || "")
              .trim()
              .slice(0, 120);
            return { text: t, href: anchor.href };
          })
          // Drop unusable anchors first, then apply the cap.
          .filter((l) => l.text && l.href)
          .slice(0, maxLinks);
        return { url: location.href, title: document.title, text, links };
      },
      { maxChars, maxLinks }
    );
    return result;
  },
});
71
72
/**
 * extract — reads structured rows from the current page.
 *
 * rowSelector picks the row elements (at most `limit` are used). For each
 * row, every field is resolved relative to that row: an empty selector
 * means the row element itself, `attr` reads an attribute instead of the
 * element text, and a field whose element is missing becomes "".
 *
 * The whole extraction runs in one page.evaluate call rather than one call
 * per row and field. Throws if openSession has not set up the page yet.
 * Returns { count, items }.
 */
const extract = tool({
  description:
    "Extract structured data from the current page using CSS selectors. Provide one row selector plus a list of per-row field selectors.",
  inputSchema: z.object({
    rowSelector: z
      .string()
      .describe("CSS selector matching each item. e.g. 'article.Box-row'"),
    fields: z
      .array(
        z.object({
          name: z.string(),
          selector: z
            .string()
            .describe(
              "CSS selector relative to the row. Use an empty string to read the row element itself."
            ),
          attr: z
            .string()
            .optional()
            .describe("Optional attribute to read instead of innerText, e.g. 'href'."),
        })
      )
      .min(1)
      .max(10),
    limit: z.number().int().positive().max(20).default(10),
  }),
  async execute({ rowSelector, fields, limit }) {
    if (page === null) throw new Error("openSession must be called first.");

    type FieldSpec = { name: string; selector: string; attr?: string };
    type ExtractArgs = {
      rowSelector: string;
      fields: FieldSpec[];
      limit: number;
    };

    // The callback runs inside the page, so it must be self-contained.
    const items: Record<string, string>[] = await page.evaluate(
      (args: ExtractArgs) => {
        const matched = Array.from(document.querySelectorAll(args.rowSelector));
        return matched.slice(0, args.limit).map((rowEl) => {
          const record: Record<string, string> = {};
          args.fields.forEach((spec) => {
            const target: Element | null = spec.selector
              ? rowEl.querySelector(spec.selector)
              : rowEl;

            let value = "";
            if (target) {
              if (spec.attr) {
                value = (target.getAttribute(spec.attr) ?? "").trim();
              } else {
                const raw =
                  (target as HTMLElement).innerText ?? target.textContent ?? "";
                value = raw.trim();
              }
            }
            record[spec.name] = value;
          });
          return record;
        });
      },
      { rowSelector, fields, limit }
    );

    return { count: items.length, items };
  },
});
Don't do N×M serial CDP calls

The obvious implementation — page.$$(rowSelector) then await row.$(f.selector) and await el.innerText() per field — looks fine locally but each of those awaits is a separate CDP round-trip to Steel's cloud browser (~200-300ms each). A 10×4 extract becomes 40 round-trips (8-12 seconds). The page.evaluate version above is one round-trip: <500ms.

Step 5: Build the ToolLoopAgent

The agent's last move is a reportFindings tool with a Zod-typed input and no execute. In v6, a tool without an execute stops the loop as soon as it's called — so this tool doubles as the structured-output carrier. The typed final result is the tool call's input.

Tip — why not output: Output.object(...)? On Anthropic, forcing a JSON response format disables tool calling — the provider warns "JSON response format does not support tools. The provided tools are ignored." The "final tool" pattern is the v6-idiomatic way to combine tool loops with typed output.

Typescript
index.ts
1
// One repository entry in the final report; only name and url are required.
const repoFinding = z.object({
  name: z.string(),
  url: z.string(),
  stars: z.string().optional(),
  description: z.string().optional(),
});

/**
 * reportFindings — the final tool. Its input schema is the shape of the
 * structured result: a summary plus one to five repository entries.
 */
const reportFindings = tool({
  description:
    "Call this LAST with your final findings. Calling this ends the research.",
  inputSchema: z.object({
    summary: z
      .string()
      .describe("One-paragraph summary of what these repos have in common."),
    repos: z.array(repoFinding).min(1).max(5),
  }),
  // intentionally no execute: lacking execute makes v6 stop the loop
});
17
18
// System prompt: one fragment per entry, joined with single spaces.
const researchInstructions = [
  "You operate a Steel cloud browser via tools.",
  "Workflow: (1) call openSession, (2) navigate to the target URL,",
  "(3) call snapshot to see the page's text and links,",
  "(4) only call extract when you need structured rows beyond what snapshot gives you,",
  "(5) call reportFindings once with your final result.",
  "Do not invent data. Prefer snapshot's links list over guessing selectors.",
].join(" ");

/**
 * researchAgent — the tool loop that drives the browser tools.
 *
 * Stop conditions are a 15-step count and a reportFindings tool call.
 * After every step, one line is logged with the step number, the tools
 * called in that step and the step's total token count.
 */
const researchAgent = new ToolLoopAgent({
  model: anthropic("claude-haiku-4-5"),
  instructions: researchInstructions,
  tools: { openSession, navigate, snapshot, extract, reportFindings },
  stopWhen: [stepCountIs(15), hasToolCall("reportFindings")],
  onStepFinish: async ({ stepNumber, toolCalls, usage }) => {
    const called =
      toolCalls?.map((call: { toolName: string }) => call.toolName) ?? [];
    // A step without tool calls is reported as "(text only)".
    const names = called.join(", ") || "(text only)";
    const tokens = usage?.totalTokens ?? 0;
    console.log(` step ${stepNumber}: ${names} | ${tokens} tokens`);
  },
});
Why add snapshot at all?

Without it, the agent has to guess CSS selectors. Wrong guess → empty extract → retry → another model round-trip. snapshot returns the page's visible text + link list in one page.evaluate (<500ms), so the agent can decide whether extract is even necessary. For link-heavy sites (trending pages, news indexes, search results) the findings are already in the links list, and the agent skips extract entirely — saving a step.

Step 6: Run the agent and clean up

The agent opens the Steel session itself during its first step. The final typed result is the reportFindings tool call's input, found in result.steps.

Typescript
index.ts
1
/**
 * Runs the research agent once and prints the structured findings as
 * pretty-printed JSON. The browser and the Steel session are torn down in
 * all cases, whether the run succeeded or threw.
 */
async function main() {
  const prompt =
    "Go to https://github.com/trending/python?since=daily and return the top 3 AI/ML-related repositories. For each, give its full name (owner/repo), GitHub URL, star count as shown on the page, and the repo description.";

  try {
    const result = await researchAgent.generate({ prompt });

    // The typed result is the input of the first reportFindings call found
    // across the run's steps.
    const steps: any[] = (result as any).steps ?? [];
    let reportCall: any;
    for (const step of steps) {
      reportCall = (step.toolCalls ?? []).find(
        (tc: any) => tc.toolName === "reportFindings"
      );
      if (reportCall) break;
    }

    // Without a report input, fall back to the agent's plain text answer.
    const structured = reportCall?.input ?? { text: result.text };

    console.log(JSON.stringify(structured, null, 2));
  } finally {
    // Best-effort cleanup: errors while closing or releasing are ignored.
    if (browser) {
      await browser.close().catch(() => {});
    }
    if (session) {
      await steel.sessions.release(session.id).catch(() => {});
    }
  }
}

// Any error that escapes main is logged and turned into exit code 1.
main().catch((err) => {
  console.error(err);
  process.exit(1);
});

Run It

Terminal
npm start

You'll see a live session viewer URL in the console — open it to watch the agent drive the browser in real time.

Phase-gate tools with prepareStep (optional)

prepareStep runs before each step and can narrow the tool set per phase — preventing the agent from calling openSession twice, or from extracting before navigating.

// Phase gate: narrows the tool set according to how far the run has got.
prepareStep: async ({ stepNumber, steps }) => {
  // Phase 1 — the first step may only open the session.
  if (stepNumber === 0) return { activeTools: ["openSession"] };

  // Phase 2 — until navigate has been called, it is the only tool on offer,
  // so snapshot/extract cannot run against a blank page.
  const hasNavigated = steps.some((step) =>
    step.toolCalls.some((call) => call.toolName === "navigate")
  );
  if (!hasNavigated) return { activeTools: ["navigate"] };

  // Phase 3 — everything except openSession. reportFindings must be listed
  // here, otherwise the agent has no way to finish.
  return {
    activeTools: ["navigate", "snapshot", "extract", "reportFindings"],
  };
},

Swap the model

The default is Claude Haiku 4.5 — fast and cheap, which matters because the agent round-trips through the model 3-5 times per run. Swap up when the task needs stronger reasoning:

import { openai } from "@ai-sdk/openai";
import { google } from "@ai-sdk/google";
// model: anthropic("claude-sonnet-4-6"), // smarter, slower
// model: openai("gpt-5"),
// model: google("gemini-2.5-pro"),

Or use the AI Gateway string form (e.g. "anthropic/claude-haiku-4-5") to route through Vercel.

Full Example

Complete index.ts you can paste and run:

Typescript
index.ts
1
/*
2
* Build an AI browser agent with Vercel AI SDK v6 (ToolLoopAgent) and Steel.
3
* https://github.com/steel-dev/steel-cookbook/tree/main/examples/steel-ai-sdk-starter
4
*/
5
6
import * as dotenv from "dotenv";
7
import Steel from "steel-sdk";
8
import { anthropic } from "@ai-sdk/anthropic";
9
import { ToolLoopAgent, tool, stepCountIs, hasToolCall } from "ai";
10
import { chromium, type Browser, type Page } from "playwright";
11
import { z } from "zod";
12
13
// Load variables from .env into process.env before anything reads them.
dotenv.config();

// Use the value from the environment, or a placeholder string when it is
// unset or empty.
const STEEL_API_KEY = process.env.STEEL_API_KEY || "your-steel-api-key-here";
// Not referenced anywhere else in this listing.
const ANTHROPIC_API_KEY =
  process.env.ANTHROPIC_API_KEY || "your-anthropic-api-key-here";

const steel = new Steel({ steelAPIKey: STEEL_API_KEY });

// Browser state shared by every tool. All three start as null and are
// assigned by the openSession tool.
type SteelSession = Awaited<ReturnType<typeof steel.sessions.create>>;

let session: SteelSession | null = null;
let browser: Browser | null = null;
let page: Page | null = null;
23
24
/**
 * openSession — creates the Steel session, connects Playwright to it over
 * CDP, and stores the session, browser and page in the module-level state
 * that the other tools read.
 *
 * The call is idempotent: whatever already exists (session, browser, page)
 * is reused, so a repeated or retried call never creates a second session
 * that the cleanup code would not know about.
 *
 * Returns the session id and the live viewer URL. The viewer URL is also
 * printed once so it is visible in the console.
 */
const openSession = tool({
  description:
    "Open a Steel cloud browser session. Call this exactly once, before anything else.",
  inputSchema: z.object({}),
  execute: async () => {
    const isFirstOpen = session === null;

    // Store the session before connecting, so it can still be released if
    // the CDP connection below throws.
    const active = session ?? (await steel.sessions.create({}));
    session = active;

    if (!page) {
      const connected =
        browser ??
        (await chromium.connectOverCDP(
          `${active.websocketUrl}&apiKey=${STEEL_API_KEY}`
        ));
      browser = connected;

      // Prefer the context and page the remote browser already has; create
      // them only when missing.
      const ctx = connected.contexts()[0] ?? (await connected.newContext());
      page = ctx.pages()[0] ?? (await ctx.newPage());
    }

    if (isFirstOpen) {
      console.log(` live view: ${active.sessionViewerUrl}`);
    }

    return { sessionId: active.id, liveViewUrl: active.sessionViewerUrl };
  },
});
38
39
/**
 * navigate — loads a URL in the shared page and reports where it landed.
 *
 * Throws if openSession has not set up the page yet. Returns the page URL
 * after navigation together with the document title.
 */
const navigate = tool({
  description:
    "Navigate the open session to a URL and wait for the page to load.",
  inputSchema: z.object({ url: z.string().url() }),
  async execute({ url: target }) {
    const current = page;
    if (current === null) {
      throw new Error("openSession must be called first.");
    }

    await current.goto(target, {
      waitUntil: "domcontentloaded",
      timeout: 45_000,
    });

    // Read the URL first, then the title — same order as before.
    const landedUrl = current.url();
    const title = await current.title();
    return { url: landedUrl, title };
  },
});
49
50
/**
 * snapshot — returns the current page's URL, title, visible text (cut to
 * maxChars) and a list of links ({ text, href }), all gathered in a single
 * page.evaluate call.
 *
 * maxLinks caps the number of links *returned*: anchors without text or
 * href are discarded before the cap is applied, so they do not use up the
 * budget.
 *
 * Throws if openSession has not set up the page yet.
 */
const snapshot = tool({
  description:
    "Return a readable snapshot of the current page: title, URL, visible text (capped), and a list of links with their text and href. Call this BEFORE extract so you never have to guess CSS selectors.",
  inputSchema: z.object({
    maxChars: z.number().int().positive().max(10_000).default(4_000),
    maxLinks: z.number().int().positive().max(200).default(50),
  }),
  execute: async ({ maxChars, maxLinks }) => {
    if (!page) throw new Error("openSession must be called first.");

    type PageSnapshot = {
      url: string;
      title: string;
      text: string;
      links: { text: string; href: string }[];
    };

    // The callback runs inside the page, so it must be self-contained: it
    // only sees its own argument and the page's globals.
    const result: PageSnapshot = await page.evaluate(
      ({ maxChars, maxLinks }: { maxChars: number; maxLinks: number }) => {
        // A document without a <body> yields an empty text.
        const text = (document.body?.innerText ?? "").slice(0, maxChars);
        const links = Array.from(document.querySelectorAll("a[href]"))
          .map((a) => {
            const anchor = a as HTMLAnchorElement;
            const t = (anchor.innerText || anchor.textContent || "")
              .trim()
              .slice(0, 120);
            return { text: t, href: anchor.href };
          })
          // Drop unusable anchors first, then apply the cap.
          .filter((l) => l.text && l.href)
          .slice(0, maxLinks);
        return { url: location.href, title: document.title, text, links };
      },
      { maxChars, maxLinks }
    );
    return result;
  },
});
76
77
/**
 * extract — reads structured rows from the current page.
 *
 * rowSelector picks the row elements (at most `limit` are used). For each
 * row, every field is resolved relative to that row: an empty selector
 * means the row element itself, `attr` reads an attribute instead of the
 * element text, and a field whose element is missing becomes "".
 *
 * The whole extraction runs in one page.evaluate call rather than one call
 * per row and field. Throws if openSession has not set up the page yet.
 * Returns { count, items }.
 */
const extract = tool({
  description:
    "Extract structured data from the current page using CSS selectors. Provide one row selector plus a list of per-row field selectors.",
  inputSchema: z.object({
    rowSelector: z
      .string()
      .describe("CSS selector matching each item. e.g. 'article.Box-row'"),
    fields: z
      .array(
        z.object({
          name: z.string(),
          selector: z
            .string()
            .describe(
              "CSS selector relative to the row. Use an empty string to read the row element itself."
            ),
          attr: z
            .string()
            .optional()
            .describe("Optional attribute to read instead of innerText, e.g. 'href'."),
        })
      )
      .min(1)
      .max(10),
    limit: z.number().int().positive().max(20).default(10),
  }),
  async execute({ rowSelector, fields, limit }) {
    if (page === null) throw new Error("openSession must be called first.");

    type FieldSpec = { name: string; selector: string; attr?: string };
    type ExtractArgs = {
      rowSelector: string;
      fields: FieldSpec[];
      limit: number;
    };

    // The callback runs inside the page, so it must be self-contained.
    const items: Record<string, string>[] = await page.evaluate(
      (args: ExtractArgs) => {
        const matched = Array.from(document.querySelectorAll(args.rowSelector));
        return matched.slice(0, args.limit).map((rowEl) => {
          const record: Record<string, string> = {};
          args.fields.forEach((spec) => {
            const target: Element | null = spec.selector
              ? rowEl.querySelector(spec.selector)
              : rowEl;

            let value = "";
            if (target) {
              if (spec.attr) {
                value = (target.getAttribute(spec.attr) ?? "").trim();
              } else {
                const raw =
                  (target as HTMLElement).innerText ?? target.textContent ?? "";
                value = raw.trim();
              }
            }
            record[spec.name] = value;
          });
          return record;
        });
      },
      { rowSelector, fields, limit }
    );

    return { count: items.length, items };
  },
});
131
132
// One repository entry in the final report; only name and url are required.
const repoFinding = z.object({
  name: z.string(),
  url: z.string(),
  stars: z.string().optional(),
  description: z.string().optional(),
});

/**
 * reportFindings — the final tool. Its input schema is the shape of the
 * structured result: a summary plus one to five repository entries.
 */
const reportFindings = tool({
  description:
    "Call this LAST with your final findings. Calling this ends the research.",
  inputSchema: z.object({
    summary: z
      .string()
      .describe("One-paragraph summary of what these repos have in common."),
    repos: z.array(repoFinding).min(1).max(5),
  }),
  // intentionally no execute: lacking execute makes v6 stop the loop
});
148
149
// System prompt: one fragment per entry, joined with single spaces.
const researchInstructions = [
  "You operate a Steel cloud browser via tools.",
  "Workflow: (1) call openSession, (2) navigate to the target URL,",
  "(3) call snapshot to see the page's text and links,",
  "(4) only call extract when you need structured rows beyond what snapshot gives you,",
  "(5) call reportFindings once with your final result.",
  "Do not invent data. Prefer snapshot's links list over guessing selectors.",
].join(" ");

/**
 * researchAgent — the tool loop that drives the browser tools.
 *
 * Stop conditions are a 15-step count and a reportFindings tool call.
 * After every step, one line is logged with the step number, the tools
 * called in that step and the step's total token count.
 */
const researchAgent = new ToolLoopAgent({
  model: anthropic("claude-haiku-4-5"),
  instructions: researchInstructions,
  tools: { openSession, navigate, snapshot, extract, reportFindings },
  stopWhen: [stepCountIs(15), hasToolCall("reportFindings")],
  onStepFinish: async ({ stepNumber, toolCalls, usage }) => {
    const called =
      toolCalls?.map((call: { toolName: string }) => call.toolName) ?? [];
    // A step without tool calls is reported as "(text only)".
    const names = called.join(", ") || "(text only)";
    const tokens = usage?.totalTokens ?? 0;
    console.log(` step ${stepNumber}: ${names} | ${tokens} tokens`);
  },
});
166
167
/**
 * Runs the research agent once and prints the structured findings as
 * pretty-printed JSON. The browser and the Steel session are torn down in
 * all cases, whether the run succeeded or threw.
 */
async function main() {
  const prompt =
    "Go to https://github.com/trending/python?since=daily and return the top 3 AI/ML-related repositories. For each, give its full name (owner/repo), GitHub URL, star count as shown on the page, and the repo description.";

  try {
    const result = await researchAgent.generate({ prompt });

    // The typed result is the input of the first reportFindings call found
    // across the run's steps.
    const steps: any[] = (result as any).steps ?? [];
    let reportCall: any;
    for (const step of steps) {
      reportCall = (step.toolCalls ?? []).find(
        (tc: any) => tc.toolName === "reportFindings"
      );
      if (reportCall) break;
    }

    // Without a report input, fall back to the agent's plain text answer.
    const structured = reportCall?.input ?? { text: result.text };

    console.log(JSON.stringify(structured, null, 2));
  } finally {
    // Best-effort cleanup: errors while closing or releasing are ignored.
    if (browser) {
      await browser.close().catch(() => {});
    }
    if (session) {
      await steel.sessions.release(session.id).catch(() => {});
    }
  }
}

// Any error that escapes main is logged and turned into exit code 1.
main().catch((err) => {
  console.error(err);
  process.exit(1);
});

Next Steps