End-to-End Examples
Complete workflow examples with error handling — form filling, multi-window, and scrolling.
Form filling workflow
Open an app, fill out a form, submit it, and verify the result.
import cup
session = cup.Session()

# 1. Open the target application
result = session.open_app("notepad")
if not result.success:
    print(f"Failed to open app: {result.error}")

# 2. Capture the UI
screen = session.snapshot()
print(screen)

# 3. Find the text input area
results = session.find(query="text editor")
if not results:
    print("Could not find text input")
else:
    element_id = results[0]["id"]

    # 4. Type into it
    result = session.action(element_id, "type", value="Hello from CUP!")
    if not result.success:
        print(f"Type failed: {result.error}")

# 5. Save with keyboard shortcut
session.press("ctrl+s")

# 6. Re-capture to verify the UI changed
screen = session.snapshot()
print(screen)

import { Session } from "computeruseprotocol";
const session = await Session.create();

// Step 1: launch the application under test
const opened = await session.openApp("notepad");
if (!opened.success) {
  console.error(`Failed to open app: ${opened.error}`);
}

// Step 2: take a snapshot of the current UI
let screen = await session.snapshot();
console.log(screen);

// Step 3: look up the text input area
const matches = await session.find({ query: "text editor" });
if (matches.length > 0) {
  // Step 4: type into the first match
  const editorId = matches[0].id;
  const typed = await session.action(editorId, "type", { value: "Hello from CUP!" });
  if (!typed.success) {
    console.error(`Type failed: ${typed.error}`);
  }
} else {
  console.error("Could not find text input");
}

// Step 5: save via keyboard shortcut
await session.press("ctrl+s");

// Step 6: snapshot again so the change can be verified
screen = await session.snapshot();
console.log(screen);

Always re-capture with snapshot() after performing actions. Element IDs from previous snapshots are no longer valid.
Multi-window interaction
List open windows, switch between apps, and interact with each.
import cup
session = cup.Session()

# 1. See what's open
windows = session.snapshot(scope="overview")
print(windows)
# Output:
# ── windows ──
#   "Visual Studio Code" pid=1234 [foreground] @0,0 1920x1080
#   "Google Chrome" pid=5678 @0,0 1920x1080

# 2. Capture a specific app (by window title)
chrome_tree = session.snapshot(scope="full", app="Chrome")
print(chrome_tree)

# 3. Find the URL bar and navigate
results = session.find(query="address bar")
if results:
    bar_id = results[0]["id"]
    session.action(bar_id, "click")
    session.action(bar_id, "type", value="https://example.com")
    session.press("enter")

# 4. Switch to another app
vscode_tree = session.snapshot(scope="full", app="Visual Studio Code")
print(vscode_tree)

# 5. Interact with VS Code
results = session.find(query="terminal")
if results:
    session.action(results[0]["id"], "click")

import { Session } from "computeruseprotocol";
const session = await Session.create();

// Step 1: list the open windows
const windows = await session.snapshot({ scope: "overview" });
console.log(windows);

// Step 2: capture one application, selected by window title
const chromeTree = await session.snapshot({ scope: "full", app: "Chrome" });
console.log(chromeTree);

// Step 3: locate the URL bar, then navigate
let results = await session.find({ query: "address bar" });
if (results.length > 0) {
  const addressBarId = results[0].id;
  await session.action(addressBarId, "click");
  await session.action(addressBarId, "type", { value: "https://example.com" });
  await session.press("enter");
}

// Step 4: switch to a different application
const vscodeTree = await session.snapshot({ scope: "full", app: "Visual Studio Code" });
console.log(vscodeTree);

// Step 5: look for the VS Code terminal
results = await session.find({ query: "terminal" });
if (results.length > 0) {
  await session.action(results[0].id, "click");
}

Scrolling and pagination
Handle long lists that don't fit on screen. CUP supports two approaches: pagination (for clipped items already in the tree) and scrolling (for virtualized content that loads on demand).
import cup
session = cup.Session()
screen = session.snapshot()

# The compact output may show:
#   [e5] list "Search results" @10,100 400x600
#   [e6] listitem "Result 1" ...
#   [e7] listitem "Result 2" ...
#   ... 48 more items — page("e5") to see

# Approach 1: Pagination (items already in tree, just clipped)
first_batch = session.page("e5", direction="down")
print(first_batch)  # the next batch of items
second_batch = session.page("e5", direction="down")
print(second_batch)  # the batch after that

# Jump straight to a specific offset
offset_page = session.page("e5", offset=40, limit=10)
print(offset_page)

# Approach 2: Scrolling (for virtualized/lazy-loaded content)
session.action("e5", "scroll", direction="down")
screen = session.snapshot()  # re-capture — new content is now loaded

# Look up the target element after scrolling
results = session.find(query="Result 50")
if results:
    session.action(results[0]["id"], "click")

import { Session } from "computeruseprotocol";
const session = await Session.create();
let screen = await session.snapshot();

// Approach 1: Pagination (items already in tree, just clipped)
const firstBatch = await session.page("e5", { direction: "down" });
console.log(firstBatch);
const secondBatch = await session.page("e5", { direction: "down" });
console.log(secondBatch);

// Jump straight to a specific offset
const offsetPage = await session.page("e5", { offset: 40, limit: 10 });
console.log(offsetPage);

// Approach 2: Scrolling (for virtualized/lazy-loaded content)
await session.action("e5", "scroll", { direction: "down" });
screen = await session.snapshot(); // re-capture — new content is now loaded

// Look up the target element after scrolling
const results = await session.find({ query: "Result 50" });
if (results.length > 0) {
  await session.action(results[0].id, "click");
}

Use page() when the compact output says "N more items — page(...) to see". This reads from the cached tree without re-capturing. Use scroll() followed by snapshot() when the app uses virtual scrolling and loads content dynamically.
Robust action pattern with retries
When automating real UIs, actions can fail due to animations, loading states, or timing. Here's a resilient pattern:
import cup
import time
session = cup.Session()
def find_and_click(session, query, retries=3, delay=0.5):
    """Find an element and click it, retrying on failure.

    Each attempt re-captures the UI, searches for ``query`` and clicks the
    first match. An attempt fails when nothing matches or when the click
    reports ``success`` as false.

    Args:
        session: Object providing ``snapshot()``, ``find(query=...)`` and
            ``action(element_id, "click")``.
        query: Search text passed to ``session.find``.
        retries: Maximum number of attempts.
        delay: Seconds to wait between attempts.

    Returns:
        The result of the first successful click, or ``None`` if every
        attempt failed (or ``retries`` is not positive).
    """
    for attempt in range(retries):
        # Wait only *between* attempts: there is nothing to wait for before
        # the first one or after the last one.
        if attempt:
            time.sleep(delay)

        # Re-capture before searching so the lookup runs against the
        # current UI rather than a stale snapshot.
        session.snapshot()
        results = session.find(query=query)
        if not results:
            continue

        result = session.action(results[0]["id"], "click")
        if result.success:
            return result
        # Action failed — UI may have changed, retry
    return None
# Usage
result = find_and_click(session, "Submit button")
if result:
    print(f"Clicked: {result.message}")
else:
    print("Failed to find or click the element after retries")

import { Session, type ActionResult } from "computeruseprotocol";
const session = await Session.create();
/**
 * Find an element and click it, retrying on failure.
 *
 * Each attempt re-captures the UI, searches for `query` and clicks the first
 * match. An attempt fails when nothing matches or when the click reports
 * `success` as false.
 *
 * @param session - Session used to snapshot, search and act.
 * @param query - Search text passed to `session.find`.
 * @param retries - Maximum number of attempts.
 * @param delayMs - Milliseconds to wait between attempts.
 * @returns The result of the first successful click, or `null` if every
 *   attempt failed.
 */
async function findAndClick(
  session: Session,
  query: string,
  retries = 3,
  delayMs = 500
): Promise<ActionResult | null> {
  for (let attempt = 0; attempt < retries; attempt++) {
    // Wait only between attempts: there is nothing to wait for before the
    // first one or after the last one.
    if (attempt > 0) {
      await new Promise((resolve) => setTimeout(resolve, delayMs));
    }

    // Re-capture before searching so the lookup runs against the current UI.
    await session.snapshot();
    const results = await session.find({ query });
    if (results.length === 0) {
      continue;
    }

    const result = await session.action(results[0].id, "click");
    if (result.success) return result;
    // Action failed — UI may have changed, retry
  }
  return null;
}
// Usage
const result = await findAndClick(session, "Submit button");
if (result) {
  console.log(`Clicked: ${result.message}`);
} else {
  console.error("Failed to find or click the element after retries");
}

What's next?
- Error handling — understand all error types and how to handle them
- Performance tips — optimize snapshots for speed and token usage
- Session API — full method reference