Streaming
Stream responses in real time for a better user experience.
Overview
Streaming allows you to receive AI responses incrementally as they're generated, rather than waiting for the complete response. This creates a more responsive user experience, especially for longer responses.
How It Works
When streaming is enabled, the API sends Server-Sent Events (SSE) containing chunks of the response as they're generated. Each chunk contains a delta (the new content) that you append to build the complete response.
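If you are not using an SDK, you can read the SSE stream directly with fetch. The sketch below is illustrative rather than official: it assumes the OpenAI-compatible /chat/completions path under the base URL and the headers shown in the examples later in this guide, and the data: ... / [DONE] line framing that OpenAI-style APIs use.
// Minimal manual SSE consumption (illustrative sketch, not the official client).
const response = await fetch('https://superagentstack.orionixtech.com/api/v1/chat/completions', {
  method: 'POST',
  headers: {
    'Content-Type': 'application/json',
    'Authorization': `Bearer ${process.env.OPENROUTER_KEY}`,
    'superAgentKey': process.env.SUPER_AGENT_KEY ?? '',
  },
  body: JSON.stringify({
    model: 'anthropic/claude-3-sonnet',
    messages: [{ role: 'user', content: 'Hello' }],
    stream: true,
  }),
});

const reader = response.body!.getReader();
const decoder = new TextDecoder();
let buffer = '';
let fullText = '';

while (true) {
  const { done, value } = await reader.read();
  if (done) break;
  buffer += decoder.decode(value, { stream: true });
  const lines = buffer.split('\n');
  buffer = lines.pop() ?? ''; // keep any partial line for the next read
  for (const line of lines) {
    if (!line.startsWith('data: ')) continue;
    const payload = line.slice('data: '.length).trim();
    if (payload === '[DONE]') continue; // end-of-stream sentinel
    const chunk = JSON.parse(payload);
    fullText += chunk.choices[0]?.delta?.content ?? ''; // append each delta
  }
}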
Enabling Streaming
Set stream: true in your request:
const stream = await client.chat.completions.create({
model: 'anthropic/claude-3-sonnet',
messages: [
{ role: 'user', content: 'Write a short story about AI' }
],
stream: true, // Enable streaming
});
TypeScript/JavaScript Example
Node.js with OpenAI SDK
import OpenAI from 'openai';
const client = new OpenAI({
baseURL: 'https://superagentstack.orionixtech.com/api/v1',
apiKey: process.env.OPENROUTER_KEY,
defaultHeaders: {
'superAgentKey': process.env.SUPER_AGENT_KEY,
},
});
async function streamChat() {
const stream = await client.chat.completions.create({
model: 'anthropic/claude-3-sonnet',
messages: [
{ role: 'user', content: 'Explain quantum computing in simple terms' }
],
stream: true,
});
// Process the stream
for await (const chunk of stream) {
const content = chunk.choices[0]?.delta?.content || '';
process.stdout.write(content);
}
console.log('\n\nStream complete!');
}
streamChat();
Next.js API Route
import OpenAI from 'openai';
import { OpenAIStream, StreamingTextResponse } from 'ai';
const client = new OpenAI({
baseURL: 'https://superagentstack.orionixtech.com/api/v1',
apiKey: process.env.OPENROUTER_KEY!,
defaultHeaders: {
'superAgentKey': process.env.SUPER_AGENT_KEY!,
},
});
export async function POST(req: Request) {
const { messages } = await req.json();
const response = await client.chat.completions.create({
model: 'anthropic/claude-3-sonnet',
messages,
stream: true,
});
// Convert to Vercel AI SDK stream
const stream = OpenAIStream(response);
return new StreamingTextResponse(stream);
}
React Component
'use client';
import { useChat } from 'ai/react';
export default function Chat() {
const { messages, input, handleInputChange, handleSubmit } = useChat({
api: '/api/chat',
});
return (
<div className="flex flex-col h-screen">
<div className="flex-1 overflow-y-auto p-4">
{messages.map(m => (
<div key={m.id} className="mb-4">
<strong>{m.role === 'user' ? 'You: ' : 'AI: '}</strong>
{m.content}
</div>
))}
</div>
<form onSubmit={handleSubmit} className="p-4 border-t">
<input
value={input}
onChange={handleInputChange}
placeholder="Type your message..."
className="w-full p-2 border rounded"
/>
</form>
</div>
);
}
Python Example
from openai import OpenAI
import os
client = OpenAI(
base_url="https://superagentstack.orionixtech.com/api/v1",
api_key=os.environ.get("OPENROUTER_KEY"),
default_headers={
"superAgentKey": os.environ.get("SUPER_AGENT_KEY"),
}
)
def stream_chat():
stream = client.chat.completions.create(
model="anthropic/claude-3-sonnet",
messages=[
{
"role": "user",
"content": "Explain quantum computing in simple terms"
}
],
stream=True,
)
# Process the stream
for chunk in stream:
if chunk.choices[0].delta.content is not None:
print(chunk.choices[0].delta.content, end="")
print("\n\nStream complete!")
stream_chat()
Streaming with Memory & RAG
You can combine streaming with session memory and RAG for enhanced responses:
const stream = await client.chat.completions.create({
model: 'anthropic/claude-3-sonnet',
messages: [
{ role: 'user', content: 'What did we discuss earlier?' }
],
stream: true,
// Memory & RAG parameters
sessionId: 'user-123-session', // Enable conversation memory
saveToMemory: true, // Save this conversation
useRAG: true, // Search uploaded files
});
for await (const chunk of stream) {
const content = chunk.choices[0]?.delta?.content || '';
process.stdout.write(content);
}
Memory in Streaming
When you provide a sessionId, the conversation is automatically saved to memory after the stream completes.
Response Format
Each chunk in the stream follows this format:
{
"id": "chatcmpl-123",
"object": "chat.completion.chunk",
"created": 1677652288,
"model": "anthropic/claude-3-sonnet",
"choices": [
{
"index": 0,
"delta": {
"content": "Hello" // The new content chunk
},
"finish_reason": null
}
]
}
The last chunk will have finish_reason: "stop" and may include usage statistics:
{
"id": "chatcmpl-123",
"object": "chat.completion.chunk",
"created": 1677652288,
"model": "anthropic/claude-3-sonnet",
"choices": [
{
"index": 0,
"delta": {},
"finish_reason": "stop"
}
],
"usage": {
"prompt_tokens": 20,
"completion_tokens": 100,
"total_tokens": 120
}
}
Error Handling
Always wrap streaming in try-catch blocks:
async function streamWithErrorHandling() {
try {
const stream = await client.chat.completions.create({
model: 'anthropic/claude-3-sonnet',
messages: [{ role: 'user', content: 'Hello' }],
stream: true,
});
for await (const chunk of stream) {
const content = chunk.choices[0]?.delta?.content || '';
process.stdout.write(content);
}
} catch (error) {
if (error instanceof Error) {
console.error('Stream error:', error.message);
// Handle specific error types
if (error.message.includes('rate limit')) {
console.error('Rate limit exceeded. Please try again later.');
} else if (error.message.includes('authentication')) {
console.error('Invalid API keys. Check your credentials.');
}
}
}
}
Best Practices
- Buffer small chunks: Accumulate small chunks before updating the UI to reduce re-renders (see the sketch after this list)
- Handle connection drops: Implement reconnection logic for network issues
- Show loading states: Display a loading indicator while waiting for first chunk
- Graceful degradation: Fall back to non-streaming if streaming fails
- Cancel streams: Implement abort controllers to cancel streams when needed
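For example, here is one way to buffer deltas and flush them on a timer, so the UI re-renders at most every 50 ms instead of on every chunk. This is a minimal sketch; updateUI is a hypothetical callback standing in for your framework's state setter.
// Sketch: accumulate streamed deltas and flush them to the UI on a timer.
async function streamBuffered(
  stream: AsyncIterable<{ choices: { delta?: { content?: string } }[] }>,
  updateUI: (text: string) => void,
) {
  let fullText = '';
  let pending = '';

  const flush = () => {
    if (!pending) return;
    fullText += pending;
    pending = '';
    updateUI(fullText); // one render per flush instead of one per chunk
  };

  const timer = setInterval(flush, 50); // flush at most every 50 ms
  try {
    for await (const chunk of stream) {
      pending += chunk.choices[0]?.delta?.content ?? '';
    }
  } finally {
    clearInterval(timer);
    flush(); // render whatever is still buffered when the stream ends
  }
}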
Canceling Streams
const controller = new AbortController();
const stream = await client.chat.completions.create({
model: 'anthropic/claude-3-sonnet',
messages: [{ role: 'user', content: 'Write a long story' }],
stream: true,
}, {
signal: controller.signal, // Pass abort signal
});
// Cancel after 5 seconds
setTimeout(() => {
controller.abort();
console.log('Stream canceled');
}, 5000);
try {
for await (const chunk of stream) {
process.stdout.write(chunk.choices[0]?.delta?.content || '');
}
} catch (error) {
if (error instanceof Error && error.name === 'AbortError') {
console.log('Stream was canceled by user');
}
}
Token Counting