Setup and context
Gemini 2.5 Pro excels at building sophisticated agent systems for complex multi-step task automation. This guide presents a systematic approach to designing and implementing reliable, production-grade AI agents using Gemini's latest capabilities.
Agent System Design Fundamentals
Reactive vs Planning Agents
Reactive agents observe the state of their environment and act immediately at each step. They are simple and predictable, but unsuitable for complex tasks that require foresight.
Planning agents construct a full plan before execution. Gemini 2.5 Pro's extended reasoning capabilities enable solving significantly more complex problems through deliberate multi-step planning.
Tool Definition Schema Design
Precision in tool definitions is critical for reliable agent behavior:
import json
import google.generativeai as genai
# Example tool definitions
# Function declarations handed to the model. Each entry names a tool, describes
# it for the model, and gives a JSON-schema-style description of its arguments.
tools = [
    # Web search: "query" is mandatory, "num_results" is optional.
    {
        "name": "search_web",
        "description": "Search the web and return relevant information",
        "parameters": {
            "type": "object",
            "properties": {
                "query": {
                    "type": "string",
                    "description": "Search query",
                },
                # NOTE(review): confirm the SDK's schema type accepts a
                # "default" key; some versions reject unknown schema fields.
                "num_results": {
                    "type": "integer",
                    "description": "Number of results to return",
                    "default": 5,
                },
            },
            "required": ["query"],
        },
    },
    # URL fetch: takes exactly one mandatory argument, "url".
    {
        "name": "fetch_url",
        "description": "Retrieve content from a specified URL",
        "parameters": {
            "type": "object",
            "properties": {
                "url": {
                    "type": "string",
                    "description": "URL to fetch",
                },
            },
            "required": ["url"],
        },
    },
]
# Bind the tool declarations to the model so it can emit function calls for them.
model = genai.GenerativeModel(
    "gemini-2.5-pro",
    tools=tools,
)

# Parallel Tool Calling Implementation
Gemini 2.5 Pro supports simultaneous invocation of multiple tools:
def execute_parallel_tools(user_query):
    """Answer ``user_query``, running every tool the model requests in one turn.

    Flow: send the query, collect the function calls in the model's reply,
    execute them concurrently, hand all results back to the model in a single
    follow-up turn, and return the model's final text.

    Args:
        user_query: The user's request as plain text.

    Returns:
        The model's text answer. When the first reply contains no function
        calls, that reply's text is returned without a second model call.
    """
    # Function-scope import keeps this snippet self-contained.
    from concurrent.futures import ThreadPoolExecutor

    # The SDK's content dicts are keyed by "role" and "parts".
    messages = [{"role": "user", "parts": [user_query]}]

    # First reasoning step
    response = model.generate_content(messages)
    model_turn = response.candidates[0].content

    tool_calls = [
        part.function_call for part in model_turn.parts if part.function_call
    ]
    if not tool_calls:
        # The model answered directly; sending back an empty tool-result turn
        # would be pointless (and is rejected by the API).
        return response.text

    print(f"Executing {len(tool_calls)} tools in parallel")

    # Threads overlap the tools' waits; map() yields results in call order,
    # so results line up with tool_calls below.
    with ThreadPoolExecutor(max_workers=len(tool_calls)) as pool:
        results = list(
            pool.map(lambda call: execute_tool(call.name, call.args), tool_calls)
        )

    # Replay the model's own turn, then answer every call in one user turn.
    messages.append(model_turn)
    messages.append({
        "role": "user",
        "parts": [
            {
                "function_response": {
                    "name": call.name,
                    # The response payload must be a mapping, not a bare string.
                    "response": {"result": result},
                }
            }
            for call, result in zip(tool_calls, results)
        ],
    })

    # Generate final response
    final_response = model.generate_content(messages)
    return final_response.text
def execute_tool(tool_name, arguments):
    """Simulate running a tool and return its textual result.

    Args:
        tool_name: Name of the tool to run ("search_web" or "fetch_url").
        arguments: Mapping of tool arguments; "query" is read for
            search_web and "url" for fetch_url.

    Returns:
        A canned result string, or "Unknown tool" for any other tool name.

    Raises:
        KeyError: If ``arguments`` is a dict lacking the key the tool reads.
    """
    if tool_name == "search_web":
        return f"Search results for: {arguments['query']}"
    if tool_name == "fetch_url":
        return f"Content retrieved from {arguments['url']}"
    return "Unknown tool"


# Multi-Step Reasoning Loop Implementation
The core of agent execution is the tool-calling and result aggregation loop:
class ResearchAgent:
    """Agent that alternates model turns and tool executions until done.

    Attributes:
        model: Object exposing ``generate_content(history)``; responses are
            read through ``candidates[0].content.parts`` and ``text``.
        tool_executor: Optional ``callable(name, args)`` used to run tools.
            When omitted, the module-level ``execute_tool`` is used.
        max_iterations: Upper bound on model turns per ``run`` call.
        conversation_history: Turns exchanged during the latest run.
        iteration_count: Number of model turns taken by the latest run.
        execution_time: Wall-clock seconds spent in the latest run.
    """

    def __init__(self, model, tool_executor=None):
        self.model = model
        self.tool_executor = tool_executor
        self.max_iterations = 10
        self.conversation_history = []
        self.iteration_count = 0
        self.execution_time = 0.0

    def run(self, task):
        """Execute the multi-step reasoning loop for ``task``.

        Returns:
            The model's final text, or "Maximum iterations reached" when the
            model is still requesting tools after ``max_iterations`` turns.
        """
        import time  # function-scope import keeps the snippet self-contained

        started = time.perf_counter()
        self.iteration_count = 0
        # The SDK's content dicts are keyed by "role" and "parts".
        self.conversation_history = [{"role": "user", "parts": [task]}]
        try:
            for iteration in range(self.max_iterations):
                self.iteration_count = iteration + 1
                print(f"\n=== Iteration {iteration + 1} ===")

                # Get next action from model
                response = self.model.generate_content(self.conversation_history)

                # A reply without function calls is the final answer.
                if self._should_stop(response):
                    print(f"Task completed: {response.text}")
                    return response.text

                # Replay the model's own turn, then answer its tool calls.
                model_turn = response.candidates[0].content
                self.conversation_history.append(model_turn)
                tool_calls = [
                    part.function_call
                    for part in model_turn.parts
                    if part.function_call
                ]
                self.conversation_history.append({
                    "role": "user",
                    "parts": self._execute_tools(tool_calls),
                })
            return "Maximum iterations reached"
        finally:
            # Recorded on every exit path, including exceptions.
            self.execution_time = time.perf_counter() - started

    def _should_stop(self, response):
        """Return True when the reply contains no function call."""
        return not any(
            part.function_call for part in response.candidates[0].content.parts
        )

    def _execute_tools(self, tool_calls):
        """Run each tool call in order and return function-response parts."""
        # Resolve the fallback lazily so an injected executor never needs
        # the module-level function to exist.
        run_tool = self.tool_executor or execute_tool
        return [
            {
                "function_response": {
                    "name": call.name,
                    # The response payload must be a mapping, not a bare string.
                    "response": {"result": run_tool(call.name, call.args)},
                }
            }
            for call in tool_calls
        ]


# Agent State Management
Production systems require persistent state storage:
from datetime import datetime
import json
class AgentStateManager:
    """Track per-session agent state and hand it to a storage backend.

    Attributes:
        backend: Name of the storage backend ("memory", "firestore", "redis").
        sessions: In-process map of session_id -> session state dict.
    """

    def __init__(self, storage_backend="memory"):
        self.backend = storage_backend
        self.sessions = {}

    def create_session(self, user_id, task):
        """Create a new agent session and return its id.

        The id is ``"<user_id>_<ISO timestamp>"``; a numeric suffix is added
        only when that id is already taken, so an existing session is never
        overwritten.
        """
        # One timestamp for both the id and created_at keeps them consistent.
        created_at = datetime.now().isoformat()
        base_id = f"{user_id}_{created_at}"
        session_id = base_id
        suffix = 1
        while session_id in self.sessions:
            session_id = f"{base_id}_{suffix}"
            suffix += 1

        session_state = {
            "session_id": session_id,
            "user_id": user_id,
            "task": task,
            "created_at": created_at,
            "conversation_history": [],
            "tool_call_log": [],
            "status": "running",
        }
        self.sessions[session_id] = session_state
        self._persist_state(session_id, session_state)
        return session_id

    def update_conversation(self, session_id, role, content):
        """Append a timestamped message to the session's conversation history.

        Raises:
            ValueError: If ``session_id`` is unknown.
        """
        session = self._get_session(session_id)
        session["conversation_history"].append({
            "role": role,
            "content": content,
            "timestamp": datetime.now().isoformat(),
        })
        self._persist_state(session_id, session)

    def log_tool_call(self, session_id, tool_name, args, result):
        """Append a timestamped tool call to the session's audit log.

        Raises:
            ValueError: If ``session_id`` is unknown.
        """
        session = self._get_session(session_id)
        session["tool_call_log"].append({
            "tool_name": tool_name,
            "arguments": args,
            "result": result,
            "timestamp": datetime.now().isoformat(),
        })
        self._persist_state(session_id, session)

    def _get_session(self, session_id):
        """Return the session state, failing the same way for every caller."""
        try:
            return self.sessions[session_id]
        except KeyError:
            raise ValueError(f"Session {session_id} not found") from None

    def _persist_state(self, session_id, state):
        """Persist state to the configured backend."""
        if self.backend == "memory":
            self.sessions[session_id] = state
        elif self.backend == "firestore":
            # Firestore persistence implementation (placeholder: currently a
            # no-op, state lives only in self.sessions)
            pass
        elif self.backend == "redis":
            # Redis persistence implementation (placeholder: currently a
            # no-op, state lives only in self.sessions)
            pass


# Error Recovery Patterns
Robust error handling is essential for production reliability:
import time
from typing import Optional
class ResilientAgent:
    """Agent helper that retries failing tools and degrades gracefully.

    Attributes:
        model: Model object; stored but not used by the methods below.
        max_retries: Maximum number of attempts per tool call.
        tool_executor: Optional ``callable(name, args)`` used to run tools.
            When omitted, the module-level ``execute_tool`` is used.
    """

    def __init__(self, model, max_retries=3, tool_executor=None):
        self.model = model
        self.max_retries = max_retries
        self.tool_executor = tool_executor

    def call_tool_with_retry(self, tool_name, arguments):
        """Call a tool, retrying with exponential backoff on any exception.

        Returns:
            The tool's result, or the fallback string from
            ``_graceful_fallback`` once every attempt has failed.
        """
        run_tool = self.tool_executor or execute_tool
        # Always try at least once: with max_retries <= 0 the loop would
        # otherwise be skipped and None returned without ever calling the tool.
        attempts = max(1, self.max_retries)
        for attempt in range(attempts):
            try:
                return run_tool(tool_name, arguments)
            except Exception as e:  # retry boundary: any tool failure counts
                if attempt == attempts - 1:
                    # Final attempt failed -> graceful degradation
                    return self._graceful_fallback(tool_name, arguments, e)
                # Exponential backoff: wait 2^attempt seconds
                wait_time = 2 ** attempt
                print(f"Retry {attempt + 1}/{self.max_retries} - waiting {wait_time}s")
                time.sleep(wait_time)

    def _graceful_fallback(self, tool_name, arguments, error):
        """Report the failure and return a tool-specific fallback message."""
        print(f"Tool {tool_name} failed: {str(error)}")
        if tool_name == "search_web":
            return "Cached search results (offline mode)"
        if tool_name == "fetch_url":
            return "Document fetch failed - try alternative source"
        return f"{tool_name} is currently unavailable"


# Human-in-the-Loop (HITL) Design
Critical decisions require human approval:
class HumanInTheLoopAgent:
    """Agent loop that pauses for human approval before sensitive tool calls.

    Attributes:
        model: Object exposing ``generate_content(messages)``; responses are
            read through ``candidates[0].content.parts`` and ``text``.
        approval_handler: Object exposing ``get_approval()`` returning a
            truthy value to proceed.
        tool_executor: Optional ``callable(name, args)`` used to run tools.
            When omitted, the module-level ``execute_tool`` is used.
    """

    def __init__(self, model, approval_handler, tool_executor=None):
        self.model = model
        self.approval_handler = approval_handler
        self.tool_executor = tool_executor

    def run_with_approval(self, task, approval_required_actions=None,
                          max_iterations=10):
        """Execute the agent with human approval gates.

        Args:
            task: The user's request as plain text.
            approval_required_actions: Substrings that mark a tool name as
                needing approval. Defaults to delete / modify_financial /
                send_email.
            max_iterations: Cap on model turns so the loop cannot run
                forever; pass None for no cap.

        Returns:
            The model's final text, "User rejected action" when approval is
            denied, or "Maximum iterations reached" when the cap is hit.
        """
        if approval_required_actions is None:
            approval_required_actions = ["delete", "modify_financial", "send_email"]

        # The SDK's content dicts are keyed by "role" and "parts".
        messages = [{"role": "user", "parts": [task]}]
        turns = 0
        while max_iterations is None or turns < max_iterations:
            turns += 1
            response = self.model.generate_content(messages)
            model_turn = response.candidates[0].content
            tool_calls = [
                part.function_call
                for part in model_turn.parts
                if part.function_call
            ]

            # Check termination: no function calls means a final answer.
            if not tool_calls:
                return response.text
            messages.append(model_turn)

            # A call needs approval when its name contains a gated action.
            approval_needed = [
                call for call in tool_calls
                if any(action in call.name for action in approval_required_actions)
            ]
            if approval_needed:
                print(f"\nActions requiring approval:\n")
                for call in approval_needed:
                    print(f"  - {call.name}: {call.args}")
                # Wait for human judgment; nothing in this turn runs if denied.
                if not self.approval_handler.get_approval():
                    return "User rejected action"

            # Execute tools
            run_tool = self.tool_executor or execute_tool
            results = [
                {
                    "function_response": {
                        "name": call.name,
                        # The response payload must be a mapping, not a string.
                        "response": {"result": run_tool(call.name, call.args)},
                    }
                }
                for call in tool_calls
            ]
            messages.append({"role": "user", "parts": results})
        return "Maximum iterations reached"


# Real Example: Research Agent
A practical implementation of a multi-step research agent:
class ResearchReportAgent:
    """Builds a research-report prompt and runs it through ``ResearchAgent``.

    Attributes:
        model: Model object handed to the underlying ``ResearchAgent``.
        report_sections: Empty dict created at construction; not populated
            by the methods below.
    """

    def __init__(self, model):
        self.model = model
        self.report_sections = {}

    def generate_report(self, topic, num_sources=5):
        """Generate a comprehensive research report.

        Args:
            topic: Subject of the report.
            num_sources: Minimum number of sources the prompt asks for.

        Returns:
            Whatever ``ResearchAgent.run`` returns for the built prompt.
        """
        research_agent = ResearchAgent(self.model)
        return research_agent.run(self._build_task(topic, num_sources))

    def _build_task(self, topic, num_sources):
        """Return the report prompt for ``topic`` asking for ``num_sources`` sources."""
        # num_sources is interpolated so the parameter actually takes effect;
        # with the default of 5 the prompt wording is unchanged.
        lines = (
            "",
            "Create a detailed research report on the following topic:",
            f"'{topic}'",
            f"Gather information from at least {num_sources} reliable sources with this structure:",
            "1. Executive Summary",
            "2. Key Findings",
            "3. Industry Impact",
            "4. Future Outlook",
            "5. Recommendations",
            "Include specific quotes and citations for each section.",
            "",
        )
        return "\n".join(lines)


# Agent Evaluation and Testing
Quality assurance before production is critical:
import json
class AgentEvaluator:
    """Runs agents against test cases and summarises the outcomes.

    Attributes:
        test_cases: Registered cases, each with "task" and "expected_behaviors".
        results: Evaluations recorded by ``evaluate_agent``.
    """

    def __init__(self):
        self.test_cases = []
        self.results = []

    def add_test_case(self, task, expected_behaviors):
        """Register a task and the substrings its result must contain."""
        self.test_cases.append({
            "task": task,
            "expected_behaviors": expected_behaviors,
        })

    def evaluate_agent(self, agent, test_case):
        """Run ``agent`` on ``test_case`` and record the evaluation.

        ``agent`` must expose ``run(task)``. ``conversation_history``,
        ``iteration_count`` and ``execution_time`` are used when present;
        otherwise they are derived here, so agents that do not track them
        can still be evaluated.
        """
        import time  # function-scope import keeps the snippet self-contained

        started = time.perf_counter()
        result = agent.run(test_case["task"])
        elapsed = time.perf_counter() - started

        history = getattr(agent, "conversation_history", None) or []
        model_turns = sum(1 for entry in history if self._role(entry) == "model")
        evaluation = {
            "task": test_case["task"],
            "result": result,
            "metrics": {
                # Count executed tool calls, not every history entry.
                "tool_calls_made": self._count_tool_calls(history),
                # Fallback is an estimate: model turns recorded in the history.
                "total_iterations": getattr(agent, "iteration_count", model_turns),
                "success": self._check_success(result, test_case["expected_behaviors"]),
                "execution_time": getattr(agent, "execution_time", elapsed),
            },
        }
        self.results.append(evaluation)
        return evaluation

    @staticmethod
    def _role(entry):
        """Return the role of a history entry given as a dict or an object."""
        if isinstance(entry, dict):
            return entry.get("role")
        return getattr(entry, "role", None)

    @staticmethod
    def _count_tool_calls(history):
        """Count function-response parts in dict-shaped history entries."""
        count = 0
        for entry in history:
            if not isinstance(entry, dict):
                continue
            # Accept both "parts" and "content" as the payload key.
            payload = entry.get("parts", entry.get("content"))
            if isinstance(payload, list):
                count += sum(
                    1 for part in payload
                    if isinstance(part, dict) and "function_response" in part
                )
        return count

    def _check_success(self, result, expected_behaviors):
        """Return True when every expected behavior appears in ``result``."""
        return all(behavior in result for behavior in expected_behaviors)

    def generate_report(self):
        """Generate the evaluation summary; rates are 0.0 when nothing ran."""
        total = len(self.results)
        if total == 0:
            # Avoid dividing by zero before any evaluation has been recorded.
            return {
                "total_tests": 0,
                "success_rate": 0.0,
                "average_iterations": 0.0,
                "detailed_results": self.results,
            }
        passed = sum(1 for r in self.results if r["metrics"]["success"])
        iterations = sum(r["metrics"]["total_iterations"] for r in self.results)
        return {
            "total_tests": total,
            "success_rate": passed / total,
            "average_iterations": iterations / total,
            "detailed_results": self.results,
        }


# Cost and Latency Optimization
class OptimizedAgentPipeline:
    """Routes tasks to a model by task type and groups tool calls by name.

    Attributes:
        model_routing: Map of task type -> model name.
    """

    def __init__(self):
        self.model_routing = {
            "complex_reasoning": "gemini-2.5-pro",
            "simple_lookup": "gemini-1.5-flash",
            "text_generation": "gemini-1.5-flash",
        }

    def route_task(self, task_type, content):
        """Return a model for ``task_type``; unknown types use gemini-2.5-pro.

        ``content`` is accepted for interface compatibility but is not used
        in the routing decision.
        """
        model_name = self.model_routing.get(task_type, "gemini-2.5-pro")
        return genai.GenerativeModel(model_name)

    def batch_tool_results(self, tool_calls):
        """Group the ``args`` of each call by tool name, keeping call order.

        Returns:
            Dict mapping tool name -> list of that tool's args, in the order
            the calls were given.
        """
        batched = {}
        for call in tool_calls:
            batched.setdefault(call.name, []).append(call.args)
        return batched


# Production Considerations
Rate Limiting and Timeouts
from ratelimit import limits, sleep_and_retry
class ProductionAgent:
    """Rate-limited, timeout-protected wrappers around an agent.

    NOTE(review): this class reads ``self.model`` and calls ``self.run`` but
    defines neither; presumably a subclass or mixin supplies them - confirm.
    """

    @sleep_and_retry
    @limits(calls=100, period=60)  # 100 calls per minute
    def call_api(self, request):
        """Forward ``request`` to the model under the rate limit above."""
        return self.model.generate_content(request)

    def run_with_timeout(self, task, timeout_seconds=300):
        """Execute ``self.run(task)`` with timeout protection.

        Relies on SIGALRM, so it only works on platforms that provide it and
        when called from the main thread.

        Args:
            task: Task passed through to ``self.run``.
            timeout_seconds: Whole seconds before the run is interrupted.

        Returns:
            The result of ``self.run(task)``, or
            "Execution exceeded timeout limit" when a TimeoutError is raised.
        """
        import signal

        def timeout_handler(signum, frame):
            raise TimeoutError("Agent execution timeout")

        # signal.signal returns the handler it replaces; keep it for restore.
        previous_handler = signal.signal(signal.SIGALRM, timeout_handler)
        signal.alarm(timeout_seconds)
        try:
            return self.run(task)
        except TimeoutError:
            return "Execution exceeded timeout limit"
        finally:
            # Cancel the timer on every exit path; otherwise a failed run
            # leaves the alarm armed and it fires later in unrelated code.
            signal.alarm(0)
            signal.signal(signal.SIGALRM, previous_handler)


# Observability and Logging
import logging
from datetime import datetime
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


class ObservableAgent:
    """Wraps an agent so each run is logged at start, success and failure.

    Attributes:
        agent: Object exposing ``run(task)``. May be passed to the
            constructor or assigned afterwards.
    """

    # Class-level default so the attribute always exists.
    agent = None

    def __init__(self, agent=None):
        # Only assign when given, so a subclass-level ``agent`` is not hidden.
        if agent is not None:
            self.agent = agent

    def run_with_logging(self, task):
        """Execute the wrapped agent with start/success/failure logging.

        Returns:
            The result of ``self.agent.run(task)``.

        Raises:
            Exception: Any error from the agent is logged and re-raised.
        """
        # The start timestamp doubles as a per-run identifier in the logs.
        agent_id = datetime.now().isoformat()
        logger.info("Agent %s started: %s", agent_id, task)
        try:
            result = self.agent.run(task)
        except Exception as e:
            # logger.exception logs at ERROR level and attaches the traceback.
            logger.exception("Agent %s failed: %s", agent_id, e)
            raise
        logger.info("Agent %s succeeded", agent_id)
        return result


# Agent Design Checklist
Before production deployment, verify:
- [ ] Tool definitions have accurate types and constraints
- [ ] Parallel tool calling tested end-to-end
- [ ] Multi-step loop terminates correctly
- [ ] Error recovery patterns tested with failures
- [ ] HITL gates in place for critical decisions
- [ ] State persistence backend selected and tested
- [ ] Rate limits and timeouts configured
- [ ] Logging and monitoring enabled
- [ ] Cost estimates calculated within budget
- [ ] A/B testing completed before rollout
- [ ] Rollback procedure documented
- [ ] 24/7 support procedures established
When I let agents run parts of the Dolice Labs operations as an indie developer, the biggest win wasn't smarter orchestration — it was designing the human stop points first. I gate irreversible actions like billing or publishing behind a confirmation step, funnel state and logs through that same step, and cap every tool with a retry limit and timeout so an agent never silently grinds forever.
Conclusion
Production-grade agent systems with Gemini 2.5 Pro are achievable through proper design principles, robust error handling, and human-in-the-loop integration. Use the patterns and examples in this guide to build reliable, maintainable AI agents that scale confidently in production environments.