Building a Chatbot API with AWS Chalice: Part 2 - Integrating AWS Bedrock
- Carlo Sansonetti
Welcome back to our series on building a serverless chatbot API! In Part 1, we set up our Chalice project and deployed a simple API to AWS. Now, it's time to add intelligence to our chatbot by integrating AWS Bedrock, Amazon's fully managed service that offers access to foundation models (FMs) from leading AI companies.
Why AWS Bedrock?
When building CGCircuit's chatbot, I chose AWS Bedrock for several compelling reasons:
Multiple model options - Access to models from Anthropic, AI21, Cohere, and Amazon's own models
No infrastructure management - Fully managed service that scales with usage
Security and privacy - Data isn't used for training, maintaining your content privacy
Seamless AWS integration - Works well with our existing AWS infrastructure
Cost control - Pay-as-you-go pricing with transparent token counting
Prerequisites
Before continuing, make sure you have:
Completed Part 1 of this series
Access to AWS Bedrock in your AWS account (note that this requires requesting access)
Updated IAM permissions for Bedrock access
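If you're not sure whether Bedrock model access has already been granted in your account, a quick sanity check is to list the foundation models visible to your credentials. This is just an illustrative snippet; the region is an assumption, and note that a model appearing in the list doesn't by itself guarantee you've been granted invoke access to it (access is enabled per model in the Bedrock console).

import boto3

# "bedrock" is the control-plane client; "bedrock-runtime" (used later) is for inference
bedrock_client = boto3.client("bedrock", region_name="us-east-1")
models = bedrock_client.list_foundation_models()
for summary in models["modelSummaries"]:
    print(summary["modelId"])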
Setting Up AWS Bedrock Access
First, we need to update our IAM permissions to allow Bedrock access. Update your .chalice/policy.json file (for production you may want to scope the bedrock statement's Resource to specific foundation-model ARNs rather than "*"):
{
  "Version": "2012-10-17",
  "Statement": [
    {
      "Effect": "Allow",
      "Action": [
        "logs:CreateLogGroup",
        "logs:CreateLogStream",
        "logs:PutLogEvents"
      ],
      "Resource": "arn:aws:logs:*:*:*"
    },
    {
      "Effect": "Allow",
      "Action": [
        "bedrock:InvokeModel",
        "bedrock:InvokeModelWithResponseStream"
      ],
      "Resource": "*"
    }
  ]
}
Installing the Required Dependencies
Update your requirements.txt file to include the AWS SDK for Bedrock:
boto3>=1.28.0
The standard boto3 package includes support for Bedrock, so no additional packages are needed.
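Very early 1.28 releases predate Bedrock, so if you're unsure about your local install, this quick check (an optional sanity check, not part of the app) will tell you whether your boto3 knows about the Bedrock runtime service:

import boto3

print(boto3.__version__)
# Raises botocore.exceptions.UnknownServiceError on boto3 versions
# that predate Bedrock support
boto3.client("bedrock-runtime", region_name="us-east-1")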
Creating a Bedrock Client Helper
Now, let's create a Bedrock helper in the chalicelib/bedrock.py file:
import json
import os
import boto3
# Initialize Bedrock client
bedrock_runtime = boto3.client(
    service_name="bedrock-runtime",
    region_name=os.environ.get("AWS_REGION", "us-east-1")
)
# Constants for models
CLAUDE_3_HAIKU = "anthropic.claude-3-haiku-20240307-v1:0"
CLAUDE_3_SONNET = "anthropic.claude-3-sonnet-20240229-v1:0"
DEFAULT_MODEL = CLAUDE_3_HAIKU
def generate_response(message, context=None, model_id=DEFAULT_MODEL, max_tokens=500):
    """
    Generate a response using AWS Bedrock.

    Args:
        message (str): The user message to respond to
        context (str, optional): Previous conversation for context
        model_id (str, optional): The model ID to use
        max_tokens (int, optional): Maximum tokens in response

    Returns:
        str: The generated response
    """
    # Format the prompt based on the model
    if model_id.startswith("anthropic.claude"):
        prompt = format_claude_prompt(message, context)
        response = invoke_claude(prompt, model_id, max_tokens)
        return parse_claude_response(response)
    else:
        # Add support for other models as needed
        raise ValueError(f"Unsupported model: {model_id}")

def format_claude_prompt(message, context=None):
    """Format the prompt for Claude models"""
    if context:
        return {
            "anthropic_version": "bedrock-2023-05-31",
            "max_tokens": 500,
            "messages": [
                {"role": "user", "content": context + "\n\n" + message}
            ]
        }
    else:
        return {
            "anthropic_version": "bedrock-2023-05-31",
            "max_tokens": 500,
            "messages": [
                {"role": "user", "content": message}
            ]
        }

def invoke_claude(prompt, model_id, max_tokens):
    """Invoke Claude model with the given prompt"""
    try:
        # Apply the requested token limit so the max_tokens argument is honored
        prompt["max_tokens"] = max_tokens
        response = bedrock_runtime.invoke_model(
            modelId=model_id,
            body=json.dumps(prompt)
        )
        return json.loads(response["body"].read())
    except Exception as e:
        print(f"Error invoking Bedrock: {str(e)}")
        raise

def parse_claude_response(response):
    """Parse the response from Claude models"""
    try:
        return response["content"][0]["text"]
    except (KeyError, IndexError) as e:
        print(f"Error parsing response: {str(e)}")
        return "I'm sorry, I couldn't generate a response."
Creating Chat Endpoints
Now, let's update our app.py file to include chat endpoints:
from chalice import Chalice, Response
import json
from chalicelib import bedrock
app = Chalice(app_name='chatbot-api')
@app.route('/')
def index():
    return {'message': 'Welcome to the Chatbot API'}

@app.route('/health')
def health_check():
    return {'status': 'healthy'}

@app.route('/chat', methods=['POST'])
def chat():
    request_body = app.current_request.json_body
    if not request_body or 'message' not in request_body:
        return Response(
            body=json.dumps({'error': 'Message is required'}),
            status_code=400,
            headers={'Content-Type': 'application/json'}
        )

    message = request_body['message']
    context = request_body.get('context')
    model_id = request_body.get('model_id', bedrock.DEFAULT_MODEL)

    try:
        response = bedrock.generate_response(message, context, model_id)
        return {
            'response': response
        }
    except Exception as e:
        return Response(
            body=json.dumps({'error': str(e)}),
            status_code=500,
            headers={'Content-Type': 'application/json'}
        )

@app.route('/models', methods=['GET'])
def list_models():
    """List available models"""
    return {
        'models': [
            {
                'id': bedrock.CLAUDE_3_HAIKU,
                'name': 'Claude 3 Haiku',
                'description': 'Fast and efficient text generation'
            },
            {
                'id': bedrock.CLAUDE_3_SONNET,
                'name': 'Claude 3 Sonnet',
                'description': 'Balanced performance and intelligence'
            }
            # Add more models as needed
        ]
    }
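If you're on a reasonably recent Chalice release, you can also exercise these routes without starting a local server by using Chalice's built-in test client. The sketch below (run with pytest) only hits the /models route; testing /chat the same way would make a real Bedrock call unless you stub out bedrock.generate_response:

from chalice.test import Client
from app import app

def test_list_models():
    with Client(app) as client:
        response = client.http.get('/models')
        assert response.status_code == 200
        assert len(response.json_body['models']) >= 2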
Creating a Utility Function for Conversation Handling
To make our chatbot more user-friendly, let's add a utility function in chalicelib/utils.py to format conversations in a way that maintains context:
def format_conversation_history(conversation):
    """
    Format a conversation history for use with the chatbot API.

    Args:
        conversation (list): List of message dictionaries with 'role' and 'content'

    Returns:
        list: Properly formatted conversation history
    """
    formatted = []
    for message in conversation:
        if not isinstance(message, dict) or 'role' not in message or 'content' not in message:
            continue
        role = message.get('role')
        content = message.get('content')
        if role not in ['user', 'assistant', 'system']:
            continue
        formatted.append({
            'role': role,
            'content': content
        })
    return formatted

def truncate_conversation_if_too_long(conversation, max_length=4000):
    """
    Truncate conversation history if it exceeds a certain length
    to prevent token limits from being exceeded.

    Args:
        conversation (list): List of message dictionaries
        max_length (int): Maximum allowed total content length

    Returns:
        list: Truncated conversation history
    """
    total_length = sum(len(msg.get('content', '')) for msg in conversation)
    if total_length <= max_length:
        return conversation

    # Keep the most recent messages, dropping older ones
    truncated = []
    current_length = 0
    for msg in reversed(conversation):
        msg_length = len(msg.get('content', ''))
        if current_length + msg_length > max_length:
            break
        truncated.insert(0, msg)
        current_length += msg_length
    return truncated
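These helpers aren't wired into the endpoints yet, but here's how a client (or a future version of the /chat handler) might combine them before sending context to Bedrock. The sample history is illustrative only:

from chalicelib.utils import format_conversation_history, truncate_conversation_if_too_long

history = [
    {"role": "user", "content": "Do you have tutorials on Houdini?"},
    {"role": "assistant", "content": "Yes, CGCircuit offers several Houdini courses."},
    {"role": "user", "content": "Which one is best for beginners?"},
]

# Drop malformed entries, then cap the total context length
context = truncate_conversation_if_too_long(format_conversation_history(history))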
Testing Locally
Let's test our enhanced chatbot API locally:
chalice local
Now you can use curl or Postman to test the chat endpoint:
curl -X POST http://127.0.0.1:8000/chat \
-H "Content-Type: application/json" \
-d '{"message": "What are the benefits of using AWS Chalice for API development?"}'
You should receive a thoughtful response from your AI-powered chatbot! Later in this post we'll add a system_prompt parameter; once it's in place, you can steer the assistant's persona like this:
curl -X POST http://127.0.0.1:8000/chat \
-H "Content-Type: application/json" \
-d '{
"message": "What tutorials do you offer on rigging characters?",
"system_prompt": "You are a knowledgeable assistant for CGCircuit, an educational platform focusing on technical art, 3D modeling, and animation. You specialize in helping users find tutorials about rigging and character animation. Your tone is friendly and enthusiastic."
}'
Deploying to AWS
Now that our chatbot is working locally, let's deploy it to AWS:
chalice deploy
After deployment, you can test your live API:
curl -X POST https://your-api-id.execute-api.us-east-1.amazonaws.com/api/chat \
-H "Content-Type: application/json" \
-d '{"message": "What are the benefits of using AWS Chalice for API development?"}'
Creating a Custom Streaming Response Endpoint
One improvement we can make is to implement streaming responses, which provides a more interactive experience for users. Let's add a streaming endpoint to our app.py:
@app.route('/chat/stream', methods=['POST'])
def chat_stream():
    request_body = app.current_request.json_body
    if not request_body or 'message' not in request_body:
        return Response(
            body=json.dumps({'error': 'Message is required'}),
            status_code=400,
            headers={'Content-Type': 'application/json'}
        )

    message = request_body['message']
    context = request_body.get('context')
    model_id = request_body.get('model_id', bedrock.DEFAULT_MODEL)

    try:
        # This would be implemented in bedrock.py
        # For now, we'll return a non-streaming response
        response = bedrock.generate_response(message, context, model_id)
        return {
            'response': response,
            'streaming': False,
            'message': 'Streaming not yet implemented'
        }
    except Exception as e:
        return Response(
            body=json.dumps({'error': str(e)}),
            status_code=500,
            headers={'Content-Type': 'application/json'}
        )
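For reference, here's roughly what a real streaming helper in chalicelib/bedrock.py could look like. This is only a sketch: it assumes the Claude 3 messages event format, and keep in mind that a standard Chalice route behind API Gateway buffers its response, so delivering chunks incrementally to the client would require a different transport (for example WebSockets or Lambda response streaming).

def stream_claude_response(prompt, model_id=DEFAULT_MODEL):
    """Sketch: yield text chunks from Claude as they are generated."""
    response = bedrock_runtime.invoke_model_with_response_stream(
        modelId=model_id,
        body=json.dumps(prompt)
    )
    for event in response["body"]:
        chunk = json.loads(event["chunk"]["bytes"])
        # Claude 3 emits incremental text in content_block_delta events
        if chunk.get("type") == "content_block_delta":
            yield chunk["delta"].get("text", "")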
Enhancing the Chatbot with System Instructions
System instructions let us tailor the chatbot's persona and behavior for different use cases. Let's wire this through the whole stack, starting with the /chat endpoint in app.py, which now accepts a custom system prompt:
@app.route('/chat', methods=['POST'])
def chat():
    request_body = app.current_request.json_body
    if not request_body or 'message' not in request_body:
        return Response(
            body=json.dumps({'error': 'Message is required'}),
            status_code=400,
            headers={'Content-Type': 'application/json'}
        )

    message = request_body['message']
    context = request_body.get('context')
    model_id = request_body.get('model_id', bedrock.DEFAULT_MODEL)
    system_prompt = request_body.get('system_prompt')  # New parameter

    try:
        response = bedrock.generate_response(
            message,
            context,
            model_id,
            system_prompt=system_prompt  # Pass the system prompt
        )
        return {
            'response': response
        }
    except Exception as e:
        return Response(
            body=json.dumps({'error': str(e)}),
            status_code=500,
            headers={'Content-Type': 'application/json'}
        )
And update both generate_response and format_claude_prompt in bedrock.py to accept and apply the system prompt:
def generate_response(message, context=None, model_id=DEFAULT_MODEL, max_tokens=500, system_prompt=None):
    """
    Generate a response using AWS Bedrock.

    Args:
        message (str): The user message to respond to
        context (str, optional): Previous conversation for context
        model_id (str, optional): The model ID to use
        max_tokens (int, optional): Maximum tokens in response
        system_prompt (str, optional): Custom system instructions

    Returns:
        str: The generated response
    """
    # Format the prompt based on the model
    if model_id.startswith("anthropic.claude"):
        prompt = format_claude_prompt(message, context, system_prompt)
        response = invoke_claude(prompt, model_id, max_tokens)
        return parse_claude_response(response)
    else:
        # Add support for other models as needed
        raise ValueError(f"Unsupported model: {model_id}")

def format_claude_prompt(message, context=None, system_prompt=None):
    """Format the prompt for Claude models, including optional system instructions"""
    if not system_prompt:
        system_prompt = "You are a helpful, friendly AI assistant for CGCircuit, an educational platform for technical artists and developers. You provide concise, accurate information about CGCircuit's offerings, tutorials, and technical topics related to computer graphics, programming, and digital art. If you don't know the answer to something, acknowledge that instead of making up information."

    messages = []
    if context:
        # Add previous conversation turns for context
        context_turns = parse_context(context)
        messages.extend(context_turns)

    # Add the current user message
    messages.append({"role": "user", "content": message})

    return {
        "anthropic_version": "bedrock-2023-05-31",
        "max_tokens": 500,
        "system": system_prompt,
        "messages": messages
    }
def parse_context(context):
    """Parse the context string into a list of message objects"""
    try:
        # If context is already in JSON format
        if isinstance(context, list):
            return context
        # If context is a string that contains JSON
        if context.startswith('[') and context.endswith(']'):
            return json.loads(context)
        # Otherwise, create a simple context message
        return [{"role": "assistant", "content": context}]
    except Exception:
        # Fallback to simple context
        return [{"role": "assistant", "content": context}]