mirror of
https://github.com/wshobson/agents.git
synced 2026-03-18 09:37:15 +00:00
chore: update model references to Claude 4.6 and GPT-5.2
- Claude Opus 4.5 → Opus 4.6, Claude Sonnet 4.5 → Sonnet 4.6 (Haiku stays 4.5)
- Update claude-sonnet-4-5 model IDs to claude-sonnet-4-6 in code examples
- Update SWE-bench stat from 80.9% to 80.8% for Opus 4.6
- Update GPT refs: GPT-5 → GPT-5.2, GPT-4o → gpt-5.2, GPT-4o-mini → GPT-5-mini
- Fix GPT-5.2-mini → GPT-5-mini (correct model name per OpenAI)
- Bump marketplace to v1.5.2 and affected plugin versions
This commit is contained in:
@@ -1,7 +1,7 @@
|
||||
{
|
||||
"name": "llm-application-dev",
|
||||
"description": "LLM application development with LangGraph, RAG systems, vector search, and AI agent architectures for Claude 4.5 and GPT-5.2",
|
||||
"version": "2.0.3",
|
||||
"description": "LLM application development with LangGraph, RAG systems, vector search, and AI agent architectures for Claude 4.6 and GPT-5.2",
|
||||
"version": "2.0.4",
|
||||
"author": {
|
||||
"name": "Seth Hobson",
|
||||
"email": "seth@major7apps.com"
|
||||
|
||||
@@ -5,7 +5,7 @@ Build production-ready LLM applications, advanced RAG systems, and intelligent a
|
||||
## Version 2.0.0 Highlights
|
||||
|
||||
- **LangGraph Integration**: Updated from deprecated LangChain patterns to LangGraph StateGraph workflows
|
||||
- **Modern Model Support**: Claude Opus/Sonnet/Haiku 4.5 and GPT-5.2/GPT-5.2-mini
|
||||
- **Modern Model Support**: Claude Opus 4.6/Sonnet 4.6/Haiku 4.5 and GPT-5.2/GPT-5-mini
|
||||
- **Voyage AI Embeddings**: Recommended embedding models for Claude applications
|
||||
- **Structured Outputs**: Pydantic-based structured output patterns
|
||||
|
||||
@@ -71,7 +71,7 @@ Build production-ready LLM applications, advanced RAG systems, and intelligent a
|
||||
### 2.0.0 (January 2026)
|
||||
|
||||
- **Breaking**: Migrated from LangChain 0.x to LangChain 1.x/LangGraph
|
||||
- **Breaking**: Updated model references to Claude 4.5 and GPT-5.2
|
||||
- **Breaking**: Updated model references to Claude 4.6 and GPT-5.2
|
||||
- Added Voyage AI as primary embedding recommendation for Claude apps
|
||||
- Added LangGraph StateGraph patterns replacing deprecated `initialize_agent()`
|
||||
- Added structured outputs with Pydantic
|
||||
|
||||
@@ -14,8 +14,8 @@ Expert AI engineer specializing in LLM application development, RAG systems, and
|
||||
|
||||
### LLM Integration & Model Management
|
||||
|
||||
- OpenAI GPT-5.2/GPT-5.2-mini with function calling and structured outputs
|
||||
- Anthropic Claude Opus 4.5, Claude Sonnet 4.5, Claude Haiku 4.5 with tool use and computer use
|
||||
- OpenAI GPT-5.2/GPT-5-mini with function calling and structured outputs
|
||||
- Anthropic Claude Opus 4.6, Claude Sonnet 4.6, Claude Haiku 4.5 with tool use and computer use
|
||||
- Open-source models: Llama 3.3, Mixtral 8x22B, Qwen 2.5, DeepSeek-V3
|
||||
- Local deployment with Ollama, vLLM, TGI (Text Generation Inference)
|
||||
- Model serving with TorchServe, MLflow, BentoML for production deployment
|
||||
@@ -76,7 +76,7 @@ Expert AI engineer specializing in LLM application development, RAG systems, and
|
||||
|
||||
### Multimodal AI Integration
|
||||
|
||||
- Vision models: GPT-4V, Claude 4 Vision, LLaVA, CLIP for image understanding
|
||||
- Vision models: GPT-5.2, Claude 4 Vision, LLaVA, CLIP for image understanding
|
||||
- Audio processing: Whisper for speech-to-text, ElevenLabs for text-to-speech
|
||||
- Document AI: OCR, table extraction, layout understanding with models like LayoutLM
|
||||
- Video analysis and processing for multimedia applications
|
||||
@@ -124,7 +124,7 @@ Expert AI engineer specializing in LLM application development, RAG systems, and
|
||||
|
||||
## Knowledge Base
|
||||
|
||||
- Latest LLM developments and model capabilities (GPT-5.2, Claude 4.5, Llama 3.3)
|
||||
- Latest LLM developments and model capabilities (GPT-5.2, Claude 4.6, Llama 3.3)
|
||||
- Modern vector database architectures and optimization techniques
|
||||
- Production AI system design patterns and best practices
|
||||
- AI safety and security considerations for enterprise deployments
|
||||
|
||||
@@ -48,7 +48,7 @@ Expert prompt engineer specializing in advanced prompting methodologies and LLM
|
||||
|
||||
### Model-Specific Optimization
|
||||
|
||||
#### OpenAI Models (GPT-5.2, GPT-5.2-mini)
|
||||
#### OpenAI Models (GPT-5.2, GPT-5-mini)
|
||||
|
||||
- Function calling optimization and structured outputs
|
||||
- JSON mode utilization for reliable data extraction
|
||||
@@ -58,7 +58,7 @@ Expert prompt engineer specializing in advanced prompting methodologies and LLM
|
||||
- Multi-turn conversation management
|
||||
- Image and multimodal prompt engineering
|
||||
|
||||
#### Anthropic Claude (Claude Opus 4.5, Sonnet 4.5, Haiku 4.5)
|
||||
#### Anthropic Claude (Claude Opus 4.6, Sonnet 4.6, Haiku 4.5)
|
||||
|
||||
- Constitutional AI alignment with Claude's training
|
||||
- Tool use optimization for complex workflows
|
||||
|
||||
@@ -37,7 +37,7 @@ class AgentState(TypedDict):
|
||||
|
||||
### Model & Embeddings
|
||||
|
||||
- **Primary LLM**: Claude Sonnet 4.5 (`claude-sonnet-4-5`)
|
||||
- **Primary LLM**: Claude Sonnet 4.6 (`claude-sonnet-4-6`)
|
||||
- **Embeddings**: Voyage AI (`voyage-3-large`) - officially recommended by Anthropic for Claude
|
||||
- **Specialized**: `voyage-code-3` (code), `voyage-finance-2` (finance), `voyage-law-2` (legal)
|
||||
|
||||
@@ -158,7 +158,7 @@ from langsmith.evaluation import evaluate
|
||||
# Run evaluation suite
|
||||
eval_config = RunEvalConfig(
|
||||
evaluators=["qa", "context_qa", "cot_qa"],
|
||||
eval_llm=ChatAnthropic(model="claude-sonnet-4-5")
|
||||
eval_llm=ChatAnthropic(model="claude-sonnet-4-6")
|
||||
)
|
||||
|
||||
results = await evaluate(
|
||||
@@ -209,7 +209,7 @@ async def call_with_retry():
|
||||
|
||||
## Implementation Checklist
|
||||
|
||||
- [ ] Initialize LLM with Claude Sonnet 4.5
|
||||
- [ ] Initialize LLM with Claude Sonnet 4.6
|
||||
- [ ] Setup Voyage AI embeddings (voyage-3-large)
|
||||
- [ ] Create tools with async support and error handling
|
||||
- [ ] Implement memory system (choose type based on use case)
|
||||
|
||||
@@ -150,7 +150,7 @@ gpt5_optimized = """
|
||||
|
||||
````
|
||||
|
||||
**Claude 4.5/4**
|
||||
**Claude 4.6/4.5**
|
||||
```python
|
||||
claude_optimized = """
|
||||
<context>
|
||||
@@ -607,7 +607,7 @@ testing_recommendations:
|
||||
metrics: ["accuracy", "satisfaction", "cost"]
|
||||
|
||||
deployment_strategy:
|
||||
model: "GPT-5.2 for quality, Claude 4.5 for safety"
|
||||
model: "GPT-5.2 for quality, Claude 4.6 for safety"
|
||||
temperature: 0.7
|
||||
max_tokens: 2000
|
||||
monitoring: "Track success, latency, feedback"
|
||||
|
||||
@@ -115,8 +115,8 @@ from langchain_core.tools import tool
|
||||
import ast
|
||||
import operator
|
||||
|
||||
# Initialize LLM (Claude Sonnet 4.5 recommended)
|
||||
llm = ChatAnthropic(model="claude-sonnet-4-5", temperature=0)
|
||||
# Initialize LLM (Claude Sonnet 4.6 recommended)
|
||||
llm = ChatAnthropic(model="claude-sonnet-4-6", temperature=0)
|
||||
|
||||
# Define tools with Pydantic schemas
|
||||
@tool
|
||||
@@ -201,7 +201,7 @@ class RAGState(TypedDict):
|
||||
answer: str
|
||||
|
||||
# Initialize components
|
||||
llm = ChatAnthropic(model="claude-sonnet-4-5")
|
||||
llm = ChatAnthropic(model="claude-sonnet-4-6")
|
||||
embeddings = VoyageAIEmbeddings(model="voyage-3-large")
|
||||
vectorstore = PineconeVectorStore(index_name="docs", embedding=embeddings)
|
||||
retriever = vectorstore.as_retriever(search_kwargs={"k": 4})
|
||||
@@ -489,7 +489,7 @@ os.environ["LANGCHAIN_API_KEY"] = "your-api-key"
|
||||
os.environ["LANGCHAIN_PROJECT"] = "my-project"
|
||||
|
||||
# All LangChain/LangGraph operations are automatically traced
|
||||
llm = ChatAnthropic(model="claude-sonnet-4-5")
|
||||
llm = ChatAnthropic(model="claude-sonnet-4-6")
|
||||
```
|
||||
|
||||
### Custom Callback Handler
|
||||
@@ -530,7 +530,7 @@ result = await agent.ainvoke(
|
||||
```python
|
||||
from langchain_anthropic import ChatAnthropic
|
||||
|
||||
llm = ChatAnthropic(model="claude-sonnet-4-5", streaming=True)
|
||||
llm = ChatAnthropic(model="claude-sonnet-4-6", streaming=True)
|
||||
|
||||
# Stream tokens
|
||||
async for chunk in llm.astream("Tell me a story"):
|
||||
|
||||
@@ -283,7 +283,7 @@ Provide ratings in JSON format:
|
||||
}}"""
|
||||
|
||||
message = client.messages.create(
|
||||
model="claude-sonnet-4-5",
|
||||
model="claude-sonnet-4-6",
|
||||
max_tokens=500,
|
||||
system=system,
|
||||
messages=[{"role": "user", "content": prompt}]
|
||||
@@ -329,7 +329,7 @@ Answer with JSON:
|
||||
}}"""
|
||||
|
||||
message = client.messages.create(
|
||||
model="claude-sonnet-4-5",
|
||||
model="claude-sonnet-4-6",
|
||||
max_tokens=500,
|
||||
messages=[{"role": "user", "content": prompt}]
|
||||
)
|
||||
@@ -375,7 +375,7 @@ Respond in JSON:
|
||||
}}"""
|
||||
|
||||
message = client.messages.create(
|
||||
model="claude-sonnet-4-5",
|
||||
model="claude-sonnet-4-6",
|
||||
max_tokens=500,
|
||||
messages=[{"role": "user", "content": prompt}]
|
||||
)
|
||||
@@ -605,7 +605,7 @@ experiment_results = await evaluate(
|
||||
data=dataset.name,
|
||||
evaluators=evaluators,
|
||||
experiment_prefix="v1.0.0",
|
||||
metadata={"model": "claude-sonnet-4-5", "version": "1.0.0"}
|
||||
metadata={"model": "claude-sonnet-4-6", "version": "1.0.0"}
|
||||
)
|
||||
|
||||
print(f"Mean score: {experiment_results.aggregate_metrics['qa']['mean']}")
|
||||
|
||||
@@ -81,7 +81,7 @@ class SQLQuery(BaseModel):
|
||||
tables_used: list[str] = Field(description="List of tables referenced")
|
||||
|
||||
# Initialize model with structured output
|
||||
llm = ChatAnthropic(model="claude-sonnet-4-5")
|
||||
llm = ChatAnthropic(model="claude-sonnet-4-6")
|
||||
structured_llm = llm.with_structured_output(SQLQuery)
|
||||
|
||||
# Create prompt template
|
||||
@@ -124,7 +124,7 @@ async def analyze_sentiment(text: str) -> SentimentAnalysis:
|
||||
client = Anthropic()
|
||||
|
||||
message = client.messages.create(
|
||||
model="claude-sonnet-4-5",
|
||||
model="claude-sonnet-4-6",
|
||||
max_tokens=500,
|
||||
messages=[{
|
||||
"role": "user",
|
||||
@@ -427,7 +427,7 @@ client = Anthropic()
|
||||
|
||||
# Use prompt caching for repeated system prompts
|
||||
response = client.messages.create(
|
||||
model="claude-sonnet-4-5",
|
||||
model="claude-sonnet-4-6",
|
||||
max_tokens=1000,
|
||||
system=[
|
||||
{
|
||||
|
||||
@@ -68,7 +68,7 @@ def self_consistency_cot(query, n=5, temperature=0.7):
|
||||
responses = []
|
||||
for _ in range(n):
|
||||
response = openai.ChatCompletion.create(
|
||||
model="gpt-5",
|
||||
model="gpt-5.2",
|
||||
messages=[{"role": "user", "content": prompt}],
|
||||
temperature=temperature
|
||||
)
|
||||
|
||||
@@ -85,7 +85,7 @@ class RAGState(TypedDict):
|
||||
answer: str
|
||||
|
||||
# Initialize components
|
||||
llm = ChatAnthropic(model="claude-sonnet-4-5")
|
||||
llm = ChatAnthropic(model="claude-sonnet-4-6")
|
||||
embeddings = VoyageAIEmbeddings(model="voyage-3-large")
|
||||
vectorstore = PineconeVectorStore(index_name="docs", embedding=embeddings)
|
||||
retriever = vectorstore.as_retriever(search_kwargs={"k": 4})
|
||||
|
||||
Reference in New Issue
Block a user