mirror of
https://github.com/wshobson/agents.git
synced 2026-03-18 17:47:16 +00:00
chore: update model references to Claude 4.6 and GPT-5.2
- Claude Opus 4.5 → Opus 4.6, Claude Sonnet 4.5 → Sonnet 4.6 (Haiku stays 4.5)
- Update claude-sonnet-4-5 model IDs to claude-sonnet-4-6 in code examples
- Update SWE-bench stat from 80.9% to 80.8% for Opus 4.6
- Update GPT refs: GPT-5 → GPT-5.2, GPT-4o → gpt-5.2, GPT-4o-mini → GPT-5-mini
- Fix GPT-5.2-mini → GPT-5-mini (correct model name per OpenAI)
- Bump marketplace to v1.5.2 and affected plugin versions
This commit is contained in:
@@ -283,7 +283,7 @@ Provide ratings in JSON format:
|
||||
}}"""
|
||||
|
||||
message = client.messages.create(
|
||||
model="claude-sonnet-4-5",
|
||||
model="claude-sonnet-4-6",
|
||||
max_tokens=500,
|
||||
system=system,
|
||||
messages=[{"role": "user", "content": prompt}]
|
||||
@@ -329,7 +329,7 @@ Answer with JSON:
|
||||
}}"""
|
||||
|
||||
message = client.messages.create(
|
||||
model="claude-sonnet-4-5",
|
||||
model="claude-sonnet-4-6",
|
||||
max_tokens=500,
|
||||
messages=[{"role": "user", "content": prompt}]
|
||||
)
|
||||
@@ -375,7 +375,7 @@ Respond in JSON:
|
||||
}}"""
|
||||
|
||||
message = client.messages.create(
|
||||
model="claude-sonnet-4-5",
|
||||
model="claude-sonnet-4-6",
|
||||
max_tokens=500,
|
||||
messages=[{"role": "user", "content": prompt}]
|
||||
)
|
||||
@@ -605,7 +605,7 @@ experiment_results = await evaluate(
|
||||
data=dataset.name,
|
||||
evaluators=evaluators,
|
||||
experiment_prefix="v1.0.0",
|
||||
metadata={"model": "claude-sonnet-4-5", "version": "1.0.0"}
|
||||
metadata={"model": "claude-sonnet-4-6", "version": "1.0.0"}
|
||||
)
|
||||
|
||||
print(f"Mean score: {experiment_results.aggregate_metrics['qa']['mean']}")
|
||||
|
||||
Reference in New Issue
Block a user