chore: update model references to Claude 4.6 and GPT-5.2

- Claude Opus 4.5 → Opus 4.6, Claude Sonnet 4.5 → Sonnet 4.6 (Haiku stays 4.5)
- Update claude-sonnet-4-5 model IDs to claude-sonnet-4-6 in code examples
- Update SWE-bench stat from 80.9% to 80.8% for Opus 4.6
- Update GPT refs: GPT-5 → GPT-5.2, GPT-4o → GPT-5.2, GPT-4o-mini → GPT-5-mini
- Fix GPT-5.2-mini → GPT-5-mini (correct model name per OpenAI)
- Bump marketplace to v1.5.2 and affected plugin versions
This commit is contained in:
Seth Hobson
2026-02-19 14:03:46 -05:00
parent 5d65aa1063
commit 086557180a
19 changed files with 62 additions and 62 deletions

View File

@@ -283,7 +283,7 @@ Provide ratings in JSON format:
}}"""
message = client.messages.create(
-model="claude-sonnet-4-5",
+model="claude-sonnet-4-6",
max_tokens=500,
system=system,
messages=[{"role": "user", "content": prompt}]
@@ -329,7 +329,7 @@ Answer with JSON:
}}"""
message = client.messages.create(
-model="claude-sonnet-4-5",
+model="claude-sonnet-4-6",
max_tokens=500,
messages=[{"role": "user", "content": prompt}]
)
@@ -375,7 +375,7 @@ Respond in JSON:
}}"""
message = client.messages.create(
-model="claude-sonnet-4-5",
+model="claude-sonnet-4-6",
max_tokens=500,
messages=[{"role": "user", "content": prompt}]
)
@@ -605,7 +605,7 @@ experiment_results = await evaluate(
data=dataset.name,
evaluators=evaluators,
experiment_prefix="v1.0.0",
-metadata={"model": "claude-sonnet-4-5", "version": "1.0.0"}
+metadata={"model": "claude-sonnet-4-6", "version": "1.0.0"}
)
print(f"Mean score: {experiment_results.aggregate_metrics['qa']['mean']}")