mirror of
https://github.com/wshobson/agents.git
synced 2026-03-18 09:37:15 +00:00
Replace GPT and Claude models with the latest, better and cheaper models (#118)
* Updated GPT and Claude models to the latest, better and cheaper models * updated more files to use GPT-5 and Sonnet/Haiku 4.5 because they are the latest, cheaper and better models
This commit is contained in:
@@ -186,7 +186,7 @@ def calculate_factuality(claim, knowledge_base):
|
||||
### Single Output Evaluation
|
||||
```python
|
||||
def llm_judge_quality(response, question):
|
||||
"""Use GPT-4 to judge response quality."""
|
||||
"""Use GPT-5 to judge response quality."""
|
||||
prompt = f"""Rate the following response on a scale of 1-10 for:
|
||||
1. Accuracy (factually correct)
|
||||
2. Helpfulness (answers the question)
|
||||
@@ -205,7 +205,7 @@ Provide ratings in JSON format:
|
||||
"""
|
||||
|
||||
result = openai.ChatCompletion.create(
|
||||
model="gpt-4",
|
||||
model="gpt-5",
|
||||
messages=[{"role": "user", "content": prompt}],
|
||||
temperature=0
|
||||
)
|
||||
@@ -236,7 +236,7 @@ Answer with JSON:
|
||||
"""
|
||||
|
||||
result = openai.ChatCompletion.create(
|
||||
model="gpt-4",
|
||||
model="gpt-5",
|
||||
messages=[{"role": "user", "content": prompt}],
|
||||
temperature=0
|
||||
)
|
||||
|
||||
@@ -65,7 +65,7 @@ def self_consistency_cot(query, n=5, temperature=0.7):
|
||||
responses = []
|
||||
for _ in range(n):
|
||||
response = openai.ChatCompletion.create(
|
||||
model="gpt-4",
|
||||
model="gpt-5",
|
||||
messages=[{"role": "user", "content": prompt}],
|
||||
temperature=temperature
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user