Replace GPT and Claude models with the latest, better and cheaper models (#118)

* Updated GPT and Claude models to the latest, better and cheaper models * Updated more files to use GPT-5 and Sonnet/Haiku 4.5 because they are the latest, cheaper and better models
2026-03-18 09:37:15 +00:00 · 2025-11-17 09:22:36 +08:00
parent 7581142104
commit 1305e48672
8 changed files with 26 additions and 26 deletions
--- a/plugins/llm-application-dev/skills/llm-evaluation/SKILL.md
+++ b/plugins/llm-application-dev/skills/llm-evaluation/SKILL.md
@@ -186,7 +186,7 @@ def calculate_factuality(claim, knowledge_base):
 ### Single Output Evaluation
 ```python
 def llm_judge_quality(response, question):
-    """Use GPT-4 to judge response quality."""
+    """Use GPT-5 to judge response quality."""
    prompt = f"""Rate the following response on a scale of 1-10 for:
 1. Accuracy (factually correct)
 2. Helpfulness (answers the question)
@@ -205,7 +205,7 @@ Provide ratings in JSON format:
 """

    result = openai.ChatCompletion.create(
-        model="gpt-4",
+        model="gpt-5",
        messages=[{"role": "user", "content": prompt}],
        temperature=0
    )
@@ -236,7 +236,7 @@ Answer with JSON:
 """

    result = openai.ChatCompletion.create(
-        model="gpt-4",
+        model="gpt-5",
        messages=[{"role": "user", "content": prompt}],
        temperature=0
    )
--- a/plugins/llm-application-dev/skills/prompt-engineering-patterns/references/chain-of-thought.md
+++ b/plugins/llm-application-dev/skills/prompt-engineering-patterns/references/chain-of-thought.md
@@ -65,7 +65,7 @@ def self_consistency_cot(query, n=5, temperature=0.7):
    responses = []
    for _ in range(n):
        response = openai.ChatCompletion.create(
-            model="gpt-4",
+            model="gpt-5",
            messages=[{"role": "user", "content": prompt}],
            temperature=temperature
        )