diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json index 45dffa3..218c012 100644 --- a/.claude-plugin/marketplace.json +++ b/.claude-plugin/marketplace.json @@ -7,7 +7,7 @@ }, "metadata": { "description": "Production-ready workflow orchestration with 73 focused plugins, 112 specialized agents, and 146 skills - optimized for granular installation and minimal token usage", - "version": "1.5.1" + "version": "1.5.2" }, "plugins": [ { @@ -118,7 +118,7 @@ "name": "code-review-ai", "source": "./plugins/code-review-ai", "description": "AI-powered architectural review and code quality analysis", - "version": "1.2.0", + "version": "1.2.1", "author": { "name": "Seth Hobson", "email": "seth@major7apps.com" @@ -181,8 +181,8 @@ }, { "name": "llm-application-dev", - "description": "LLM application development with LangGraph, RAG systems, vector search, and AI agent architectures for Claude 4.5 and GPT-5.2", - "version": "2.0.3", + "description": "LLM application development with LangGraph, RAG systems, vector search, and AI agent architectures for Claude 4.6 and GPT-5.2", + "version": "2.0.4", "author": { "name": "Seth Hobson", "email": "seth@major7apps.com" @@ -196,7 +196,7 @@ "name": "agent-orchestration", "source": "./plugins/agent-orchestration", "description": "Multi-agent system optimization, agent improvement workflows, and context management", - "version": "1.2.0", + "version": "1.2.1", "author": { "name": "Seth Hobson", "email": "seth@major7apps.com" @@ -404,7 +404,7 @@ "name": "performance-testing-review", "source": "./plugins/performance-testing-review", "description": "Performance analysis, test coverage review, and AI-powered code quality assessment", - "version": "1.2.0", + "version": "1.2.1", "author": { "name": "Seth Hobson", "email": "seth@major7apps.com" diff --git a/README.md b/README.md index e7cab7a..929d9db 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # Claude Code Plugins: Orchestration and Automation -> **⚡ Updated for 
Opus 4.5, Sonnet 4.5 & Haiku 4.5** — Three-tier model strategy for optimal performance +> **⚡ Updated for Opus 4.6, Sonnet 4.6 & Haiku 4.5** — Three-tier model strategy for optimal performance [![Run in Smithery](https://smithery.ai/badge/skills/wshobson)](https://smithery.ai/skills?ns=wshobson&utm_source=github&utm_medium=badge) @@ -203,14 +203,14 @@ Strategic model assignment for optimal performance and cost: | Tier | Model | Agents | Use Case | | ---------- | -------- | ------ | ----------------------------------------------------------------------------------------------- | -| **Tier 1** | Opus 4.5 | 42 | Critical architecture, security, ALL code review, production coding (language pros, frameworks) | +| **Tier 1** | Opus 4.6 | 42 | Critical architecture, security, ALL code review, production coding (language pros, frameworks) | | **Tier 2** | Inherit | 42 | Complex tasks - user chooses model (AI/ML, backend, frontend/mobile, specialized) | | **Tier 3** | Sonnet | 51 | Support with intelligence (docs, testing, debugging, network, API docs, DX, legacy, payments) | | **Tier 4** | Haiku | 18 | Fast operational tasks (SEO, deployment, simple docs, sales, content, search) | -**Why Opus 4.5 for Critical Agents?** +**Why Opus 4.6 for Critical Agents?** -- 80.9% on SWE-bench (industry-leading) +- 80.8% on SWE-bench (industry-leading) - 65% fewer tokens for complex tasks - Best for architecture decisions and security audits @@ -218,14 +218,14 @@ Strategic model assignment for optimal performance and cost: Agents marked `inherit` use your session's default model, letting you balance cost and capability: - Set via `claude --model opus` or `claude --model sonnet` when starting a session -- Falls back to Sonnet 4.5 if no default specified +- Falls back to Sonnet 4.6 if no default specified - Perfect for frontend/mobile developers who want cost control - AI/ML engineers can choose Opus for complex model work **Cost Considerations:** -- **Opus 4.5**: $5/$25 per million 
input/output tokens - Premium for critical work -- **Sonnet 4.5**: $3/$15 per million tokens - Balanced performance/cost +- **Opus 4.6**: $5/$25 per million input/output tokens - Premium for critical work +- **Sonnet 4.6**: $3/$15 per million tokens - Balanced performance/cost - **Haiku 4.5**: $1/$5 per million tokens - Fast, cost-effective operations - Opus's 65% token reduction on complex tasks often offsets higher rate - Use `inherit` tier to control costs for high-volume use cases diff --git a/plugins/agent-orchestration/.claude-plugin/plugin.json b/plugins/agent-orchestration/.claude-plugin/plugin.json index d0e1729..61b7738 100644 --- a/plugins/agent-orchestration/.claude-plugin/plugin.json +++ b/plugins/agent-orchestration/.claude-plugin/plugin.json @@ -1,6 +1,6 @@ { "name": "agent-orchestration", - "version": "1.2.0", + "version": "1.2.1", "description": "Multi-agent system optimization, agent improvement workflows, and context management", "author": { "name": "Seth Hobson", diff --git a/plugins/agent-orchestration/commands/multi-agent-optimize.md b/plugins/agent-orchestration/commands/multi-agent-optimize.md index 55c39fd..731055c 100644 --- a/plugins/agent-orchestration/commands/multi-agent-optimize.md +++ b/plugins/agent-orchestration/commands/multi-agent-optimize.md @@ -146,7 +146,7 @@ class CostOptimizer: self.token_budget = 100000 # Monthly budget self.token_usage = 0 self.model_costs = { - 'gpt-5': 0.03, + 'gpt-5.2': 0.03, 'claude-4-sonnet': 0.015, 'claude-4-haiku': 0.0025 } diff --git a/plugins/code-review-ai/.claude-plugin/plugin.json b/plugins/code-review-ai/.claude-plugin/plugin.json index 97a77ab..e458cbb 100644 --- a/plugins/code-review-ai/.claude-plugin/plugin.json +++ b/plugins/code-review-ai/.claude-plugin/plugin.json @@ -1,6 +1,6 @@ { "name": "code-review-ai", - "version": "1.2.0", + "version": "1.2.1", "description": "AI-powered architectural review and code quality analysis", "author": { "name": "Seth Hobson", diff --git 
a/plugins/code-review-ai/commands/ai-review.md b/plugins/code-review-ai/commands/ai-review.md index ae9b700..2f5b7b0 100644 --- a/plugins/code-review-ai/commands/ai-review.md +++ b/plugins/code-review-ai/commands/ai-review.md @@ -1,6 +1,6 @@ # AI-Powered Code Review Specialist -You are an expert AI-powered code review specialist combining automated static analysis, intelligent pattern recognition, and modern DevOps practices. Leverage AI tools (GitHub Copilot, Qodo, GPT-5, Claude 4.5 Sonnet) with battle-tested platforms (SonarQube, CodeQL, Semgrep) to identify bugs, vulnerabilities, and performance issues. +You are an expert AI-powered code review specialist combining automated static analysis, intelligent pattern recognition, and modern DevOps practices. Leverage AI tools (GitHub Copilot, Qodo, GPT-5.2, Claude Sonnet 4.6) with battle-tested platforms (SonarQube, CodeQL, Semgrep) to identify bugs, vulnerabilities, and performance issues. ## Context @@ -34,7 +34,7 @@ Execute in parallel: ### AI-Assisted Review ```python -# Context-aware review prompt for Claude 4.5 Sonnet +# Context-aware review prompt for Claude Sonnet 4.6 review_prompt = f""" You are reviewing a pull request for a {language} {project_type} application. @@ -64,8 +64,8 @@ Format as JSON array. 
### Model Selection (2025) -- **Fast reviews (<200 lines)**: GPT-4o-mini or Claude 4.5 Haiku -- **Deep reasoning**: Claude 4.5 Sonnet or GPT-5 (200K+ tokens) +- **Fast reviews (<200 lines)**: GPT-5-mini or Claude Haiku 4.5 +- **Deep reasoning**: Claude Sonnet 4.6 or GPT-5.2 (200K+ tokens) - **Code generation**: GitHub Copilot or Qodo - **Multi-language**: Qodo or CodeAnt AI (30+ languages) @@ -92,7 +92,7 @@ interface ReviewRoutingStrategy { return new QodoEngine({ mode: "test-generation", coverageTarget: 80 }); } - return new AIEngine("gpt-4o", { temperature: 0.3, maxTokens: 2000 }); + return new AIEngine("gpt-5.2", { temperature: 0.3, maxTokens: 2000 }); } } ``` @@ -312,13 +312,13 @@ jobs: codeql database create codeql-db --language=javascript,python semgrep scan --config=auto --sarif --output=semgrep.sarif - - name: AI-Enhanced Review (GPT-5) + - name: AI-Enhanced Review (GPT-5.2) env: OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} run: | python scripts/ai_review.py \ --pr-number ${{ github.event.number }} \ - --model gpt-4o \ + --model gpt-5.2 \ --static-analysis-results codeql.sarif,semgrep.sarif - name: Post Comments @@ -446,7 +446,7 @@ if __name__ == '__main__': Comprehensive AI code review combining: 1. Multi-tool static analysis (SonarQube, CodeQL, Semgrep) -2. State-of-the-art LLMs (GPT-5, Claude 4.5 Sonnet) +2. State-of-the-art LLMs (GPT-5.2, Claude Sonnet 4.6) 3. Seamless CI/CD integration (GitHub Actions, GitLab, Azure DevOps) 4. 30+ language support with language-specific linters 5. 
Actionable review comments with severity and fix examples diff --git a/plugins/llm-application-dev/.claude-plugin/plugin.json b/plugins/llm-application-dev/.claude-plugin/plugin.json index 5b4fb1b..38a1825 100644 --- a/plugins/llm-application-dev/.claude-plugin/plugin.json +++ b/plugins/llm-application-dev/.claude-plugin/plugin.json @@ -1,7 +1,7 @@ { "name": "llm-application-dev", - "description": "LLM application development with LangGraph, RAG systems, vector search, and AI agent architectures for Claude 4.5 and GPT-5.2", - "version": "2.0.3", + "description": "LLM application development with LangGraph, RAG systems, vector search, and AI agent architectures for Claude 4.6 and GPT-5.2", + "version": "2.0.4", "author": { "name": "Seth Hobson", "email": "seth@major7apps.com" diff --git a/plugins/llm-application-dev/README.md b/plugins/llm-application-dev/README.md index 7c78088..ec5744d 100644 --- a/plugins/llm-application-dev/README.md +++ b/plugins/llm-application-dev/README.md @@ -5,7 +5,7 @@ Build production-ready LLM applications, advanced RAG systems, and intelligent a ## Version 2.0.0 Highlights - **LangGraph Integration**: Updated from deprecated LangChain patterns to LangGraph StateGraph workflows -- **Modern Model Support**: Claude Opus/Sonnet/Haiku 4.5 and GPT-5.2/GPT-5.2-mini +- **Modern Model Support**: Claude Opus 4.6/Sonnet 4.6/Haiku 4.5 and GPT-5.2/GPT-5-mini - **Voyage AI Embeddings**: Recommended embedding models for Claude applications - **Structured Outputs**: Pydantic-based structured output patterns @@ -71,7 +71,7 @@ Build production-ready LLM applications, advanced RAG systems, and intelligent a ### 2.0.0 (January 2026) - **Breaking**: Migrated from LangChain 0.x to LangChain 1.x/LangGraph -- **Breaking**: Updated model references to Claude 4.5 and GPT-5.2 +- **Breaking**: Updated model references to Claude 4.6 and GPT-5.2 - Added Voyage AI as primary embedding recommendation for Claude apps - Added LangGraph StateGraph patterns replacing 
deprecated `initialize_agent()` - Added structured outputs with Pydantic diff --git a/plugins/llm-application-dev/agents/ai-engineer.md b/plugins/llm-application-dev/agents/ai-engineer.md index abfb6a3..190e9c6 100644 --- a/plugins/llm-application-dev/agents/ai-engineer.md +++ b/plugins/llm-application-dev/agents/ai-engineer.md @@ -14,8 +14,8 @@ Expert AI engineer specializing in LLM application development, RAG systems, and ### LLM Integration & Model Management -- OpenAI GPT-5.2/GPT-5.2-mini with function calling and structured outputs -- Anthropic Claude Opus 4.5, Claude Sonnet 4.5, Claude Haiku 4.5 with tool use and computer use +- OpenAI GPT-5.2/GPT-5-mini with function calling and structured outputs +- Anthropic Claude Opus 4.6, Claude Sonnet 4.6, Claude Haiku 4.5 with tool use and computer use - Open-source models: Llama 3.3, Mixtral 8x22B, Qwen 2.5, DeepSeek-V3 - Local deployment with Ollama, vLLM, TGI (Text Generation Inference) - Model serving with TorchServe, MLflow, BentoML for production deployment @@ -76,7 +76,7 @@ Expert AI engineer specializing in LLM application development, RAG systems, and ### Multimodal AI Integration -- Vision models: GPT-4V, Claude 4 Vision, LLaVA, CLIP for image understanding +- Vision models: GPT-5.2, Claude 4.6, LLaVA, CLIP for image understanding - Audio processing: Whisper for speech-to-text, ElevenLabs for text-to-speech - Document AI: OCR, table extraction, layout understanding with models like LayoutLM - Video analysis and processing for multimedia applications @@ -124,7 +124,7 @@ Expert AI engineer specializing in LLM application development, RAG systems, and ## Knowledge Base -- Latest LLM developments and model capabilities (GPT-5.2, Claude 4.5, Llama 3.3) +- Latest LLM developments and model capabilities (GPT-5.2, Claude 4.6, Llama 3.3) - Modern vector database architectures and optimization techniques - Production AI system design patterns and best practices - AI safety and security considerations for 
enterprise deployments diff --git a/plugins/llm-application-dev/agents/prompt-engineer.md b/plugins/llm-application-dev/agents/prompt-engineer.md index 1a68ffa..acc2919 100644 --- a/plugins/llm-application-dev/agents/prompt-engineer.md +++ b/plugins/llm-application-dev/agents/prompt-engineer.md @@ -48,7 +48,7 @@ Expert prompt engineer specializing in advanced prompting methodologies and LLM ### Model-Specific Optimization -#### OpenAI Models (GPT-5.2, GPT-5.2-mini) +#### OpenAI Models (GPT-5.2, GPT-5-mini) - Function calling optimization and structured outputs - JSON mode utilization for reliable data extraction @@ -58,7 +58,7 @@ Expert prompt engineer specializing in advanced prompting methodologies and LLM - Multi-turn conversation management - Image and multimodal prompt engineering -#### Anthropic Claude (Claude Opus 4.5, Sonnet 4.5, Haiku 4.5) +#### Anthropic Claude (Claude Opus 4.6, Sonnet 4.6, Haiku 4.5) - Constitutional AI alignment with Claude's training - Tool use optimization for complex workflows diff --git a/plugins/llm-application-dev/commands/langchain-agent.md b/plugins/llm-application-dev/commands/langchain-agent.md index 9416692..fc16476 100644 --- a/plugins/llm-application-dev/commands/langchain-agent.md +++ b/plugins/llm-application-dev/commands/langchain-agent.md @@ -37,7 +37,7 @@ class AgentState(TypedDict): ### Model & Embeddings -- **Primary LLM**: Claude Sonnet 4.5 (`claude-sonnet-4-5`) +- **Primary LLM**: Claude Sonnet 4.6 (`claude-sonnet-4-6`) - **Embeddings**: Voyage AI (`voyage-3-large`) - officially recommended by Anthropic for Claude - **Specialized**: `voyage-code-3` (code), `voyage-finance-2` (finance), `voyage-law-2` (legal) @@ -158,7 +158,7 @@ from langsmith.evaluation import evaluate # Run evaluation suite eval_config = RunEvalConfig( evaluators=["qa", "context_qa", "cot_qa"], - eval_llm=ChatAnthropic(model="claude-sonnet-4-5") + eval_llm=ChatAnthropic(model="claude-sonnet-4-6") ) results = await evaluate( @@ -209,7 +209,7 @@ 
async def call_with_retry(): ## Implementation Checklist -- [ ] Initialize LLM with Claude Sonnet 4.5 +- [ ] Initialize LLM with Claude Sonnet 4.6 - [ ] Setup Voyage AI embeddings (voyage-3-large) - [ ] Create tools with async support and error handling - [ ] Implement memory system (choose type based on use case) diff --git a/plugins/llm-application-dev/commands/prompt-optimize.md b/plugins/llm-application-dev/commands/prompt-optimize.md index 9471808..979f6ad 100644 --- a/plugins/llm-application-dev/commands/prompt-optimize.md +++ b/plugins/llm-application-dev/commands/prompt-optimize.md @@ -150,7 +150,7 @@ gpt5_optimized = """ ```` -**Claude 4.5/4** +**Claude 4.6/4.5** ```python claude_optimized = """ @@ -607,7 +607,7 @@ testing_recommendations: metrics: ["accuracy", "satisfaction", "cost"] deployment_strategy: - model: "GPT-5.2 for quality, Claude 4.5 for safety" + model: "GPT-5.2 for quality, Claude 4.6 for safety" temperature: 0.7 max_tokens: 2000 monitoring: "Track success, latency, feedback" diff --git a/plugins/llm-application-dev/skills/langchain-architecture/SKILL.md b/plugins/llm-application-dev/skills/langchain-architecture/SKILL.md index c509e40..92f64a6 100644 --- a/plugins/llm-application-dev/skills/langchain-architecture/SKILL.md +++ b/plugins/llm-application-dev/skills/langchain-architecture/SKILL.md @@ -115,8 +115,8 @@ from langchain_core.tools import tool import ast import operator -# Initialize LLM (Claude Sonnet 4.5 recommended) -llm = ChatAnthropic(model="claude-sonnet-4-5", temperature=0) +# Initialize LLM (Claude Sonnet 4.6 recommended) +llm = ChatAnthropic(model="claude-sonnet-4-6", temperature=0) # Define tools with Pydantic schemas @tool @@ -201,7 +201,7 @@ class RAGState(TypedDict): answer: str # Initialize components -llm = ChatAnthropic(model="claude-sonnet-4-5") +llm = ChatAnthropic(model="claude-sonnet-4-6") embeddings = VoyageAIEmbeddings(model="voyage-3-large") vectorstore = PineconeVectorStore(index_name="docs", 
embedding=embeddings) retriever = vectorstore.as_retriever(search_kwargs={"k": 4}) @@ -489,7 +489,7 @@ os.environ["LANGCHAIN_API_KEY"] = "your-api-key" os.environ["LANGCHAIN_PROJECT"] = "my-project" # All LangChain/LangGraph operations are automatically traced -llm = ChatAnthropic(model="claude-sonnet-4-5") +llm = ChatAnthropic(model="claude-sonnet-4-6") ``` ### Custom Callback Handler @@ -530,7 +530,7 @@ result = await agent.ainvoke( ```python from langchain_anthropic import ChatAnthropic -llm = ChatAnthropic(model="claude-sonnet-4-5", streaming=True) +llm = ChatAnthropic(model="claude-sonnet-4-6", streaming=True) # Stream tokens async for chunk in llm.astream("Tell me a story"): diff --git a/plugins/llm-application-dev/skills/llm-evaluation/SKILL.md b/plugins/llm-application-dev/skills/llm-evaluation/SKILL.md index 3c1f1f5..d29fd28 100644 --- a/plugins/llm-application-dev/skills/llm-evaluation/SKILL.md +++ b/plugins/llm-application-dev/skills/llm-evaluation/SKILL.md @@ -283,7 +283,7 @@ Provide ratings in JSON format: }}""" message = client.messages.create( - model="claude-sonnet-4-5", + model="claude-sonnet-4-6", max_tokens=500, system=system, messages=[{"role": "user", "content": prompt}] @@ -329,7 +329,7 @@ Answer with JSON: }}""" message = client.messages.create( - model="claude-sonnet-4-5", + model="claude-sonnet-4-6", max_tokens=500, messages=[{"role": "user", "content": prompt}] ) @@ -375,7 +375,7 @@ Respond in JSON: }}""" message = client.messages.create( - model="claude-sonnet-4-5", + model="claude-sonnet-4-6", max_tokens=500, messages=[{"role": "user", "content": prompt}] ) @@ -605,7 +605,7 @@ experiment_results = await evaluate( data=dataset.name, evaluators=evaluators, experiment_prefix="v1.0.0", - metadata={"model": "claude-sonnet-4-5", "version": "1.0.0"} + metadata={"model": "claude-sonnet-4-6", "version": "1.0.0"} ) print(f"Mean score: {experiment_results.aggregate_metrics['qa']['mean']}") diff --git 
a/plugins/llm-application-dev/skills/prompt-engineering-patterns/SKILL.md b/plugins/llm-application-dev/skills/prompt-engineering-patterns/SKILL.md index 2d471af..016cbe2 100644 --- a/plugins/llm-application-dev/skills/prompt-engineering-patterns/SKILL.md +++ b/plugins/llm-application-dev/skills/prompt-engineering-patterns/SKILL.md @@ -81,7 +81,7 @@ class SQLQuery(BaseModel): tables_used: list[str] = Field(description="List of tables referenced") # Initialize model with structured output -llm = ChatAnthropic(model="claude-sonnet-4-5") +llm = ChatAnthropic(model="claude-sonnet-4-6") structured_llm = llm.with_structured_output(SQLQuery) # Create prompt template @@ -124,7 +124,7 @@ async def analyze_sentiment(text: str) -> SentimentAnalysis: client = Anthropic() message = client.messages.create( - model="claude-sonnet-4-5", + model="claude-sonnet-4-6", max_tokens=500, messages=[{ "role": "user", @@ -427,7 +427,7 @@ client = Anthropic() # Use prompt caching for repeated system prompts response = client.messages.create( - model="claude-sonnet-4-5", + model="claude-sonnet-4-6", max_tokens=1000, system=[ { diff --git a/plugins/llm-application-dev/skills/prompt-engineering-patterns/references/chain-of-thought.md b/plugins/llm-application-dev/skills/prompt-engineering-patterns/references/chain-of-thought.md index 77ad985..31be6ef 100644 --- a/plugins/llm-application-dev/skills/prompt-engineering-patterns/references/chain-of-thought.md +++ b/plugins/llm-application-dev/skills/prompt-engineering-patterns/references/chain-of-thought.md @@ -68,7 +68,7 @@ def self_consistency_cot(query, n=5, temperature=0.7): responses = [] for _ in range(n): response = openai.ChatCompletion.create( - model="gpt-5", + model="gpt-5.2", messages=[{"role": "user", "content": prompt}], temperature=temperature ) diff --git a/plugins/llm-application-dev/skills/rag-implementation/SKILL.md b/plugins/llm-application-dev/skills/rag-implementation/SKILL.md index d72adbd..51037e7 100644 --- 
a/plugins/llm-application-dev/skills/rag-implementation/SKILL.md +++ b/plugins/llm-application-dev/skills/rag-implementation/SKILL.md @@ -85,7 +85,7 @@ class RAGState(TypedDict): answer: str # Initialize components -llm = ChatAnthropic(model="claude-sonnet-4-5") +llm = ChatAnthropic(model="claude-sonnet-4-6") embeddings = VoyageAIEmbeddings(model="voyage-3-large") vectorstore = PineconeVectorStore(index_name="docs", embedding=embeddings) retriever = vectorstore.as_retriever(search_kwargs={"k": 4}) diff --git a/plugins/performance-testing-review/.claude-plugin/plugin.json b/plugins/performance-testing-review/.claude-plugin/plugin.json index 8fdb9f9..509045f 100644 --- a/plugins/performance-testing-review/.claude-plugin/plugin.json +++ b/plugins/performance-testing-review/.claude-plugin/plugin.json @@ -1,6 +1,6 @@ { "name": "performance-testing-review", - "version": "1.2.0", + "version": "1.2.1", "description": "Performance analysis, test coverage review, and AI-powered code quality assessment", "author": { "name": "Seth Hobson", diff --git a/plugins/performance-testing-review/commands/ai-review.md b/plugins/performance-testing-review/commands/ai-review.md index c3396d3..2f5b7b0 100644 --- a/plugins/performance-testing-review/commands/ai-review.md +++ b/plugins/performance-testing-review/commands/ai-review.md @@ -1,6 +1,6 @@ # AI-Powered Code Review Specialist -You are an expert AI-powered code review specialist combining automated static analysis, intelligent pattern recognition, and modern DevOps practices. Leverage AI tools (GitHub Copilot, Qodo, GPT-5, Claude 4.5 Sonnet) with battle-tested platforms (SonarQube, CodeQL, Semgrep) to identify bugs, vulnerabilities, and performance issues. +You are an expert AI-powered code review specialist combining automated static analysis, intelligent pattern recognition, and modern DevOps practices. 
Leverage AI tools (GitHub Copilot, Qodo, GPT-5.2, Claude Sonnet 4.6) with battle-tested platforms (SonarQube, CodeQL, Semgrep) to identify bugs, vulnerabilities, and performance issues. ## Context @@ -34,7 +34,7 @@ Execute in parallel: ### AI-Assisted Review ```python -# Context-aware review prompt for Claude 4.5 Sonnet +# Context-aware review prompt for Claude Sonnet 4.6 review_prompt = f""" You are reviewing a pull request for a {language} {project_type} application. @@ -64,8 +64,8 @@ Format as JSON array. ### Model Selection (2025) -- **Fast reviews (<200 lines)**: GPT-4o-mini or Claude 4.5 Haiku -- **Deep reasoning**: Claude 4.5 Sonnet or GPT-4.5 (200K+ tokens) +- **Fast reviews (<200 lines)**: GPT-5-mini or Claude Haiku 4.5 +- **Deep reasoning**: Claude Sonnet 4.6 or GPT-5.2 (200K+ tokens) - **Code generation**: GitHub Copilot or Qodo - **Multi-language**: Qodo or CodeAnt AI (30+ languages) @@ -92,7 +92,7 @@ interface ReviewRoutingStrategy { return new QodoEngine({ mode: "test-generation", coverageTarget: 80 }); } - return new AIEngine("gpt-4o", { temperature: 0.3, maxTokens: 2000 }); + return new AIEngine("gpt-5.2", { temperature: 0.3, maxTokens: 2000 }); } } ``` @@ -312,13 +312,13 @@ jobs: codeql database create codeql-db --language=javascript,python semgrep scan --config=auto --sarif --output=semgrep.sarif - - name: AI-Enhanced Review (GPT-5) + - name: AI-Enhanced Review (GPT-5.2) env: OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} run: | python scripts/ai_review.py \ --pr-number ${{ github.event.number }} \ - --model gpt-4o \ + --model gpt-5.2 \ --static-analysis-results codeql.sarif,semgrep.sarif - name: Post Comments @@ -446,7 +446,7 @@ if __name__ == '__main__': Comprehensive AI code review combining: 1. Multi-tool static analysis (SonarQube, CodeQL, Semgrep) -2. State-of-the-art LLMs (GPT-5, Claude 4.5 Sonnet) +2. State-of-the-art LLMs (GPT-5.2, Claude Sonnet 4.6) 3. Seamless CI/CD integration (GitHub Actions, GitLab, Azure DevOps) 4. 
30+ language support with language-specific linters 5. Actionable review comments with severity and fix examples