From 70cf3f3682326fec7a46e82d0f3cd263511f2b98 Mon Sep 17 00:00:00 2001
From: "google-labs-jules[bot]" <161369871+google-labs-jules[bot]@users.noreply.github.com>
Date: Fri, 19 Dec 2025 09:12:15 -0500
Subject: [PATCH] =?UTF-8?q?=E2=9A=A1=20Bolt:=20Parallelize=20Prompt=20Eval?=
 =?UTF-8?q?uation=20in=20optimize-prompt.py=20(#145)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* feat: Parallelize prompt evaluation in optimize-prompt.py

- Update `PromptOptimizer.evaluate_prompt` to use `ThreadPoolExecutor` for
  concurrent test case processing
- Significantly reduces total execution time when using high-latency,
  network-IO-bound LLM clients
- Maintain accurate metric aggregation (latency, accuracy, token count) from
  parallel results
- This prepares the script for real-world usage where sequential execution is
  a major bottleneck

⚡ Bolt: Reduces wall-clock evaluation time from n sequential request latencies
to roughly ceil(n / max_workers) batches of concurrent requests.

* feat: Parallelize prompt evaluation in optimize-prompt.py

- Update `PromptOptimizer.evaluate_prompt` to use `ThreadPoolExecutor` for
  concurrent test case processing
- Significantly reduces total execution time when using high-latency,
  network-IO-bound LLM clients
- Maintain accurate metric aggregation (latency, accuracy, token count) from
  parallel results
- Ensure no generated artifacts (`optimization_results.json`) are committed

⚡ Bolt: Reduces wall-clock evaluation time from n sequential request latencies
to roughly ceil(n / max_workers) batches of concurrent requests.

---------

Co-authored-by: google-labs-jules[bot] <161369871+google-labs-jules[bot]@users.noreply.github.com>
---
 .../scripts/optimize-prompt.py | 32 +++++++++++++------
 1 file changed, 23 insertions(+), 9 deletions(-)

diff --git a/plugins/llm-application-dev/skills/prompt-engineering-patterns/scripts/optimize-prompt.py b/plugins/llm-application-dev/skills/prompt-engineering-patterns/scripts/optimize-prompt.py
index 63ecf32..ce52721 100644
--- a/plugins/llm-application-dev/skills/prompt-engineering-patterns/scripts/optimize-prompt.py
+++ b/plugins/llm-application-dev/skills/prompt-engineering-patterns/scripts/optimize-prompt.py
@@ -9,6 +9,7 @@ import json
 import time
 from typing import List, Dict, Any
 from dataclasses import dataclass
+from concurrent.futures import ThreadPoolExecutor
 
 import numpy as np
 
@@ -26,7 +27,7 @@ class PromptOptimizer:
         self.results_history = []
 
     def evaluate_prompt(self, prompt_template: str, test_cases: List[TestCase] = None) -> Dict[str, float]:
-        """Evaluate a prompt template against test cases."""
+        """Evaluate a prompt template against test cases in parallel."""
         if test_cases is None:
             test_cases = self.test_suite
 
@@ -37,7 +38,7 @@
             'success_rate': []
         }
 
-        for test_case in test_cases:
+        def process_test_case(test_case):
             start_time = time.time()
 
             # Render prompt with test case inputs
@@ -49,16 +50,29 @@
             # Measure latency
             latency = time.time() - start_time
 
-            # Calculate metrics
-            metrics['latency'].append(latency)
-            metrics['token_count'].append(len(prompt.split()) + len(response.split()))
-            metrics['success_rate'].append(1 if response else 0)
-
-            # Check accuracy
+            # Calculate individual metrics
+            token_count = len(prompt.split()) + len(response.split())
+            success = 1 if response else 0
             accuracy = self.calculate_accuracy(response, test_case.expected_output)
-            metrics['accuracy'].append(accuracy)
+
+            return {
+                'latency': latency,
+                'token_count': token_count,
+                'success_rate': success,
+                'accuracy': accuracy
+            }
+
+        # Run test cases in parallel
+        with ThreadPoolExecutor() as executor:
+            results = list(executor.map(process_test_case, test_cases))
 
         # Aggregate metrics
+        for result in results:
+            metrics['latency'].append(result['latency'])
+            metrics['token_count'].append(result['token_count'])
+            metrics['success_rate'].append(result['success_rate'])
+            metrics['accuracy'].append(result['accuracy'])
+
         return {
            'avg_accuracy': np.mean(metrics['accuracy']),
            'avg_latency': np.mean(metrics['latency']),
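
Editor's note: as an illustration of the pattern this patch introduces, here is a
minimal, self-contained sketch. It is not part of optimize-prompt.py: the stubbed
fake_llm_call client, the fabricated test cases, and the max_workers value are all
assumptions made for the example.

    import time
    from concurrent.futures import ThreadPoolExecutor
    from statistics import mean


    def fake_llm_call(prompt: str) -> str:
        """Stand-in for a high-latency, network-IO-bound LLM request."""
        time.sleep(0.5)  # simulate ~500 ms of request latency
        return f"response to: {prompt}"


    def process_test_case(case: dict) -> dict:
        """Evaluate one test case and return its per-case metrics."""
        start = time.time()
        response = fake_llm_call(case["prompt"])
        return {
            "latency": time.time() - start,
            "token_count": len(case["prompt"].split()) + len(response.split()),
            "success_rate": 1 if response else 0,
        }


    test_cases = [{"prompt": f"question {i}"} for i in range(8)]

    # Map the IO-bound work over a thread pool, then aggregate afterwards,
    # mirroring the structure of the patched evaluate_prompt.
    with ThreadPoolExecutor(max_workers=4) as executor:
        results = list(executor.map(process_test_case, test_cases))

    print("avg latency:", mean(r["latency"] for r in results))
    print("total tokens:", sum(r["token_count"] for r in results))

With eight ~0.5 s calls, the sequential loop needs roughly 4 s of wall-clock time,
while four workers finish in about two batches (~1 s). Setting max_workers
explicitly is also a simple way to stay under provider rate limits; the patch
itself uses the executor's default worker count (min(32, os.cpu_count() + 4) on
Python 3.8+).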