mirror of
https://github.com/wshobson/agents.git
synced 2026-03-18 09:37:15 +00:00
feat: add 5 new specialized agents with 20 skills
Add domain expert agents with comprehensive skill sets: - service-mesh-expert (cloud-infrastructure): Istio/Linkerd patterns, mTLS, observability - event-sourcing-architect (backend-development): CQRS, event stores, projections, sagas - vector-database-engineer (llm-application-dev): embeddings, similarity search, hybrid search - monorepo-architect (developer-essentials): Nx, Turborepo, Bazel, pnpm workspaces - threat-modeling-expert (security-scanning): STRIDE, attack trees, security requirements Update all documentation to reflect correct counts: - 67 plugins, 99 agents, 107 skills, 71 commands
This commit is contained in:
@@ -0,0 +1,558 @@
|
||||
---
|
||||
name: similarity-search-patterns
|
||||
description: Implement efficient similarity search with vector databases. Use when building semantic search, implementing nearest neighbor queries, or optimizing retrieval performance.
|
||||
---
|
||||
|
||||
# Similarity Search Patterns
|
||||
|
||||
Patterns for implementing efficient similarity search in production systems.
|
||||
|
||||
## When to Use This Skill
|
||||
|
||||
- Building semantic search systems
|
||||
- Implementing RAG retrieval
|
||||
- Creating recommendation engines
|
||||
- Optimizing search latency
|
||||
- Scaling to millions of vectors
|
||||
- Combining semantic and keyword search
|
||||
|
||||
## Core Concepts
|
||||
|
||||
### 1. Distance Metrics
|
||||
|
||||
| Metric | Formula | Best For |
|
||||
|--------|---------|----------|
|
||||
| **Cosine** | 1 - (A·B)/(‖A‖‖B‖) | Normalized embeddings |
|
||||
| **Euclidean (L2)** | √Σ(a-b)² | Raw embeddings |
|
||||
| **Dot Product** | A·B | Magnitude matters |
|
||||
| **Manhattan (L1)** | Σ\|a-b\| | Sparse vectors |
|
||||
|
||||
### 2. Index Types
|
||||
|
||||
```
|
||||
┌─────────────────────────────────────────────────┐
|
||||
│ Index Types │
|
||||
├─────────────┬───────────────┬───────────────────┤
|
||||
│ Flat │ HNSW │ IVF+PQ │
|
||||
│ (Exact) │ (Graph-based) │ (Quantized) │
|
||||
├─────────────┼───────────────┼───────────────────┤
|
||||
│ O(n) search │ O(log n) │ O(√n) │
|
||||
│ 100% recall │ ~95-99% │ ~90-95% │
|
||||
│ Small data │ Medium-Large │ Very Large │
|
||||
└─────────────┴───────────────┴───────────────────┘
|
||||
```
|
||||
|
||||
## Templates
|
||||
|
||||
### Template 1: Pinecone Implementation
|
||||
|
||||
```python
|
||||
from pinecone import Pinecone, ServerlessSpec
|
||||
from typing import List, Dict, Optional
|
||||
import hashlib
|
||||
|
||||
class PineconeVectorStore:
    """Pinecone-backed vector store with optional cross-encoder reranking.

    Creates a serverless index (AWS us-east-1) on first use and exposes
    batched upsert, filtered similarity search, rerank-on-top-of-search,
    and deletion by id or by metadata filter.
    """

    # Cached cross-encoder shared by all instances so the model weights are
    # loaded once per process rather than once per rerank call.
    _cross_encoder = None

    def __init__(
        self,
        api_key: str,
        index_name: str,
        dimension: int = 1536,
        metric: str = "cosine"
    ):
        """Connect to Pinecone and create the index if it does not exist.

        Args:
            api_key: Pinecone API key.
            index_name: Index to use or create.
            dimension: Embedding dimensionality (1536 matches OpenAI
                text-embedding-ada-002 / text-embedding-3-small).
            metric: Distance metric ("cosine", "euclidean", "dotproduct").
        """
        self.pc = Pinecone(api_key=api_key)

        # Create index if not exists
        if index_name not in self.pc.list_indexes().names():
            self.pc.create_index(
                name=index_name,
                dimension=dimension,
                metric=metric,
                spec=ServerlessSpec(cloud="aws", region="us-east-1")
            )

        self.index = self.pc.Index(index_name)

    def upsert(
        self,
        vectors: List[Dict],
        namespace: str = ""
    ) -> int:
        """Upsert vectors in batches of 100.

        Args:
            vectors: [{"id": str, "values": List[float], "metadata": dict}]
            namespace: Optional namespace to write into.

        Returns:
            Number of vectors upserted.
        """
        batch_size = 100
        total = 0

        for i in range(0, len(vectors), batch_size):
            batch = vectors[i:i + batch_size]
            self.index.upsert(vectors=batch, namespace=namespace)
            total += len(batch)

        return total

    def search(
        self,
        query_vector: List[float],
        top_k: int = 10,
        namespace: str = "",
        filter: Optional[Dict] = None,  # NOTE: shadows builtin; name kept for API compat
        include_metadata: bool = True
    ) -> List[Dict]:
        """Return the top_k nearest vectors as {id, score, metadata} dicts."""
        results = self.index.query(
            vector=query_vector,
            top_k=top_k,
            namespace=namespace,
            filter=filter,
            include_metadata=include_metadata
        )

        return [
            {
                "id": match.id,
                "score": match.score,
                "metadata": match.metadata
            }
            for match in results.matches
        ]

    def search_with_rerank(
        self,
        query: str,
        query_vector: List[float],
        top_k: int = 10,
        rerank_top_n: int = 50,
        namespace: str = ""
    ) -> List[Dict]:
        """Over-fetch rerank_top_n candidates, rerank them, return top_k."""
        initial_results = self.search(
            query_vector,
            top_k=rerank_top_n,
            namespace=namespace
        )

        # Cross-encoder scores (query, text) pairs jointly — slower but more
        # accurate than the bi-encoder similarity used for retrieval.
        reranked = self._rerank(query, initial_results)

        return reranked[:top_k]

    def _rerank(self, query: str, results: List[Dict]) -> List[Dict]:
        """Rerank results using a cross-encoder over (query, text) pairs.

        Assumes each result carries its source text at metadata["text"]
        — confirm against the upsert pipeline.
        """
        if not results:
            # Nothing to score; also avoids loading the model needlessly.
            return []

        # Fix: the original instantiated the CrossEncoder (a full model
        # load) on every call. Load it once and cache it on the class.
        if PineconeVectorStore._cross_encoder is None:
            from sentence_transformers import CrossEncoder
            PineconeVectorStore._cross_encoder = CrossEncoder(
                'cross-encoder/ms-marco-MiniLM-L-6-v2'
            )

        pairs = [(query, r["metadata"]["text"]) for r in results]
        scores = PineconeVectorStore._cross_encoder.predict(pairs)

        for result, score in zip(results, scores):
            result["rerank_score"] = float(score)

        return sorted(results, key=lambda x: x["rerank_score"], reverse=True)

    def delete(self, ids: List[str], namespace: str = ""):
        """Delete vectors by ID."""
        self.index.delete(ids=ids, namespace=namespace)

    def delete_by_filter(self, filter: Dict, namespace: str = ""):
        """Delete all vectors whose metadata matches the filter."""
        self.index.delete(filter=filter, namespace=namespace)
|
||||
```
|
||||
|
||||
### Template 2: Qdrant Implementation
|
||||
|
||||
```python
|
||||
from qdrant_client import QdrantClient
|
||||
from qdrant_client.http import models
|
||||
from typing import List, Dict, Optional
|
||||
|
||||
class QdrantVectorStore:
    """Qdrant-backed vector store with INT8 scalar quantization enabled."""

    def __init__(
        self,
        url: str = "localhost",
        port: int = 6333,
        collection_name: str = "documents",
        vector_size: int = 1536
    ):
        """Connect and create the collection (cosine distance) if missing.

        Args:
            url: Qdrant host or URL.
            port: HTTP port (default 6333).
            collection_name: Collection to use or create.
            vector_size: Embedding dimensionality.
        """
        self.client = QdrantClient(url=url, port=port)
        self.collection_name = collection_name

        # Create collection if not exists
        collections = self.client.get_collections().collections
        if collection_name not in [c.name for c in collections]:
            self.client.create_collection(
                collection_name=collection_name,
                vectors_config=models.VectorParams(
                    size=vector_size,
                    distance=models.Distance.COSINE
                ),
                # Optional: INT8 scalar quantization trades a little recall
                # for ~4x memory savings; quantized vectors stay in RAM.
                quantization_config=models.ScalarQuantization(
                    scalar=models.ScalarQuantizationConfig(
                        type=models.ScalarType.INT8,
                        quantile=0.99,
                        always_ram=True
                    )
                )
            )

    def upsert(self, points: List[Dict]) -> int:
        """Upsert points.

        points: [{"id": str/int, "vector": List[float], "payload": dict}]

        Returns:
            Number of points sent.
        """
        qdrant_points = [
            models.PointStruct(
                id=p["id"],
                vector=p["vector"],
                payload=p.get("payload", {})
            )
            for p in points
        ]

        self.client.upsert(
            collection_name=self.collection_name,
            points=qdrant_points
        )
        return len(points)

    def search(
        self,
        query_vector: List[float],
        limit: int = 10,
        filter: Optional[models.Filter] = None,  # NOTE: shadows builtin; kept for API compat
        score_threshold: Optional[float] = None
    ) -> List[Dict]:
        """Return up to `limit` nearest points as {id, score, payload} dicts."""
        results = self.client.search(
            collection_name=self.collection_name,
            query_vector=query_vector,
            limit=limit,
            query_filter=filter,
            score_threshold=score_threshold
        )

        return [
            {
                "id": r.id,
                "score": r.score,
                "payload": r.payload
            }
            for r in results
        ]

    def search_with_filter(
        self,
        query_vector: List[float],
        must_conditions: List[Dict] = None,
        should_conditions: List[Dict] = None,
        must_not_conditions: List[Dict] = None,
        limit: int = 10
    ) -> List[Dict]:
        """Search with a structured payload filter.

        Each condition is {"key": <payload field>, "value": <exact match>}.

        Fix: the original silently ignored should_conditions and
        must_not_conditions; all three clause types are now applied.
        """
        def to_field_conditions(specs):
            # Translate [{"key", "value"}] dicts into Qdrant FieldConditions.
            if not specs:
                return None
            return [
                models.FieldCondition(
                    key=c["key"],
                    match=models.MatchValue(value=c["value"])
                )
                for c in specs
            ]

        must = to_field_conditions(must_conditions)
        should = to_field_conditions(should_conditions)
        must_not = to_field_conditions(must_not_conditions)

        if must or should or must_not:
            filter = models.Filter(must=must, should=should, must_not=must_not)
        else:
            filter = None

        return self.search(query_vector, limit=limit, filter=filter)

    def search_with_sparse(
        self,
        dense_vector: List[float],
        sparse_vector: Dict[int, float],
        limit: int = 10,
        dense_weight: float = 0.7
    ) -> List[Dict]:
        """Hybrid search with dense and sparse vectors.

        NOTE(review): true hybrid search requires the collection to be
        created with named vectors ("dense" plus a sparse vector config).
        As written, only the dense leg runs — `sparse_vector` and
        `dense_weight` are accepted for API compatibility but not yet
        used. TODO: add a sparse query and fuse the result lists (e.g.
        reciprocal rank fusion) once the collection defines one.
        """
        results = self.client.search(
            collection_name=self.collection_name,
            query_vector=models.NamedVector(
                name="dense",
                vector=dense_vector
            ),
            limit=limit
        )
        return [{"id": r.id, "score": r.score, "payload": r.payload} for r in results]
|
||||
```
|
||||
|
||||
### Template 3: pgvector with PostgreSQL
|
||||
|
||||
```python
|
||||
import json
from typing import List, Dict, Optional

import asyncpg
import numpy as np
|
||||
|
||||
class PgVectorStore:
    """pgvector-backed document store using an asyncpg connection pool.

    Stores (id, content, metadata JSONB, embedding vector) rows and
    supports cosine similarity search, exact-match metadata filtering,
    and hybrid (vector + full-text) retrieval.
    """

    def __init__(self, connection_string: str):
        # Pool is created lazily in init(); call `await store.init()` first.
        self.connection_string = connection_string

    @staticmethod
    def _vector_literal(embedding: List[float]) -> str:
        """Serialize an embedding to pgvector's text format, e.g. "[1.0,2.0]".

        Fix: asyncpg has no codec for the pgvector type by default, so
        passing a Python list as a query parameter fails; a text literal
        with an explicit ::vector cast always works.
        """
        return "[" + ",".join(str(float(x)) for x in embedding) + "]"

    async def init(self):
        """Initialize connection pool, extension, table, and HNSW index."""
        self.pool = await asyncpg.create_pool(self.connection_string)

        async with self.pool.acquire() as conn:
            # Enable extension
            await conn.execute("CREATE EXTENSION IF NOT EXISTS vector")

            # Create table
            await conn.execute("""
                CREATE TABLE IF NOT EXISTS documents (
                    id TEXT PRIMARY KEY,
                    content TEXT,
                    metadata JSONB,
                    embedding vector(1536)
                )
            """)

            # Create index (HNSW for better performance)
            await conn.execute("""
                CREATE INDEX IF NOT EXISTS documents_embedding_idx
                ON documents
                USING hnsw (embedding vector_cosine_ops)
                WITH (m = 16, ef_construction = 64)
            """)

    async def upsert(self, documents: List[Dict]):
        """Upsert documents with embeddings.

        documents: [{"id", "content", "metadata" (optional), "embedding"}]
        """
        async with self.pool.acquire() as conn:
            await conn.executemany(
                """
                INSERT INTO documents (id, content, metadata, embedding)
                VALUES ($1, $2, $3::jsonb, $4::vector)
                ON CONFLICT (id) DO UPDATE SET
                    content = EXCLUDED.content,
                    metadata = EXCLUDED.metadata,
                    embedding = EXCLUDED.embedding
                """,
                [
                    (
                        doc["id"],
                        doc["content"],
                        # Fix: JSONB parameters must be JSON text unless a
                        # codec is registered; the original passed a raw dict.
                        json.dumps(doc.get("metadata", {})),
                        self._vector_literal(doc["embedding"])
                    )
                    for doc in documents
                ]
            )

    async def search(
        self,
        query_embedding: List[float],
        limit: int = 10,
        filter_metadata: Optional[Dict] = None
    ) -> List[Dict]:
        """Cosine similarity search with optional exact-match metadata filter.

        Returns [{"id", "content", "metadata", "score"}] ordered by
        similarity (1 - cosine distance), best first.
        """
        query = """
            SELECT id, content, metadata,
                   1 - (embedding <=> $1::vector) as similarity
            FROM documents
        """

        params = [self._vector_literal(query_embedding)]

        if filter_metadata:
            conditions = []
            for key, value in filter_metadata.items():
                # Fix: the original interpolated `key` into the SQL string
                # (injection risk); key and value are both parameters now.
                params.append(key)
                key_idx = len(params)
                params.append(value)
                conditions.append(f"metadata->>${key_idx}::text = ${key_idx + 1}")
            query += " WHERE " + " AND ".join(conditions)

        query += f" ORDER BY embedding <=> $1::vector LIMIT ${len(params) + 1}"
        params.append(limit)

        async with self.pool.acquire() as conn:
            rows = await conn.fetch(query, *params)

        return [
            {
                "id": row["id"],
                "content": row["content"],
                "metadata": row["metadata"],
                "score": row["similarity"]
            }
            for row in rows
        ]

    async def hybrid_search(
        self,
        query_embedding: List[float],
        query_text: str,
        limit: int = 10,
        vector_weight: float = 0.5
    ) -> List[Dict]:
        """Hybrid search combining vector and full-text legs.

        vector_weight is the weight on the vector score; the text leg gets
        (1 - vector_weight). Both legs over-fetch 2x limit before fusion.
        """
        async with self.pool.acquire() as conn:
            rows = await conn.fetch(
                """
                WITH vector_results AS (
                    SELECT id, content, metadata,
                           1 - (embedding <=> $1::vector) as vector_score
                    FROM documents
                    ORDER BY embedding <=> $1::vector
                    LIMIT $3 * 2
                ),
                text_results AS (
                    SELECT id, content, metadata,
                           ts_rank(to_tsvector('english', content),
                                   plainto_tsquery('english', $2)) as text_score
                    FROM documents
                    WHERE to_tsvector('english', content) @@ plainto_tsquery('english', $2)
                    LIMIT $3 * 2
                )
                SELECT
                    COALESCE(v.id, t.id) as id,
                    COALESCE(v.content, t.content) as content,
                    COALESCE(v.metadata, t.metadata) as metadata,
                    COALESCE(v.vector_score, 0) * $4 +
                    COALESCE(t.text_score, 0) * (1 - $4) as combined_score
                FROM vector_results v
                FULL OUTER JOIN text_results t ON v.id = t.id
                ORDER BY combined_score DESC
                LIMIT $3
                """,
                # Fix: embedding serialized to pgvector text form (see above).
                self._vector_literal(query_embedding), query_text, limit, vector_weight
            )

        return [dict(row) for row in rows]
|
||||
```
|
||||
|
||||
### Template 4: Weaviate Implementation
|
||||
|
||||
```python
|
||||
import weaviate
|
||||
from weaviate.util import generate_uuid5
|
||||
from typing import List, Dict, Optional
|
||||
|
||||
class WeaviateVectorStore:
    """Weaviate-backed store for documents whose vectors are supplied by the caller."""

    def __init__(
        self,
        url: str = "http://localhost:8080",
        class_name: str = "Document"
    ):
        """Connect to Weaviate and make sure the class schema exists."""
        self.client = weaviate.Client(url=url)
        self.class_name = class_name
        self._ensure_schema()

    def _ensure_schema(self):
        """Create the class schema on first use; vectorizer is disabled."""
        if self.client.schema.exists(self.class_name):
            return
        self.client.schema.create_class({
            "class": self.class_name,
            "vectorizer": "none",  # We provide vectors
            "properties": [
                {"name": "content", "dataType": ["text"]},
                {"name": "source", "dataType": ["string"]},
                {"name": "chunk_id", "dataType": ["int"]}
            ]
        })

    def upsert(self, documents: List[Dict]):
        """Batch-write documents; object UUIDs are derived deterministically from doc id."""
        with self.client.batch as writer:
            writer.batch_size = 100

            for doc in documents:
                payload = {
                    "content": doc["content"],
                    "source": doc.get("source", ""),
                    "chunk_id": doc.get("chunk_id", 0)
                }
                writer.add_data_object(
                    data_object=payload,
                    class_name=self.class_name,
                    uuid=generate_uuid5(doc["id"]),
                    vector=doc["embedding"]
                )

    def search(
        self,
        query_vector: List[float],
        limit: int = 10,
        where_filter: Optional[Dict] = None
    ) -> List[Dict]:
        """Nearest-vector search; score reported as 1 - distance."""
        builder = (
            self.client.query
            .get(self.class_name, ["content", "source", "chunk_id"])
            .with_near_vector({"vector": query_vector})
            .with_limit(limit)
            .with_additional(["distance", "id"])
        )

        if where_filter:
            builder = builder.with_where(where_filter)

        response = builder.do()
        hits = response["data"]["Get"][self.class_name]

        matches = []
        for hit in hits:
            extra = hit["_additional"]
            matches.append({
                "id": extra["id"],
                "content": hit["content"],
                "source": hit["source"],
                "score": 1 - extra["distance"]
            })
        return matches

    def hybrid_search(
        self,
        query: str,
        query_vector: List[float],
        limit: int = 10,
        alpha: float = 0.5  # 0 = keyword, 1 = vector
    ) -> List[Dict]:
        """BM25 + vector hybrid search; alpha weights the vector leg."""
        response = (
            self.client.query
            .get(self.class_name, ["content", "source"])
            .with_hybrid(query=query, vector=query_vector, alpha=alpha)
            .with_limit(limit)
            .with_additional(["score"])
            .do()
        )

        hits = response["data"]["Get"][self.class_name]
        return [
            {
                "content": hit["content"],
                "source": hit["source"],
                "score": hit["_additional"]["score"]
            }
            for hit in hits
        ]
|
||||
```
|
||||
|
||||
## Best Practices
|
||||
|
||||
### Do's
|
||||
- **Use appropriate index** - HNSW for most cases
|
||||
- **Tune parameters** - ef_search, nprobe for recall/speed
|
||||
- **Implement hybrid search** - Combine with keyword search
|
||||
- **Monitor recall** - Measure search quality
|
||||
- **Pre-filter when possible** - Reduce search space
|
||||
|
||||
### Don'ts
|
||||
- **Don't skip evaluation** - Measure before optimizing
|
||||
- **Don't over-index** - Start with flat, scale up
|
||||
- **Don't ignore latency** - P99 matters for UX
|
||||
- **Don't forget costs** - Vector storage adds up
|
||||
|
||||
## Resources
|
||||
|
||||
- [Pinecone Docs](https://docs.pinecone.io/)
|
||||
- [Qdrant Docs](https://qdrant.tech/documentation/)
|
||||
- [pgvector](https://github.com/pgvector/pgvector)
|
||||
- [Weaviate Docs](https://weaviate.io/developers/weaviate)
|
||||
Reference in New Issue
Block a user