YeboShops AI System
Deep dive into Gemini AI product processing and pgvector semantic search.
AI Architecture
┌────────────────────────────────────────────────────────────────┐
│                          AI Pipeline                           │
├────────────────────────────────────────────────────────────────┤
│                                                                │
│  ┌─────────────┐    ┌─────────────┐    ┌─────────────┐         │
│  │   Upload    │───►│   Gemini    │───►│   Webhook   │         │
│  │    Media    │    │   Vision    │    │  Callback   │         │
│  └─────────────┘    └─────────────┘    └─────────────┘         │
│                            │                  │                │
│                            ▼                  ▼                │
│                     ┌─────────────┐    ┌─────────────┐         │
│                     │   Extract   │    │   Update    │         │
│                     │  Metadata   │    │   Product   │         │
│                     └─────────────┘    └─────────────┘         │
│                                               │                │
│                                               ▼                │
│  ┌─────────────┐    ┌─────────────┐    ┌─────────────┐         │
│  │  Vector DB  │◄───│  Voyage AI  │◄───│  Generate   │         │
│  │ (pgvector)  │    │  Embedding  │    │  Embedding  │         │
│  └─────────────┘    └─────────────┘    └─────────────┘         │
│                                                                │
└────────────────────────────────────────────────────────────────┘
Gemini AI Product Processing
When a user uploads product media, Gemini Vision AI extracts:
Extracted Fields
| Field | Type | Description |
|---|---|---|
| title | string | Product name/title |
| description | string | Detailed description |
| seoDescription | string | SEO-optimized short description |
| category | string | Product category |
| tags | string[] | Relevant tags |
| color | string[] | Detected colors |
| material | string | Material (fabric, metal, etc.) |
| condition | string | new, like_new, good, fair, poor |
| price | number | Detected price from image |
| detectedText | string[] | OCR text from image |
| hasNudity | boolean | NSFW content detection |
| hasPrice | boolean | Price tag visible |
| mediaQuality | string | high, medium, low |
| mediaType | string | image, video |
| needsManualApproval | boolean | Flagged for review |
Processing Flow
typescript
// Step 1 — the user uploads media, which creates a placeholder product.
const draftInput = {
  title: 'New Product',
  shopId: 'shop_123',
  userId: 'user_456'
};
const product = await createProduct(draftInput);
// At this point the product status is DRAFT.

// Step 2 — the media is handed to the external AI processing service,
// where Gemini Vision analyzes the image/video.

// Step 3 — the service calls back through the webhook with the extracted data.
const processedData = {
  title: 'iPhone 12 Pro Max 256GB Pacific Blue',
  description: 'Excellent condition iPhone 12 Pro Max...',
  category: 'Electronics',
  tags: ['iphone', 'apple', 'smartphone', '5g'],
  color: ['blue'],
  condition: 'like_new',
  price: 899,
  hasNudity: false,
  mediaQuality: 'high',
  needsManualApproval: false
};
await processProductMediaWebhook({ productId: product.id, processedData });

// Step 4 — the product is updated and published:
// Product status: DRAFT → PUBLISHED
// AI processing completed: true
// Embedding generated: yes
Webhook Handler
typescript
/**
 * Applies the AI-extracted metadata delivered by the media-processing webhook
 * to a product, then regenerates its search embedding.
 *
 * Products whose payload sets `needsManualApproval` or `hasNudity` are marked
 * as processed but are NOT auto-published; their status and `isActive` flag
 * are left untouched so they can be reviewed first.
 *
 * @param params.productId - ID of the product the processed media belongs to.
 * @param params.processedData - Fields extracted by the AI service (untyped webhook payload).
 * @returns The updated product record.
 * @throws Error when no product exists for `productId`.
 */
async function processProductMediaWebhook(params: {
  productId: string;
  processedData: any;
}) {
  const { productId, processedData } = params;

  const product = await prisma.product.findUnique({
    where: { id: productId },
    include: { shop: { include: { country: true } } }
  });
  // Fail early with a clear message instead of generating a slug and then
  // hitting an opaque "record not found" error inside prisma.product.update.
  if (!product) {
    throw new Error(`Product not found: ${productId}`);
  }

  // Map webhook data to product fields
  const updateData: any = {};
  if (processedData.title) {
    updateData.title = processedData.title;
    // The slug is derived from the title, so it is regenerated whenever the title changes.
    updateData.slug = await generateUniqueSlug(processedData.title);
  }
  if (processedData.description) {
    updateData.description = processedData.description;
  }
  // ... map all fields

  // Mark as processed; publish only content the AI did not flag for review.
  updateData.aiProcessingCompleted = true;
  const flaggedForReview =
    processedData.needsManualApproval === true || processedData.hasNudity === true;
  if (!flaggedForReview) {
    updateData.isActive = true;
    updateData.status = 'PUBLISHED';
  }

  // Update product
  const updated = await prisma.product.update({
    where: { id: productId },
    data: updateData
  });

  // Generate embedding for vector search (runs after the update so it sees the new title/description).
  await generateAndStoreProductEmbedding(productId);
  return updated;
}
Vector Search with pgvector
Configuration
typescript
// Voyage AI configuration
// API key read from the environment; it is undefined when VOYAGE_API_KEY is unset.
const VOYAGE_API_KEY = process.env.VOYAGE_API_KEY;
// The model's output size must match the vector(512) column created in the pgvector setup.
const VOYAGE_MODEL = 'voyage-3-lite'; // 512 dimensions
// NOTE(review): not referenced by the snippets on this page — presumably the number of
// products embedded per batch run; confirm against the caller.
const BATCH_SIZE = 50;
pgvector Setup
sql
-- One-time (re-runnable) schema setup for semantic search.
-- Every statement uses IF NOT EXISTS so the script can be executed repeatedly
-- without failing on objects that already exist.

-- Enable pgvector extension
CREATE EXTENSION IF NOT EXISTS vector;

-- Add embedding column to Product table (512 dimensions to match voyage-3-lite)
ALTER TABLE "Product" ADD COLUMN IF NOT EXISTS embedding vector(512);

-- Create HNSW index for fast cosine similarity
-- (vector_cosine_ops serves the <=> operator used by the search queries)
CREATE INDEX IF NOT EXISTS product_embedding_cosine_idx
ON "Product" USING hnsw (embedding vector_cosine_ops)
WITH (m = 16, ef_construction = 64);
Embedding Generation
typescript
/**
 * Builds embedding text for products, requests vectors from the Voyage AI
 * embeddings endpoint and stores them in the `embedding` column of "Product".
 */
class EmbeddingService {
  /**
   * Create embedding text from product data.
   *
   * Joins title, description, category, tags, condition and colors with single
   * spaces, skipping any part that is missing or blank.
   *
   * @param product - Product-like object; only the fields listed above are read.
   * @returns The concatenated text, or an empty string when no field has content.
   */
  static createEmbeddingText(product: any): string {
    return [
      product.title || '',
      product.description || '',
      product.category || '',
      (product.tags || []).join(' '),
      product.condition || '',
      (product.color || []).join(' ')
    ].filter(part => part.trim().length > 0).join(' ');
  }

  /**
   * Generate embeddings via Voyage AI.
   *
   * @param texts - Texts to embed in a single request.
   * @returns One vector per response item, in the order returned by the API.
   * @throws Error when the HTTP request fails or the response has no `data` array.
   */
  static async generateEmbeddings(texts: string[]): Promise<number[][]> {
    // Nothing to embed: skip the network round trip.
    if (texts.length === 0) {
      return [];
    }

    const response = await fetch('https://api.voyageai.com/v1/embeddings', {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        'Authorization': `Bearer ${VOYAGE_API_KEY}`,
      },
      body: JSON.stringify({
        input: texts,
        model: VOYAGE_MODEL, // voyage-3-lite (512 dims)
      }),
    });
    // Surface auth/rate-limit/server failures explicitly instead of failing
    // later with "cannot read properties of undefined" on result.data.
    if (!response.ok) {
      throw new Error(`Voyage AI request failed: ${response.status} ${response.statusText}`);
    }

    const result = await response.json();
    if (!Array.isArray(result?.data)) {
      throw new Error('Voyage AI response did not contain embedding data');
    }
    return result.data.map((item: any) => item.embedding);
  }

  /**
   * Generate and store the embedding for a single product in PostgreSQL.
   *
   * @param productId - ID of the product to embed.
   * @throws Error when the product does not exist, has no embeddable text,
   *   or the API returns no vector for it.
   */
  static async embedProduct(productId: string): Promise<void> {
    const product = await prisma.product.findUnique({ where: { id: productId } });
    if (!product) {
      throw new Error(`Product not found: ${productId}`);
    }

    const embeddingText = this.createEmbeddingText(product);
    // An empty string carries no signal to embed.
    if (embeddingText.length === 0) {
      throw new Error(`Insufficient content to embed product ${productId}`);
    }

    const [embedding] = await this.generateEmbeddings([embeddingText]);
    if (!Array.isArray(embedding)) {
      throw new Error(`No embedding returned for product ${productId}`);
    }

    // Store using raw SQL (Prisma doesn't support vector type natively).
    // The value is sent as a PostgreSQL array literal and cast to double precision[],
    // which is then assigned to the vector column.
    const embeddingArray = `{${embedding.join(',')}}`;
    await prisma.$queryRawUnsafe(
      `UPDATE "Product" SET embedding = $1::double precision[] WHERE id = $2`,
      embeddingArray,
      productId
    );
  }
}
Vector Search
typescript
// Search products using cosine similarity
/**
 * Search published, active products by cosine distance to a query embedding.
 *
 * Bind parameters are collected as the SQL is assembled, so each placeholder
 * number always matches the position of its value regardless of which
 * optional filters are present.
 *
 * @param queryEmbedding - Embedding of the search query.
 * @param filters - Optional `category`, `minPrice` and `maxPrice` constraints.
 * @param limit - Maximum number of rows to return.
 * @param offset - Number of rows to skip (pagination).
 * @returns Matching products ordered from most to least similar; `score` is 1 - distance.
 */
static async searchProducts(
  queryEmbedding: number[],
  filters: SearchFilters = {},
  limit: number = 20,
  offset: number = 0
): Promise<any[]> {
  // An empty vector cannot be cast to the vector type; there is nothing to compare against.
  if (queryEmbedding.length === 0) {
    return [];
  }

  // $1 is always the query vector, passed in pgvector's '[v1,v2,...]' text form.
  const params: unknown[] = [`[${queryEmbedding.join(',')}]`];
  // Registers a value and returns the placeholder that refers to it.
  const bind = (value: unknown): string => {
    params.push(value);
    return `$${params.length}`;
  };

  let query = `
    SELECT
      p.id, p.title, p.description, p.slug, p.category,
      p."priceAmount", p."priceCurrency", p."shopId",
      p.tags, p.condition, p.color, p.status,
      (p.embedding::vector <=> $1::vector) as distance,
      (1 - (p.embedding::vector <=> $1::vector)) as score,
      pm.url as "mediaUrl",
      s.name as "shopName"
    FROM "Product" p
    LEFT JOIN "ProductMedia" pm ON pm."productId" = p.id AND pm."order" = 0
    LEFT JOIN "Shop" s ON s.id = p."shopId"
    WHERE p.status = 'PUBLISHED'
      AND p."isActive" = true
      AND p.embedding IS NOT NULL
  `;

  // Add filters
  if (filters.category) {
    query += ` AND LOWER(p.category) = LOWER(${bind(filters.category)})`;
  }
  // Compare against null/undefined rather than truthiness so a bound of 0 is honoured.
  if (filters.minPrice != null) {
    query += ` AND p."priceAmount" >= ${bind(filters.minPrice)}`;
  }
  if (filters.maxPrice != null) {
    query += ` AND p."priceAmount" <= ${bind(filters.maxPrice)}`;
  }

  // Order by vector similarity (ascending distance = most similar)
  query += ` ORDER BY p.embedding::vector <=> $1::vector LIMIT ${bind(limit)} OFFSET ${bind(offset)}`;

  const results: any[] = await prisma.$queryRawUnsafe(query, ...params);
  return results.map(result => ({
    id: result.id,
    title: result.title,
    score: Number(result.score),
    // ... other fields
  }));
}
Natural Language Search
Query Expansion with Gemini
typescript
/**
 * Asks Gemini to turn a free-text search query into a structured search intent
 * (keywords, categories, price range, condition, intent, confidence).
 *
 * @param query - Raw search text typed by the user.
 * @returns The parsed JSON object produced by the model.
 * @throws Error when the HTTP request fails, the response contains no text
 *   candidate, or the candidate text is not valid JSON.
 */
static async expandQuery(query: string): Promise<SearchIntent> {
  // JSON.stringify quotes the query and escapes embedded quotes/newlines, so
  // user text cannot break out of the quoted "Query" line of the prompt.
  const prompt = `You are a search query analyzer for an African classifieds marketplace.
Query: ${JSON.stringify(query)}
Available categories: Electronics, Fashion & Apparel, Home & Garden, Sports & Outdoors, Beauty & Health, Toys & Games, Books & Media, Baby & Kids, Pets, Office & Stationery, Automotive, Food & Grocery, Services, Other
Return ONLY a JSON object with:
{
"keywords": ["relevant", "search", "terms", "synonyms"],
"categories": ["most relevant category matches"],
"priceRange": {"min": 10, "max": 100} or null,
"condition": "new" | "like_new" | "good" | "fair" | "poor" | null,
"intent": "brief description of what user is looking for",
"confidence": 0.0 to 1.0
}`;

  const response = await fetch(
    'https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash-lite:generateContent',
    {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        // Sent as a header rather than a ?key= query parameter so the key
        // does not end up in URL logs.
        'x-goog-api-key': GEMINI_API_KEY
      },
      body: JSON.stringify({
        contents: [{ parts: [{ text: prompt }] }],
        generationConfig: {
          temperature: 0.1,
          maxOutputTokens: 512,
          responseMimeType: 'application/json'
        }
      }),
    }
  );
  if (!response.ok) {
    throw new Error(`Gemini request failed: ${response.status} ${response.statusText}`);
  }

  const result = await response.json();
  // A response may legitimately carry no candidates; guard every level of the path.
  const text = result?.candidates?.[0]?.content?.parts?.[0]?.text;
  if (typeof text !== 'string') {
    throw new Error('Gemini response contained no text candidate');
  }

  try {
    return JSON.parse(text) as SearchIntent;
  } catch {
    throw new Error('Gemini returned invalid JSON for query expansion');
  }
}
// Example outputs:
// "blue sneakers under $50" →
// {
// keywords: ["blue", "sneakers", "shoes", "footwear"],
// categories: ["Fashion & Apparel"],
// priceRange: { max: 50 },
// intent: "looking for affordable blue sneakers",
// confidence: 0.9
// }
// "birthday gift for my mom" →
// {
// keywords: ["gift", "mother", "birthday", "present", "jewelry", "perfume"],
// categories: ["Fashion & Apparel", "Beauty & Health"],
// priceRange: null,
// intent: "finding a birthday gift for mother",
// confidence: 0.8
// }
Hybrid Search Pipeline
typescript
/**
 * Hybrid product search: vector similarity first, keyword matching as a
 * fallback when fewer than three vector results are found.
 *
 * @param query - Raw search text.
 * @param filters - Optional `category`, `minPrice` and `maxPrice` constraints,
 *   applied to both the vector search and the keyword fallback.
 * @param limit - Maximum number of results to return.
 * @returns The combined results plus timing and paging metadata. Vector hits
 *   and keyword hits come from different queries, so their fields may differ.
 */
static async search(
  query: string,
  filters: SearchFilters = {},
  limit: number = 20
): Promise<SearchResponse> {
  const startTime = Date.now();

  // Step 1: Generate query embedding (skip Gemini expansion for speed)
  let vectorResults: any[] = [];
  try {
    const [queryEmbedding] = await EmbeddingService.generateEmbeddings([query]);
    vectorResults = await EmbeddingService.searchProducts(queryEmbedding, filters, limit);
  } catch (error) {
    // Best effort: a failed vector search degrades to the keyword fallback below.
    const reason = error instanceof Error ? error.message : String(error);
    console.warn('Vector search failed:', reason);
  }

  // Step 2: Keyword fallback if vector results insufficient
  const remaining = limit - vectorResults.length;
  if (vectorResults.length < 3 && remaining > 0) {
    // Drop the empty tokens produced by leading/trailing whitespace.
    const keywords = query.toLowerCase().split(/\s+/).filter(Boolean);

    const where: any = {
      status: 'PUBLISHED',
      isActive: true,
      OR: [
        { title: { contains: query, mode: 'insensitive' } },
        { description: { contains: query, mode: 'insensitive' } },
        { tags: { hasSome: keywords } }
      ]
    };

    // Do not return a product twice when it was already found by the vector search.
    const seenIds = vectorResults.map(result => result.id);
    if (seenIds.length > 0) {
      where.id = { notIn: seenIds };
    }

    // Apply the same filters the vector search uses, so the fallback cannot
    // return products outside the requested category or price range.
    if (filters.category) {
      where.category = { equals: filters.category, mode: 'insensitive' };
    }
    if (filters.minPrice != null || filters.maxPrice != null) {
      where.priceAmount = {
        ...(filters.minPrice != null ? { gte: filters.minPrice } : {}),
        ...(filters.maxPrice != null ? { lte: filters.maxPrice } : {})
      };
    }

    const keywordProducts = await prisma.product.findMany({
      where,
      orderBy: [{ viewCount: 'desc' }, { createdAt: 'desc' }],
      take: remaining
    });
    vectorResults.push(...keywordProducts);
  }

  return {
    results: vectorResults,
    totalCount: vectorResults.length,
    query,
    searchTime: Date.now() - startTime,
    // A full page suggests there may be further results.
    hasMore: vectorResults.length >= limit
  };
}
Similar Products
typescript
// Find similar products using vector similarity
/**
 * Looks up the products whose embeddings are closest to the embedding of the
 * given product.
 *
 * In the query, `p1` is the reference product and `p2` ranges over the
 * candidates: published, active products with an embedding, excluding the
 * reference product itself. Rows are ordered by ascending distance, and
 * `score` is reported as 1 - distance.
 *
 * @param productId - ID of the reference product.
 * @param limit - Maximum number of similar products to return.
 * @param categoryMatch - When true, only products in the reference product's
 *   category are considered.
 * @returns The raw rows from the query (id, title, category, priceAmount, distance, score).
 */
static async findSimilarProducts(
  productId: string,
  limit: number = 10,
  categoryMatch: boolean = true
): Promise<any[]> {
  // Optional predicate restricting candidates to the reference product's category.
  let categoryClause = '';
  if (categoryMatch) {
    categoryClause = 'AND p2.category = p1.category';
  }

  const sql = `
SELECT
p2.id, p2.title, p2.category, p2."priceAmount",
(p1.embedding::vector <=> p2.embedding::vector) as distance,
(1 - (p1.embedding::vector <=> p2.embedding::vector)) as score
FROM "Product" p1, "Product" p2
WHERE p1.id = $1
AND p1.embedding IS NOT NULL
AND p2.status = 'PUBLISHED'
AND p2."isActive" = true
AND p2.embedding IS NOT NULL
AND p2.id != p1.id
${categoryClause}
ORDER BY p1.embedding::vector <=> p2.embedding::vector
LIMIT $2
`;

  const neighbours = await prisma.$queryRawUnsafe(sql, productId, limit);
  return neighbours;
}
Embedding Statistics
typescript
// Get embedding coverage stats
/**
 * Get embedding coverage stats.
 *
 * @returns Total product count, number of products with an embedding, number
 *   of published active products, the share of all products that have an
 *   embedding (formatted as a percentage with one decimal), and the embedding
 *   model name and dimension count.
 */
static async getStats(): Promise<any> {
  const [totalProducts, embeddedRows, published] = await Promise.all([
    prisma.product.count(),
    // Raw SQL because the vector column is not part of the Prisma model.
    prisma.$queryRaw<Array<{ count: bigint }>>`SELECT COUNT(*) FROM "Product" WHERE embedding IS NOT NULL`,
    prisma.product.count({ where: { status: 'PUBLISHED', isActive: true } })
  ]);

  // The raw query yields an array of rows, so convert its single count to a
  // number once and use that number everywhere below.
  const withEmbeddings = Number(embeddedRows[0]?.count ?? 0);
  // Avoid dividing by zero on an empty catalogue.
  const coveragePercent = totalProducts > 0 ? (withEmbeddings / totalProducts) * 100 : 0;

  return {
    totalProducts,
    withEmbeddings,
    published,
    embeddingCoverage: `${coveragePercent.toFixed(1)}%`,
    model: 'voyage-3-lite',
    dimensions: 512
  };
}
Batch Processing
typescript
// Batch embed products without embeddings
/**
 * Generates and stores embeddings for up to `limit` published, active
 * products that do not have one yet.
 *
 * Embeddings for the whole batch are requested with a single
 * `generateEmbeddings` call; each vector is then written with its own UPDATE
 * so that one failing write is recorded without aborting the rest.
 *
 * @param limit - Maximum number of products to process in this run.
 * @returns `processed`: how many embeddings were stored; `errors`: one
 *   "<productId>: <message>" entry per product whose write failed.
 */
static async batchEmbedProducts(limit: number = 50): Promise<{
  processed: number;
  errors: string[];
}> {
  // NOTE(review): ensureVectorSetup is defined elsewhere — presumably it makes
  // sure the pgvector extension/column exist; confirm.
  await this.ensureVectorSetup();

  // Published, active products that still lack an embedding.
  const pending = await prisma.$queryRaw`
SELECT id, title, description, category, tags, condition, color
FROM "Product"
WHERE embedding IS NULL
AND status = 'PUBLISHED'
AND "isActive" = true
LIMIT ${limit}
`;
  if (pending.length === 0) {
    return { processed: 0, errors: [] };
  }

  // Build one text per product and embed them all together.
  const vectors = await this.generateEmbeddings(
    pending.map(item => this.createEmbeddingText(item))
  );

  // Persist sequentially, collecting per-product failures.
  const failures = [];
  let stored = 0;
  for (const [index, item] of pending.entries()) {
    try {
      const literal = `{${vectors[index].join(',')}}`;
      await prisma.$queryRawUnsafe(
        `UPDATE "Product" SET embedding = $1::double precision[] WHERE id = $2`,
        literal,
        item.id
      );
      stored += 1;
    } catch (error) {
      failures.push(`${item.id}: ${error.message}`);
    }
  }

  return { processed: stored, errors: failures };
}
API Integration
Internal Embedding Routes /internal
http
POST /internal/embeddings/batch
X-API-Key: <internal-api-key>
{
"limit": 50
}
Response:
{
"processed": 47,
"errors": ["prod_xyz: Insufficient content"]
}
http
GET /internal/embeddings/stats
X-API-Key: <internal-api-key>
Response:
{
"totalProducts": 1250,
"withEmbeddings": 1180,
"published": 980,
"embeddingCoverage": "94.4%",
"model": "voyage-3-lite",
"dimensions": 512
}