Semantic Search with Embeddings
Build a semantic search system using embedding models. This example shows how to generate text embeddings and use them for similarity-based search.
Overview
This example demonstrates:
- Provisioning an embedding model (kind: embed)
- Generating embeddings via the OpenAI-compatible API
- Computing similarity between texts
- Building a simple semantic search engine
Prerequisites
- mimOE AI Foundation Package running (Quick Start)
- Node.js 18+ or Python 3.8+ (for code examples)
Step 1: Provision an Embedding Model
Create the model metadata:
curl -X POST "http://localhost:8083/mimik-ai/store/v1/models" \
-H "Content-Type: application/json" \
-H "Authorization: Bearer 1234" \
-d '{
"id": "nomic-embed-text",
"version": "1.0.0",
"kind": "embed",
"gguf": {
"initContextSize": 8192
}
}'
Download the model:
curl -X POST "http://localhost:8083/mimik-ai/store/v1/models/nomic-embed-text/download" \
-H "Content-Type: application/json" \
-H "Authorization: Bearer 1234" \
-d '{
"url": "https://huggingface.co/nomic-ai/nomic-embed-text-v1.5-GGUF/resolve/main/nomic-embed-text-v1.5.Q8_0.gguf?download=true"
}'
Verify the model is ready:
curl "http://localhost:8083/mimik-ai/store/v1/models/nomic-embed-text"
Step 2: Generate Embeddings
Single Text
curl -X POST "http://localhost:8083/mimik-ai/openai/v1/embeddings" \
-H "Content-Type: application/json" \
-H "Authorization: Bearer 1234" \
-d '{
"model": "nomic-embed-text",
"input": "What is machine learning?"
}'
Response:
{
"object": "list",
"data": [
{
"object": "embedding",
"index": 0,
"embedding": [0.0234, -0.0891, 0.0456, ...]
}
],
"model": "nomic-embed-text",
"usage": {
"prompt_tokens": 5,
"total_tokens": 5
}
}
Step 3: Build a Semantic Search Engine
JavaScript/Node.js
Install the OpenAI SDK:
npm install openai
semantic-search.js
import OpenAI from 'openai';
const client = new OpenAI({
baseURL: 'http://localhost:8083/mimik-ai/openai/v1',
apiKey: '1234'
});
// Sample document corpus
const documents = [
{
id: 1,
title: 'Introduction to Machine Learning',
content: 'Machine learning is a branch of artificial intelligence that enables systems to learn and improve from experience without being explicitly programmed.'
},
{
id: 2,
title: 'Deep Learning Fundamentals',
content: 'Deep learning is a subset of machine learning that uses neural networks with multiple layers to progressively extract higher-level features from raw input.'
},
{
id: 3,
title: 'Natural Language Processing',
content: 'NLP is a field of AI focused on the interaction between computers and humans through natural language, enabling machines to understand and respond to text or voice data.'
},
{
id: 4,
title: 'Computer Vision Applications',
content: 'Computer vision is an AI field that trains computers to interpret and understand the visual world, using digital images from cameras and deep learning models.'
},
{
id: 5,
title: 'Reinforcement Learning',
content: 'Reinforcement learning is an area of ML where an agent learns to make decisions by taking actions in an environment to maximize cumulative reward.'
}
];
// Generate embeddings for one or more texts (batch limit: 50)
async function getEmbeddings(input) {
const response = await client.embeddings.create({
model: 'nomic-embed-text',
input
});
return response.data.map(d => d.embedding);
}
// Compute cosine similarity between two vectors
function cosineSimilarity(a, b) {
let dotProduct = 0;
let normA = 0;
let normB = 0;
for (let i = 0; i < a.length; i++) {
dotProduct += a[i] * b[i];
normA += a[i] * a[i];
normB += b[i] * b[i];
}
return dotProduct / (Math.sqrt(normA) * Math.sqrt(normB));
}
// Semantic search class
class SemanticSearch {
constructor() {
this.documents = [];
this.embeddings = [];
}
async indexDocuments(docs) {
this.documents = docs;
console.log(`Indexing ${docs.length} documents...`);
// Batch embed all documents in a single request
this.embeddings = await getEmbeddings(docs.map(doc => doc.content));
console.log(`Indexed ${this.embeddings.length} documents`);
}
async search(query, topK = 3) {
// Get embedding for query
const [queryEmbedding] = await getEmbeddings(query);
// Calculate similarities
const similarities = this.embeddings.map((docEmbedding, index) => ({
document: this.documents[index],
similarity: cosineSimilarity(queryEmbedding, docEmbedding)
}));
// Sort by similarity and return top K
return similarities
.sort((a, b) => b.similarity - a.similarity)
.slice(0, topK);
}
}
// Demo
async function main() {
const search = new SemanticSearch();
// Index documents
await search.indexDocuments(documents);
console.log();
// Test queries
const queries = [
'How do neural networks work?',
'What is AI used for in images?',
'How can machines understand text?'
];
for (const query of queries) {
console.log(`Query: "${query}"`);
console.log('-'.repeat(50));
const results = await search.search(query, 2);
for (const result of results) {
console.log(` ${result.document.title}`);
console.log(` Similarity: ${(result.similarity * 100).toFixed(1)}%`);
console.log();
}
}
}
main().catch(console.error);
Run with:
node semantic-search.js
Python
Install the OpenAI SDK:
pip install openai numpy
semantic_search.py
from openai import OpenAI
import numpy as np
from typing import List, Dict
client = OpenAI(
base_url="http://localhost:8083/mimik-ai/openai/v1",
api_key="1234"
)
# Sample document corpus
documents = [
{
"id": 1,
"title": "Introduction to Machine Learning",
"content": "Machine learning is a branch of artificial intelligence that enables systems to learn and improve from experience without being explicitly programmed."
},
{
"id": 2,
"title": "Deep Learning Fundamentals",
"content": "Deep learning is a subset of machine learning that uses neural networks with multiple layers to progressively extract higher-level features from raw input."
},
{
"id": 3,
"title": "Natural Language Processing",
"content": "NLP is a field of AI focused on the interaction between computers and humans through natural language, enabling machines to understand and respond to text or voice data."
},
{
"id": 4,
"title": "Computer Vision Applications",
"content": "Computer vision is an AI field that trains computers to interpret and understand the visual world, using digital images from cameras and deep learning models."
},
{
"id": 5,
"title": "Reinforcement Learning",
"content": "Reinforcement learning is an area of ML where an agent learns to make decisions by taking actions in an environment to maximize cumulative reward."
}
]
def get_embeddings(input) -> List[List[float]]:
"""Generate embeddings for one or more texts (batch limit: 50)."""
response = client.embeddings.create(
model="nomic-embed-text",
input=input
)
return [d.embedding for d in response.data]
def cosine_similarity(a: List[float], b: List[float]) -> float:
"""Compute cosine similarity between two vectors."""
a = np.array(a)
b = np.array(b)
return np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))
class SemanticSearch:
def __init__(self):
self.documents = []
self.embeddings = []
def index_documents(self, docs: List[Dict]):
"""Index documents by generating embeddings."""
self.documents = docs
print(f"Indexing {len(docs)} documents...")
# Batch embed all documents in a single request
self.embeddings = get_embeddings([doc["content"] for doc in docs])
print(f"Indexed {len(self.embeddings)} documents")
def search(self, query: str, top_k: int = 3) -> List[Dict]:
"""Search for documents similar to the query."""
# Get embedding for query
[query_embedding] = get_embeddings(query)
# Calculate similarities
similarities = []
for i, doc_embedding in enumerate(self.embeddings):
similarity = cosine_similarity(query_embedding, doc_embedding)
similarities.append({
"document": self.documents[i],
"similarity": similarity
})
# Sort by similarity and return top K
similarities.sort(key=lambda x: x["similarity"], reverse=True)
return similarities[:top_k]
def main():
search = SemanticSearch()
# Index documents
search.index_documents(documents)
print()
# Test queries
queries = [
"How do neural networks work?",
"What is AI used for in images?",
"How can machines understand text?"
]
for query in queries:
print(f'Query: "{query}"')
print("-" * 50)
results = search.search(query, top_k=2)
for result in results:
print(f" {result['document']['title']}")
print(f" Similarity: {result['similarity'] * 100:.1f}%")
print()
if __name__ == "__main__":
main()
Run with:
python semantic_search.py
Step 4: Advanced Usage
RAG (Retrieval-Augmented Generation)
Combine embeddings with chat completions for RAG:
rag.js
import OpenAI from 'openai';
const client = new OpenAI({
baseURL: 'http://localhost:8083/mimik-ai/openai/v1',
apiKey: '1234'
});
async function ragQuery(query) {
// 1. Find relevant documents (using SemanticSearch from earlier)
const results = await search.search(query, 3);
// 2. Build context from retrieved documents
const context = results
.map(r => `${r.document.title}: ${r.document.content}`)
.join('\n\n');
// 3. Generate response using context
const response = await client.chat.completions.create({
model: 'smollm2-360m',
messages: [
{
role: 'system',
content: `Answer questions based on the following context:\n\n${context}`
},
{ role: 'user', content: query }
]
});
return response.choices[0].message.content;
}
Troubleshooting
Empty or Zero Embeddings
- Verify the model is ready (readyToUse: true)
- Check that you're using kind: embed for embedding models
- Ensure the input text is not empty
Slow Embedding Generation
- First request loads the model (expected)
- Subsequent requests will be faster
Different Embedding Dimensions
Different models produce different embedding dimensions. Ensure you use the same model for both indexing and querying.
Next Steps
- Chat with SmolLM2: Build a chat application
- Inference API Reference: Complete API documentation