feat: implementing embedding AI

This commit is contained in:
2026-03-18 17:15:39 -03:00
parent e8cdf61468
commit 64d3d5be91
32 changed files with 3568 additions and 174 deletions
+2
View File
@@ -31,3 +31,5 @@ http.workspace = true
zip = "0.6"
mime_guess = "2.0"
base64 = "0.22.1"
regex = "1.11"
rand = "0.8"
@@ -0,0 +1,106 @@
-- MySQL Courses Integration
-- Store imported course and study plan data from external MySQL database
-- Used for test template creation with automatic level/course_type detection
-- Study Plans from MySQL
-- One row per study plan mirrored from the external MySQL database.
-- NOTE(review): mysql_id is declared UNIQUE on its own, which already implies the
-- composite UNIQUE(organization_id, mysql_id) at the bottom of the table. It also means
-- the same MySQL plan id cannot be stored for two different organizations; confirm
-- that this is the intended tenancy model.
CREATE TABLE mysql_study_plans (
id SERIAL PRIMARY KEY,
mysql_id INTEGER NOT NULL UNIQUE, -- idPlanDeEstudios from MySQL
organization_id UUID NOT NULL REFERENCES organizations(id) ON DELETE CASCADE,
name VARCHAR(255) NOT NULL, -- Nombre from MySQL
-- Course type detection
course_type VARCHAR(20) NOT NULL DEFAULT 'regular', -- 'regular' (40h) or 'intensive' (80h)
-- Metadata
is_active BOOLEAN NOT NULL DEFAULT true,
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
UNIQUE(organization_id, mysql_id)
);
-- Courses from MySQL
-- One row per course mirrored from the external MySQL database. Each course belongs to
-- a row of mysql_study_plans (study_plan_id points at that table's serial id, not at
-- the MySQL plan id) and is removed together with its plan or its organization.
-- NOTE(review): as with mysql_study_plans, the stand-alone UNIQUE on mysql_id makes the
-- composite UNIQUE(organization_id, mysql_id) redundant; confirm which one is intended.
CREATE TABLE IF NOT EXISTS mysql_courses (
id SERIAL PRIMARY KEY,
mysql_id INTEGER NOT NULL UNIQUE, -- idCursos from MySQL
organization_id UUID NOT NULL REFERENCES organizations(id) ON DELETE CASCADE,
study_plan_id INTEGER NOT NULL REFERENCES mysql_study_plans(id) ON DELETE CASCADE,
name VARCHAR(255) NOT NULL, -- NombreCurso from MySQL
level INTEGER, -- NivelCurso from MySQL (1-12+)
duracion INTEGER, -- Duracion from MySQL (40h or 80h)
-- Auto-calculated fields
course_type VARCHAR(20) NOT NULL DEFAULT 'regular', -- 'regular' (40h) or 'intensive' (80h)
level_calculated VARCHAR(20), -- Calculated from NivelCurso: beginner, beginner_1, etc.
-- Metadata
is_active BOOLEAN NOT NULL DEFAULT true,
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
UNIQUE(organization_id, mysql_id)
);
-- Indexes for performance
-- Cover the foreign-key / tenant columns used to look courses up by plan and by organization.
-- NOTE(review): both tables already declare UNIQUE(organization_id, mysql_id), whose
-- backing index leads with organization_id, so the two *_org indexes may be redundant; confirm.
CREATE INDEX idx_mysql_courses_study_plan ON mysql_courses(study_plan_id);
CREATE INDEX idx_mysql_courses_org ON mysql_courses(organization_id);
CREATE INDEX idx_mysql_plans_org ON mysql_study_plans(organization_id);
-- Trigger function: stamps the row being written with the current time.
-- Intended for BEFORE UPDATE row triggers; it overwrites NEW.updated_at with NOW()
-- and hands the modified row back so the UPDATE proceeds with it.
CREATE OR REPLACE FUNCTION update_mysql_integration_updated_at()
RETURNS TRIGGER
LANGUAGE plpgsql
AS $$
BEGIN
    NEW.updated_at := NOW();
    RETURN NEW;
END;
$$;
-- Triggers for updated_at
-- Every row UPDATE on either integration table runs
-- update_mysql_integration_updated_at() before the write is applied.
CREATE TRIGGER update_mysql_study_plans_updated_at
BEFORE UPDATE ON mysql_study_plans
FOR EACH ROW
EXECUTE FUNCTION update_mysql_integration_updated_at();
CREATE TRIGGER update_mysql_courses_updated_at
BEFORE UPDATE ON mysql_courses
FOR EACH ROW
EXECUTE FUNCTION update_mysql_integration_updated_at();
-- Function to determine course level from NivelCurso
-- Maps the numeric MySQL level onto a level label:
--   NULL -> 'intermediate' (fallback when the source has no level)
--   <= 2 -> 'beginner',      3-4  -> 'beginner_1',     5-6   -> 'beginner_2'
--   7-8  -> 'intermediate',  9-10 -> 'intermediate_1', 11-12 -> 'intermediate_2'
--   > 12 -> 'advanced'
-- The result depends only on the argument, so the function is declared IMMUTABLE
-- (the default would be VOLATILE, which prevents constant folding and use in
-- index expressions).
CREATE OR REPLACE FUNCTION calculate_course_level(nivel INTEGER)
RETURNS TEXT AS $$
BEGIN
    RETURN CASE
        WHEN nivel IS NULL THEN 'intermediate'
        WHEN nivel <= 2 THEN 'beginner'
        WHEN nivel <= 4 THEN 'beginner_1'
        WHEN nivel <= 6 THEN 'beginner_2'
        WHEN nivel <= 8 THEN 'intermediate'
        WHEN nivel <= 10 THEN 'intermediate_1'
        WHEN nivel <= 12 THEN 'intermediate_2'
        ELSE 'advanced'
    END;
END;
$$ LANGUAGE plpgsql IMMUTABLE;
-- Function to determine course type from plan name
-- Returns 'intensive' when the plan name contains "intensive" or "intensivo"
-- (case-insensitive); every other name, including NULL, yields 'regular'.
-- The result depends only on the argument, so the function is declared IMMUTABLE
-- instead of being left at the default VOLATILE.
CREATE OR REPLACE FUNCTION calculate_course_type(plan_name TEXT)
RETURNS TEXT AS $$
BEGIN
    RETURN CASE
        WHEN LOWER(plan_name) LIKE '%intensive%' OR LOWER(plan_name) LIKE '%intensivo%' THEN 'intensive'
        ELSE 'regular'
    END;
END;
$$ LANGUAGE plpgsql IMMUTABLE;
@@ -0,0 +1,82 @@
-- Fix test_templates to use mysql_course_id reference instead of level/course_type strings
-- This ensures data consistency and leverages the imported MySQL course data in PostgreSQL
-- Add mysql_course_id column to test_templates
-- The foreign key targets mysql_courses.mysql_id (the id carried over from MySQL), not the
-- serial primary key mysql_courses.id. Deleting the referenced course sets the column to NULL.
-- The same statement makes level and course_type nullable.
ALTER TABLE test_templates
ADD COLUMN mysql_course_id INTEGER REFERENCES mysql_courses(mysql_id) ON DELETE SET NULL,
ALTER COLUMN level DROP NOT NULL,
ALTER COLUMN course_type DROP NOT NULL;
-- Create index for faster lookups
CREATE INDEX IF NOT EXISTS idx_test_templates_mysql_course ON test_templates(mysql_course_id);
-- Add comment for documentation
COMMENT ON COLUMN test_templates.mysql_course_id IS 'Reference to imported MySQL course (mysql_courses.mysql_id). Preferred over level/course_type fields.';
-- Create view for backward compatibility - shows calculated level/course_type from mysql_courses
-- Exposes every test_templates column plus the linked course's name, calculated level,
-- course type and duration. The LEFT JOIN keeps templates that have no mysql_course_id
-- (their course columns are NULL).
CREATE OR REPLACE VIEW test_templates_with_course_info AS
SELECT
tt.*,
mc.name AS course_name,
mc.level_calculated,
mc.course_type AS calculated_course_type,
mc.duracion AS course_duration
FROM test_templates tt
LEFT JOIN mysql_courses mc ON tt.mysql_course_id = mc.mysql_id;
-- Function to get template with course info
-- Returns the template identified by p_template_id (zero or one row) together with the
-- name, calculated level and course type of the linked mysql_courses row. Course columns
-- are NULL when the template has no linked course.
--
-- mysql_courses.level_calculated and mysql_courses.course_type are VARCHAR(20) while the
-- result columns are declared TEXT. RETURN QUERY requires the query's column types to match
-- the declared result types, so both are cast explicitly; without the casts the call fails
-- with "structure of query does not match function result type".
-- The function only reads data, hence STABLE.
CREATE OR REPLACE FUNCTION get_test_template_with_course(p_template_id UUID)
RETURNS TABLE (
    id UUID,
    organization_id UUID,
    name VARCHAR,
    description TEXT,
    mysql_course_id INTEGER,
    course_name VARCHAR,
    level course_level,
    level_calculated TEXT,
    course_type course_type,
    calculated_course_type TEXT,
    test_type test_type,
    duration_minutes INTEGER,
    passing_score INTEGER,
    total_points INTEGER,
    instructions TEXT,
    template_data JSONB,
    tags TEXT[],
    is_active BOOLEAN,
    usage_count INTEGER,
    created_by UUID,
    created_at TIMESTAMPTZ,
    updated_at TIMESTAMPTZ
) AS $$
BEGIN
    -- Every column reference is table-qualified on purpose: the RETURNS TABLE column
    -- names are also PL/pgSQL variables inside this body.
    RETURN QUERY
    SELECT
        tt.id,
        tt.organization_id,
        tt.name,
        tt.description,
        tt.mysql_course_id,
        mc.name,
        tt.level,
        mc.level_calculated::TEXT,
        tt.course_type,
        mc.course_type::TEXT,
        tt.test_type,
        tt.duration_minutes,
        tt.passing_score,
        tt.total_points,
        tt.instructions,
        tt.template_data,
        tt.tags,
        tt.is_active,
        tt.usage_count,
        tt.created_by,
        tt.created_at,
        tt.updated_at
    FROM test_templates tt
    LEFT JOIN mysql_courses mc ON tt.mysql_course_id = mc.mysql_id
    WHERE tt.id = p_template_id;
END;
$$ LANGUAGE plpgsql STABLE;
@@ -0,0 +1,167 @@
-- PGVector Embeddings Integration
-- Enables semantic search for question bank and RAG generation
-- Enable pgvector extension
CREATE EXTENSION IF NOT EXISTS vector;
-- Add embedding column to question_bank table
-- Using 768 dimensions for nomic-embed-text model
-- The column is nullable: rows without an embedding are simply not searchable yet.
ALTER TABLE question_bank
ADD COLUMN IF NOT EXISTS embedding vector(768);
-- Add embedding_updated_at timestamp
ALTER TABLE question_bank
ADD COLUMN IF NOT EXISTS embedding_updated_at TIMESTAMPTZ;
-- Create index for fast semantic search (IVFFlat for >10k rows)
-- vector_cosine_ops matches the cosine-distance operator (<=>) used by the search functions.
-- NOTE(review): the index is created while the new column is still empty; check the
-- pgvector guidance on building IVFFlat indexes after data is loaded and on sizing "lists".
CREATE INDEX IF NOT EXISTS idx_question_embeddings
ON question_bank
USING ivfflat (embedding vector_cosine_ops)
WITH (lists = 100);
-- Create index for filtering by embedding status
CREATE INDEX IF NOT EXISTS idx_question_embedding_updated
ON question_bank (embedding_updated_at);
-- Function to calculate cosine similarity between two embeddings
-- Returns 1 - cosine distance of the two questions' embeddings, the same similarity
-- convention the other search functions in this migration use. The pgvector <=> operator
-- yields a distance, so returning it unchanged would report 0 for identical vectors.
-- Returns NULL when either question does not exist or has no embedding.
CREATE OR REPLACE FUNCTION question_similarity(
    q1_id UUID,
    q2_id UUID
)
RETURNS REAL AS $$
BEGIN
    RETURN (
        SELECT (1 - (qb1.embedding <=> qb2.embedding))::REAL
        FROM question_bank qb1, question_bank qb2
        WHERE qb1.id = q1_id AND qb2.id = q2_id
    );
END;
$$ LANGUAGE plpgsql STABLE;
-- Function to find similar questions (for duplicate detection)
-- Returns up to p_limit questions from the same organization as p_question_id whose
-- cosine similarity (1 - cosine distance) to it is at least p_threshold, most similar first.
-- Returns no rows when the source question does not exist or has no embedding.
--
-- Implementation notes:
--  * The source embedding/organization are loaded into variables first. The RETURNS TABLE
--    column "id" is also a PL/pgSQL variable, so an unqualified "WHERE id = ..." inside the
--    query would be rejected as an ambiguous column reference.
--  * <=> produces double precision; the value is cast to REAL to match the declared
--    result column, as RETURN QUERY requires matching column types.
--  * p_threshold is applied in the query (it was previously accepted but ignored).
CREATE OR REPLACE FUNCTION find_similar_questions(
    p_question_id UUID,
    p_threshold REAL DEFAULT 0.85,
    p_limit INTEGER DEFAULT 10
)
RETURNS TABLE (
    id UUID,
    question_text TEXT,
    similarity REAL,
    question_type question_bank_type
) AS $$
DECLARE
    v_embedding question_bank.embedding%TYPE;
    v_organization_id question_bank.organization_id%TYPE;
BEGIN
    SELECT src.embedding, src.organization_id
    INTO v_embedding, v_organization_id
    FROM question_bank src
    WHERE src.id = p_question_id;

    -- Nothing to compare against: unknown question or embedding not generated yet.
    IF v_embedding IS NULL THEN
        RETURN;
    END IF;

    RETURN QUERY
    SELECT
        qb.id,
        qb.question_text,
        (1 - (qb.embedding <=> v_embedding))::REAL AS similarity,
        qb.question_type
    FROM question_bank qb
    WHERE qb.id != p_question_id
      AND qb.organization_id = v_organization_id
      AND qb.embedding IS NOT NULL
      AND (1 - (qb.embedding <=> v_embedding)) >= p_threshold
    ORDER BY qb.embedding <=> v_embedding
    LIMIT p_limit;
END;
$$ LANGUAGE plpgsql STABLE;
-- Function to search questions by semantic similarity
-- Returns up to p_limit questions of p_organization_id that have an embedding and whose
-- similarity (1 - cosine distance to p_query_embedding) is at least p_threshold,
-- ordered from most to least similar.
-- Every column reference is qualified with "qb" because the RETURNS TABLE column names
-- are also variables inside the function body.
CREATE OR REPLACE FUNCTION search_questions_semantic(
p_organization_id UUID,
p_query_embedding vector(768),
p_limit INTEGER DEFAULT 20,
p_threshold DOUBLE PRECISION DEFAULT 0.5
)
RETURNS TABLE (
id UUID,
question_text TEXT,
question_type question_bank_type,
similarity DOUBLE PRECISION,
tags TEXT[],
difficulty VARCHAR,
points INTEGER
) AS $$
BEGIN
RETURN QUERY
SELECT
qb.id,
qb.question_text,
qb.question_type,
(1 - (qb.embedding <=> p_query_embedding))::DOUBLE PRECISION AS similarity,
qb.tags,
qb.difficulty,
qb.points
FROM question_bank qb
WHERE qb.organization_id = p_organization_id
AND qb.embedding IS NOT NULL
AND (1 - (qb.embedding <=> p_query_embedding))::DOUBLE PRECISION >= p_threshold
-- Order by the raw distance expression (smallest distance = most similar).
ORDER BY qb.embedding <=> p_query_embedding
LIMIT p_limit;
END;
$$ LANGUAGE plpgsql STABLE;
-- Function to get diverse questions covering multiple topics
-- Uses Maximal Marginal Relevance (MMR) to balance relevance and diversity
--
-- Greedy selection: on each of up to p_limit rounds, pick the not-yet-selected question
-- of the organization that maximises
--     p_lambda * similarity(question, query)
--   - (1 - p_lambda) * max similarity(question, already selected questions)
-- where similarity = 1 - cosine distance. Selection stops early when no candidate is left.
-- The selected questions are returned ordered by their similarity to the query
-- (not in selection order).
CREATE OR REPLACE FUNCTION get_diverse_questions(
    p_organization_id UUID,
    p_query_embedding vector(768),
    p_limit INTEGER DEFAULT 10,
    p_lambda DOUBLE PRECISION DEFAULT 0.7 -- 0 = max diversity, 1 = max relevance
)
RETURNS TABLE (
    id UUID,
    question_text TEXT,
    question_type question_bank_type,
    similarity DOUBLE PRECISION
) AS $$
DECLARE
    selected_ids UUID[] := ARRAY[]::UUID[];
    candidate_id UUID;
BEGIN
    -- Simple MMR implementation: iteratively select questions
    -- that are relevant but dissimilar to already selected ones
    FOR i IN 1..p_limit LOOP
        SELECT qb.id INTO candidate_id
        FROM question_bank qb
        WHERE qb.organization_id = p_organization_id
          AND qb.id != ALL(selected_ids)
          AND qb.embedding IS NOT NULL
        ORDER BY
            (1 - (qb.embedding <=> p_query_embedding)) * p_lambda -
            (COALESCE((
                -- Highest similarity to anything already picked; 0 on the first round.
                SELECT MAX(1 - (qb.embedding <=> qb2.embedding))
                FROM unnest(selected_ids) AS sid
                JOIN question_bank qb2 ON qb2.id = sid
            ), 0)) * (1 - p_lambda)
            DESC
        LIMIT 1;

        -- SELECT INTO leaves candidate_id NULL when no row qualified.
        EXIT WHEN candidate_id IS NULL;
        selected_ids := array_append(selected_ids, candidate_id);
    END LOOP;

    RETURN QUERY
    SELECT
        qb.id,
        qb.question_text,
        qb.question_type,
        1 - (qb.embedding <=> p_query_embedding) AS similarity
    FROM question_bank qb
    WHERE qb.id = ANY(selected_ids)
    ORDER BY similarity DESC;
END;
$$ LANGUAGE plpgsql STABLE;
-- Comments
-- Catalog documentation for the embedding columns and helper functions.
-- The dimension stated here must match the column definition, vector(768).
COMMENT ON COLUMN question_bank.embedding IS 'Semantic embedding vector for similarity search (nomic-embed-text, 768 dimensions)';
COMMENT ON COLUMN question_bank.embedding_updated_at IS 'Timestamp when embedding was last generated';
COMMENT ON FUNCTION question_similarity IS 'Calculate cosine similarity between two questions';
COMMENT ON FUNCTION find_similar_questions IS 'Find questions similar to a given question (for duplicate detection)';
COMMENT ON FUNCTION search_questions_semantic IS 'Search questions by semantic similarity using embedding vector';
COMMENT ON FUNCTION get_diverse_questions IS 'Get diverse questions using Maximal Marginal Relevance (MMR)';
@@ -0,0 +1,364 @@
//! Handlers for PGVector embeddings in Question Bank
//! Enables semantic search and RAG with AI-powered embeddings
use axum::{
Json,
extract::{Path, Query, State},
http::StatusCode,
};
use common::ai::{self, generate_embedding};
use common::models::QuestionBank;
use common::middleware::Org;
use reqwest::Client;
use serde::{Deserialize, Serialize};
use sqlx::PgPool;
use uuid::Uuid;
// ==================== Query Parameters ====================
/// Query-string parameters accepted by `semantic_search`.
#[derive(Debug, Deserialize)]
pub struct SemanticSearchFilters {
/// Free-text search query; `semantic_search` turns it into an embedding.
pub query: String,
/// Maximum number of rows to return; `semantic_search` uses 20 when absent.
pub limit: Option<i32>,
/// Minimum similarity a row must reach; `semantic_search` uses 0.5 when absent.
pub threshold: Option<f64>,
/// Optional exact match against the question type's text form.
pub question_type: Option<String>,
/// Optional exact match against the `difficulty` column.
pub difficulty: Option<String>,
}
/// One row returned by the similarity queries in this module
/// (`semantic_search` and `find_similar_questions`).
#[derive(Debug, Serialize, Deserialize, sqlx::FromRow)]
pub struct SemanticSearchResult {
pub id: Uuid,
pub question_text: String,
/// Question type as text; the queries select `question_type::text`.
pub question_type: String,
/// Computed by the queries as `1 - (embedding <=> other)`.
pub similarity: f64, // PostgreSQL vector similarity returns double precision
pub tags: Option<Vec<String>>,
pub difficulty: Option<String>,
pub points: i32,
}
/// Summary returned by `generate_question_embeddings` for one batch run.
#[derive(Debug, Serialize, Deserialize)]
pub struct GenerateEmbeddingsResult {
/// Questions whose embedding was generated and stored.
pub processed: i32,
/// Questions for which embedding generation or the database update failed.
pub failed: i32,
/// Wall-clock duration of the whole batch, in milliseconds.
pub duration_ms: u64,
}
// ==================== Generate Embeddings ====================
/// POST /api/question-bank/embeddings/generate - Generate embeddings for all questions without them
pub async fn generate_question_embeddings(
Org(org_ctx): Org,
State(pool): State<PgPool>,
) -> Result<Json<GenerateEmbeddingsResult>, (StatusCode, String)> {
let start = std::time::Instant::now();
// Create client that accepts invalid certificates (for dev with self-signed certs)
let client = reqwest::Client::builder()
.danger_accept_invalid_certs(true)
.danger_accept_invalid_hostnames(true)
.build()
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("HTTP client error: {}", e)))?;
let ollama_url = ai::get_ollama_url();
let model = ai::get_embedding_model();
// Get questions without embeddings
let questions: Vec<QuestionBank> = sqlx::query_as(
r#"
SELECT * FROM question_bank
WHERE organization_id = $1
AND (embedding IS NULL OR embedding_updated_at IS NULL)
ORDER BY created_at DESC
LIMIT 100
"#
)
.bind(org_ctx.id)
.fetch_all(&pool)
.await
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?;
let total = questions.len();
let mut processed = 0;
let mut failed = 0;
for question in questions {
// Generate embedding text (combine question + options + explanation)
let mut embedding_text = question.question_text.clone();
if let Some(options) = &question.options {
if let Some(opts_str) = options.as_str() {
embedding_text.push_str(" ");
embedding_text.push_str(opts_str);
} else if let Some(opts_arr) = options.as_array() {
for opt in opts_arr {
if let Some(opt_str) = opt.as_str() {
embedding_text.push_str(" ");
embedding_text.push_str(opt_str);
}
}
}
}
if let Some(explanation) = &question.explanation {
embedding_text.push_str(" ");
embedding_text.push_str(explanation);
}
// Generate embedding
match generate_embedding(&client, &ollama_url, &model, &embedding_text).await {
Ok(response) => {
let pgvector = ai::embedding_to_pgvector(&response.embedding);
// Update question with embedding
let result: Result<(i64,), sqlx::Error> = sqlx::query_as(
r#"
UPDATE question_bank
SET embedding = $1::vector,
embedding_updated_at = NOW()
WHERE id = $2
RETURNING 1
"#
)
.bind(&pgvector)
.bind(question.id)
.fetch_one(&pool)
.await;
match result {
Ok(_) => {
processed += 1;
tracing::debug!("Generated embedding for question {}", question.id);
}
Err(e) => {
failed += 1;
tracing::error!("Failed to update embedding for question {}: {}", question.id, e);
}
}
}
Err(e) => {
tracing::error!("Failed to generate embedding for question {}: {}", question.id, e);
failed += 1;
}
}
}
let duration_ms = start.elapsed().as_millis() as u64;
tracing::info!(
"Generated embeddings: {} processed, {} failed in {}ms",
processed,
failed,
duration_ms
);
Ok(Json(GenerateEmbeddingsResult {
processed,
failed,
duration_ms,
}))
}
/// POST /api/question-bank/:id/embedding/regenerate - Regenerate embedding for a specific question
pub async fn regenerate_question_embedding(
Org(org_ctx): Org,
Path(question_id): Path<Uuid>,
State(pool): State<PgPool>,
) -> Result<StatusCode, (StatusCode, String)> {
// Create client that accepts invalid certificates
let client = reqwest::Client::builder()
.danger_accept_invalid_certs(true)
.danger_accept_invalid_hostnames(true)
.build()
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("HTTP client error: {}", e)))?;
let ollama_url = ai::get_ollama_url();
let model = ai::get_embedding_model();
// Get question
let question: QuestionBank = sqlx::query_as(
"SELECT * FROM question_bank WHERE id = $1 AND organization_id = $2"
)
.bind(question_id)
.bind(org_ctx.id)
.fetch_optional(&pool)
.await
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?
.ok_or((StatusCode::NOT_FOUND, "Question not found".to_string()))?;
// Generate embedding text
let mut embedding_text = question.question_text.clone();
if let Some(options) = &question.options {
if let Some(opts_str) = options.as_str() {
embedding_text.push_str(" ");
embedding_text.push_str(opts_str);
} else if let Some(opts_arr) = options.as_array() {
for opt in opts_arr {
if let Some(opt_str) = opt.as_str() {
embedding_text.push_str(" ");
embedding_text.push_str(opt_str);
}
}
}
}
if let Some(explanation) = &question.explanation {
embedding_text.push_str(" ");
embedding_text.push_str(explanation);
}
// Generate embedding
let response = generate_embedding(&client, &ollama_url, &model, &embedding_text)
.await
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("AI error: {}", e)))?;
let pgvector = ai::embedding_to_pgvector(&response.embedding);
// Update question
sqlx::query(
r#"
UPDATE question_bank
SET embedding = $1::vector,
embedding_updated_at = NOW()
WHERE id = $2
"#
)
.bind(&pgvector)
.bind(question_id)
.execute(&pool)
.await
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?;
Ok(StatusCode::OK)
}
// ==================== Semantic Search ====================
/// GET /api/question-bank/semantic-search - Search questions by semantic similarity
///
/// Embeds `filters.query`, then returns the organization's questions whose similarity
/// (`1 - cosine distance`) to that embedding is at least `threshold` (default 0.5),
/// most similar first, limited to `limit` rows (default 20). `question_type` and
/// `difficulty` are optional exact-match filters.
///
/// Responds with 400 when `limit` is negative (PostgreSQL rejects a negative LIMIT,
/// which previously surfaced as a 500) and with 500 on HTTP-client, embedding-service
/// or database errors.
pub async fn semantic_search(
    Org(org_ctx): Org,
    State(pool): State<PgPool>,
    Query(filters): Query<SemanticSearchFilters>,
) -> Result<Json<Vec<SemanticSearchResult>>, (StatusCode, String)> {
    let limit = filters.limit.unwrap_or(20);
    let threshold = filters.threshold.unwrap_or(0.5);
    // Validate before calling the embedding service so a bad request costs nothing.
    if limit < 0 {
        return Err((StatusCode::BAD_REQUEST, "limit must not be negative".to_string()));
    }

    // Create client that accepts invalid certificates
    let client = reqwest::Client::builder()
        .danger_accept_invalid_certs(true)
        .danger_accept_invalid_hostnames(true)
        .build()
        .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("HTTP client error: {}", e)))?;
    let ollama_url = ai::get_ollama_url();
    let model = ai::get_embedding_model();

    // Generate embedding for query
    let embedding_response = generate_embedding(&client, &ollama_url, &model, &filters.query)
        .await
        .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("AI error: {}", e)))?;
    let pgvector = ai::embedding_to_pgvector(&embedding_response.embedding);

    // Build query with optional filters. Only placeholder numbers are interpolated into
    // the SQL text; every user-supplied value is bound as a parameter below.
    let mut query = String::from(
        r#"
        SELECT
            id,
            question_text,
            question_type::text,
            1 - (embedding <=> $1::vector) AS similarity,
            tags,
            difficulty,
            points
        FROM question_bank
        WHERE organization_id = $2
          AND embedding IS NOT NULL
          AND 1 - (embedding <=> $1::vector) >= $3
        "#
    );
    let mut param_idx = 3;
    if filters.question_type.is_some() {
        param_idx += 1;
        query.push_str(&format!(" AND question_type::text = ${}", param_idx));
    }
    if filters.difficulty.is_some() {
        param_idx += 1;
        query.push_str(&format!(" AND difficulty = ${}", param_idx));
    }
    param_idx += 1;
    query.push_str(&format!(" ORDER BY embedding <=> $1::vector LIMIT ${}", param_idx));

    // Bind in exactly the order the placeholders were numbered above.
    let mut sql_query = sqlx::query_as::<_, SemanticSearchResult>(&query)
        .bind(&pgvector)
        .bind(org_ctx.id)
        .bind(threshold);
    if let Some(question_type) = &filters.question_type {
        sql_query = sql_query.bind(question_type);
    }
    if let Some(difficulty) = &filters.difficulty {
        sql_query = sql_query.bind(difficulty);
    }
    sql_query = sql_query.bind(limit);

    let results = sql_query
        .fetch_all(&pool)
        .await
        .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?;

    Ok(Json(results))
}
/// GET /api/question-bank/similar/:id - Find questions similar to a given question
///
/// Returns up to `limit` (default 10) questions of the caller's organization ordered by
/// cosine distance to the given question, keeping only those whose similarity is at
/// least `threshold` (default 0.85). The threshold is applied after the LIMIT, so fewer
/// than `limit` rows may come back.
///
/// The source question is looked up inside the caller's organization only. When it does
/// not exist there, or has no embedding yet, the result is an empty list. Previously the
/// comparison produced NULL similarities that failed to decode into `f64` (a 500), and a
/// question id from another organization could be used as the comparison source.
///
/// Responds with 400 when `limit` is negative and 500 on database errors.
pub async fn find_similar_questions(
    Org(org_ctx): Org,
    Path(question_id): Path<Uuid>,
    Query(params): Query<SimilarityParams>,
    State(pool): State<PgPool>,
) -> Result<Json<Vec<SemanticSearchResult>>, (StatusCode, String)> {
    let threshold = params.threshold.unwrap_or(0.85);
    let limit = params.limit.unwrap_or(10);
    if limit < 0 {
        return Err((StatusCode::BAD_REQUEST, "limit must not be negative".to_string()));
    }

    let results = sqlx::query_as::<_, SemanticSearchResult>(
        r#"
        SELECT
            id,
            question_text,
            question_type::text,
            1 - (embedding <=> (
                SELECT embedding FROM question_bank
                WHERE id = $1 AND organization_id = $2
            )) AS similarity,
            tags,
            difficulty,
            points
        FROM question_bank
        WHERE id != $1
          AND organization_id = $2
          AND embedding IS NOT NULL
          AND EXISTS (
                SELECT 1 FROM question_bank
                WHERE id = $1 AND organization_id = $2 AND embedding IS NOT NULL
          )
        ORDER BY embedding <=> (
            SELECT embedding FROM question_bank
            WHERE id = $1 AND organization_id = $2
        )
        LIMIT $3
        "#
    )
    .bind(question_id)
    .bind(org_ctx.id)
    .bind(limit)
    .fetch_all(&pool)
    .await
    .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?
    .into_iter()
    .filter(|r| r.similarity >= threshold)
    .collect();

    Ok(Json(results))
}
/// Query-string parameters accepted by `find_similar_questions`.
#[derive(Debug, Deserialize)]
pub struct SimilarityParams {
/// Minimum similarity a result must reach; the handler uses 0.85 when absent.
pub threshold: Option<f64>,
/// Maximum number of rows fetched; the handler uses 10 when absent.
pub limit: Option<i32>,
}
@@ -12,6 +12,142 @@ use serde::{Deserialize, Serialize};
use sqlx::PgPool;
use uuid::Uuid;
// ==================== MySQL Study Plans & Courses ====================
/// Row of the PostgreSQL `mysql_study_plans` table (a study plan imported from MySQL).
#[derive(Debug, sqlx::FromRow, Serialize, Deserialize)]
pub struct MySqlStudyPlan {
/// Serial primary key in PostgreSQL.
pub id: i32,
/// Plan id in the source MySQL database.
pub mysql_id: i32,
pub organization_id: Uuid,
pub name: String,
/// Stored as text; the import code in this module writes "regular" or "intensive".
pub course_type: String,
pub is_active: bool,
pub created_at: chrono::DateTime<chrono::Utc>,
pub updated_at: chrono::DateTime<chrono::Utc>,
}
/// Row of the PostgreSQL `mysql_courses` table (a course imported from MySQL).
///
/// NOTE(review): the import code in this module also writes a `duracion` column to
/// `mysql_courses`, which has no field here; confirm whether that omission is intentional.
#[derive(Debug, sqlx::FromRow, Serialize, Deserialize)]
pub struct MySqlCourse {
/// Serial primary key in PostgreSQL.
pub id: i32,
/// Course id in the source MySQL database.
pub mysql_id: i32,
pub organization_id: Uuid,
/// References the PostgreSQL id of the owning study plan (not the MySQL plan id).
pub study_plan_id: i32,
pub name: String,
/// Numeric level from MySQL, if any.
pub level: Option<i32>,
/// Stored as text; the import code in this module writes "regular" or "intensive".
pub course_type: String,
/// Level label derived from `level` at import time.
pub level_calculated: Option<String>,
pub is_active: bool,
pub created_at: chrono::DateTime<chrono::Utc>,
pub updated_at: chrono::DateTime<chrono::Utc>,
}
/// Save or update study plans and courses from MySQL during import
///
/// Upserts every plan into `mysql_study_plans` and every course into `mysql_courses`,
/// keyed by the MySQL id. Plans are written first because each course row needs the
/// PostgreSQL id of its plan.
///
/// * `course_type` of a plan is derived from its name; `course_type` of a course is
///   derived from its duration; `level_calculated` is derived from the course level.
/// * On conflict the course's `study_plan_id` is refreshed as well, so a course that
///   moved to another plan in MySQL is re-linked instead of keeping the stale plan.
/// * Plan ids are resolved once per distinct plan and cached for the rest of the call,
///   rather than queried again for every course.
///
/// # Errors
/// Returns a descriptive `String` on the first failing statement. Statements are not
/// wrapped in a transaction, so rows written before the failure remain.
///
/// NOTE(review): the upserts conflict on `mysql_id` alone and never change
/// `organization_id`. If another organization already owns a row with the same MySQL
/// id, the plan lookup below (which filters by `org_id`) will fail; confirm tenancy rules.
pub async fn save_mysql_courses_and_plans(
    pool: &PgPool,
    org_id: Uuid,
    plans: Vec<MySqlPlanInfo>,
    courses: Vec<MySqlCourseInfo>,
) -> Result<(), String> {
    // Save study plans first
    for plan in &plans {
        let course_type = calculate_course_type(&plan.nombre_plan);
        sqlx::query(
            r#"
            INSERT INTO mysql_study_plans (mysql_id, organization_id, name, course_type)
            VALUES ($1, $2, $3, $4)
            ON CONFLICT (mysql_id) DO UPDATE SET
                name = EXCLUDED.name,
                course_type = EXCLUDED.course_type,
                updated_at = NOW()
            "#
        )
        .bind(plan.id_plan_de_estudios)
        .bind(org_id)
        .bind(&plan.nombre_plan)
        .bind(&course_type)
        .execute(pool)
        .await
        .map_err(|e| format!("Failed to save study plan: {}", e))?;
    }

    // MySQL plan id -> PostgreSQL mysql_study_plans.id, filled lazily.
    let mut plan_ids: std::collections::HashMap<i32, i32> = std::collections::HashMap::new();

    // Save courses
    for course in &courses {
        // Determine course_type from duration (40h = regular, 80h = intensive)
        let course_type = calculate_course_type_from_duration(course.duracion);
        let level_calculated = calculate_course_level(course.nivel_curso);

        // Get study_plan_id from mysql_study_plans (cached per plan)
        let study_plan_id = match plan_ids.get(&course.id_plan_de_estudios) {
            Some(&cached) => cached,
            None => {
                let fetched: i32 = sqlx::query_scalar(
                    "SELECT id FROM mysql_study_plans WHERE mysql_id = $1 AND organization_id = $2"
                )
                .bind(course.id_plan_de_estudios)
                .bind(org_id)
                .fetch_one(pool)
                .await
                .map_err(|e| format!("Failed to find study plan: {}", e))?;
                plan_ids.insert(course.id_plan_de_estudios, fetched);
                fetched
            }
        };

        sqlx::query(
            r#"
            INSERT INTO mysql_courses (
                mysql_id, organization_id, study_plan_id, name, level, duracion,
                course_type, level_calculated
            )
            VALUES ($1, $2, $3, $4, $5, $6, $7, $8)
            ON CONFLICT (mysql_id) DO UPDATE SET
                study_plan_id = EXCLUDED.study_plan_id,
                name = EXCLUDED.name,
                level = EXCLUDED.level,
                duracion = EXCLUDED.duracion,
                course_type = EXCLUDED.course_type,
                level_calculated = EXCLUDED.level_calculated,
                updated_at = NOW()
            "#
        )
        .bind(course.id_cursos)
        .bind(org_id)
        .bind(study_plan_id)
        .bind(&course.nombre_curso)
        .bind(course.nivel_curso)
        .bind(course.duracion)
        .bind(&course_type)
        .bind(&level_calculated)
        .execute(pool)
        .await
        .map_err(|e| format!("Failed to save course: {}", e))?;
    }

    Ok(())
}
/// Classifies a study plan by its name.
///
/// Returns `"intensive"` when the lower-cased name contains `intensive` or `intensivo`,
/// and `"regular"` otherwise.
fn calculate_course_type(plan_name: &str) -> String {
    const INTENSIVE_MARKERS: [&str; 2] = ["intensive", "intensivo"];

    let normalized = plan_name.to_lowercase();
    let is_intensive = INTENSIVE_MARKERS
        .iter()
        .any(|marker| normalized.contains(*marker));

    String::from(if is_intensive { "intensive" } else { "regular" })
}
/// Classifies a course by its duration in hours.
///
/// A known duration of 70 hours or more is `"intensive"`; anything shorter, or a
/// missing duration, is `"regular"`.
fn calculate_course_type_from_duration(duracion: Option<i32>) -> String {
    let label = if matches!(duracion, Some(hours) if hours >= 70) {
        "intensive"
    } else {
        "regular"
    };
    label.to_string()
}
/// Maps the numeric MySQL course level onto a level label.
///
/// | `nivel`      | label            |
/// |--------------|------------------|
/// | `None`       | `intermediate`   |
/// | up to 2      | `beginner`       |
/// | 3 to 4       | `beginner_1`     |
/// | 5 to 6       | `beginner_2`     |
/// | 7 to 8       | `intermediate`   |
/// | 9 to 10      | `intermediate_1` |
/// | 11 to 12     | `intermediate_2` |
/// | 13 and above | `advanced`       |
fn calculate_course_level(nivel: Option<i32>) -> String {
    let label = match nivel {
        None => "intermediate",
        Some(i32::MIN..=2) => "beginner",
        Some(3..=4) => "beginner_1",
        Some(5..=6) => "beginner_2",
        Some(7..=8) => "intermediate",
        Some(9..=10) => "intermediate_1",
        Some(11..=12) => "intermediate_2",
        Some(_) => "advanced",
    };
    label.to_string()
}
// ==================== Create ====================
/// POST /api/question-bank - Create a new question in the bank
@@ -239,7 +375,47 @@ pub async fn import_from_mysql(
let mysql_pool = sqlx::MySqlPool::connect(&mysql_url)
.await
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("Failed to connect to MySQL: {}", e)))?;
// Fetch all study plans and courses from MySQL to sync them
let mysql_plans: Vec<MySqlPlanInfo> = sqlx::query_as(
r#"
SELECT DISTINCT
pe.idPlanDeEstudios AS id_plan_de_estudios,
pe.Nombre AS nombre_plan
FROM plandeestudios pe
WHERE pe.Activo = 1
ORDER BY pe.Nombre
"#
)
.fetch_all(&mysql_pool)
.await
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("Failed to fetch plans: {}", e)))?;
let mysql_courses: Vec<MySqlCourseInfo> = sqlx::query_as(
r#"
SELECT DISTINCT
c.idCursos AS id_cursos,
c.NombreCurso AS nombre_curso,
c.NivelCurso AS nivel_curso,
pe.idPlanDeEstudios AS id_plan_de_estudios,
pe.Nombre AS nombre_plan,
CAST(c.Duracion AS SIGNED INTEGER) AS duracion
FROM curso c
JOIN plandeestudios pe ON c.idPlanDeEstudios = pe.idPlanDeEstudios
WHERE c.Activo = 1
AND pe.Activo = 1
ORDER BY pe.Nombre, c.NivelCurso
"#
)
.fetch_all(&mysql_pool)
.await
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("Failed to fetch courses: {}", e)))?;
// Save plans and courses to PostgreSQL
save_mysql_courses_and_plans(&pool, org_ctx.id, mysql_plans, mysql_courses)
.await
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("Failed to save courses/plans: {}", e)))?;
// Fetch questions from MySQL
let mysql_questions: Vec<MySqlQuestion> = if payload.import_all.unwrap_or(false) {
sqlx::query_as(
@@ -250,6 +426,8 @@ pub async fn import_from_mysql(
JOIN curso c ON bp.idCursos = c.idCursos
JOIN plandeestudios pe ON bp.idPlanDeEstudios = pe.idPlanDeEstudios
WHERE bp.activo = 1
AND c.Activo = 1
AND pe.Activo = 1
LIMIT 200
"#
)
@@ -265,6 +443,8 @@ pub async fn import_from_mysql(
JOIN curso c ON bp.idCursos = c.idCursos
JOIN plandeestudios pe ON bp.idPlanDeEstudios = pe.idPlanDeEstudios
WHERE bp.idCursos = ? AND bp.activo = 1
AND c.Activo = 1
AND pe.Activo = 1
LIMIT 100
"#
)
@@ -285,6 +465,8 @@ pub async fn import_from_mysql(
JOIN curso c ON bp.idCursos = c.idCursos
JOIN plandeestudios pe ON bp.idPlanDeEstudios = pe.idPlanDeEstudios
WHERE bp.idPregunta = ? AND bp.activo = 1
AND c.Activo = 1
AND pe.Activo = 1
"#
)
.bind(q_id)
@@ -555,16 +737,18 @@ pub async fn list_mysql_courses(
// Fetch courses with their plan names
let courses: Vec<MySqlCourseInfo> = sqlx::query_as(
r#"
SELECT DISTINCT
c.idCursos,
c.NombreCurso,
c.NivelCurso,
pe.idPlanDeEstudios,
pe.Nombre as NombrePlan
SELECT DISTINCT
c.idCursos AS id_cursos,
c.NombreCurso AS nombre_curso,
c.NivelCurso AS nivel_curso,
pe.idPlanDeEstudios AS id_plan_de_estudios,
pe.Nombre AS nombre_plan,
CAST(c.Duracion AS SIGNED INTEGER) AS duracion
FROM curso c
JOIN plandeestudios pe ON c.idPlanDeEstudios = pe.idPlanDeEstudios
WHERE c.Activo = 1
ORDER BY pe.Nombre, c.NombreCurso
AND pe.Activo = 1
ORDER BY pe.Nombre, c.NivelCurso
"#
)
.fetch_all(&mysql_pool)
@@ -576,6 +760,78 @@ pub async fn list_mysql_courses(
Ok(Json(courses))
}
/// GET /api/question-bank/mysql-plans - Get all study plans from PostgreSQL (imported from MySQL)
///
/// Lists the caller organization's active rows of `mysql_study_plans`, ordered by name.
/// Columns are aliased to the names `MySqlPlanInfo` expects. Database errors are
/// reported as 500 with a "Failed to fetch plans" message.
pub async fn get_mysql_plans(
    Org(org_ctx): Org,
    State(pool): State<PgPool>,
) -> Result<Json<Vec<MySqlPlanInfo>>, (StatusCode, String)> {
    let fetched = sqlx::query_as::<_, MySqlPlanInfo>(
        r#"
        SELECT
        mysql_id as "idPlanDeEstudios",
        name as "NombrePlan"
        FROM mysql_study_plans
        WHERE organization_id = $1 AND is_active = true
        ORDER BY name
        "#
    )
    .bind(org_ctx.id)
    .fetch_all(&pool)
    .await;

    match fetched {
        Ok(plans) => Ok(Json(plans)),
        Err(e) => Err((
            StatusCode::INTERNAL_SERVER_ERROR,
            format!("Failed to fetch plans: {}", e),
        )),
    }
}
/// GET /api/question-bank/mysql-courses - Get courses filtered by plan from PostgreSQL
///
/// Lists the caller organization's active courses that belong to the study plan whose
/// MySQL id equals `filters.plan_id`, ordered by level. Columns are aliased to the names
/// `MySqlCourseInfo` expects. Database errors are reported as 500 with a
/// "Failed to fetch courses" message.
pub async fn get_mysql_courses_by_plan(
    Org(org_ctx): Org,
    State(pool): State<PgPool>,
    Query(filters): Query<MySqlCoursesFilters>,
) -> Result<Json<Vec<MySqlCourseInfo>>, (StatusCode, String)> {
    let fetched = sqlx::query_as::<_, MySqlCourseInfo>(
        r#"
        SELECT
        c.mysql_id as "idCursos",
        c.name as "NombreCurso",
        c.level as "NivelCurso",
        sp.mysql_id as "idPlanDeEstudios",
        sp.name as "NombrePlan",
        c.duracion as "Duracion"
        FROM mysql_courses c
        JOIN mysql_study_plans sp ON c.study_plan_id = sp.id
        WHERE c.organization_id = $1
        AND c.is_active = true
        AND sp.mysql_id = $2
        ORDER BY c.level
        "#
    )
    .bind(org_ctx.id)
    .bind(filters.plan_id)
    .fetch_all(&pool)
    .await;

    match fetched {
        Ok(courses) => Ok(Json(courses)),
        Err(e) => Err((
            StatusCode::INTERNAL_SERVER_ERROR,
            format!("Failed to fetch courses: {}", e),
        )),
    }
}
/// Query-string parameters accepted by `get_mysql_courses_by_plan`.
#[derive(Debug, Deserialize)]
pub struct MySqlCoursesFilters {
/// MySQL id of the study plan (compared against `mysql_study_plans.mysql_id`).
pub plan_id: i32,
}
/// Study plan as exchanged with the API: the MySQL plan id and its name.
///
/// Both the row-decoding name (`sqlx(rename)`) and the JSON name (`serde(rename)`) use
/// the original MySQL-style identifiers.
/// NOTE(review): any query decoded into this struct must therefore expose columns named
/// exactly `idPlanDeEstudios` and `NombrePlan`; verify queries that alias to snake_case.
#[derive(Debug, sqlx::FromRow, Serialize)]
pub struct MySqlPlanInfo {
#[sqlx(rename = "idPlanDeEstudios")]
#[serde(rename = "idPlanDeEstudios")]
pub id_plan_de_estudios: i32,
#[sqlx(rename = "NombrePlan")]
#[serde(rename = "NombrePlan")]
pub nombre_plan: String,
}
/// POST /api/question-bank/import-mysql-all - Import ALL questions from MySQL (bulk import)
pub async fn import_all_from_mysql(
Org(org_ctx): Org,
@@ -623,6 +879,8 @@ pub async fn import_all_from_mysql(
JOIN plandeestudios pe ON bp.idPlanDeEstudios = pe.idPlanDeEstudios
JOIN tipopregunta tp ON bp.idTipoPregunta = tp.idTipoPregunta
WHERE bp.activo = 1
AND pe.Activo = 1
AND c.Activo = 1
ORDER BY pe.Nombre, c.NombreCurso, bp.idPregunta
LIMIT 500
"#
@@ -754,11 +1012,24 @@ pub struct ImportResult {
/// Course together with its study plan, as exchanged with the API.
///
/// Every field is renamed to its MySQL-style identifier both for row decoding
/// (`sqlx(rename)`) and for JSON (`serde(rename)`). `NivelCurso` and `Duracion` are
/// omitted from the JSON output when they are `None`.
/// NOTE(review): queries decoded into this struct must expose columns with exactly these
/// renamed names (`idCursos`, `NombreCurso`, ...); verify queries that alias to snake_case.
#[derive(Debug, sqlx::FromRow, Serialize, Deserialize)]
pub struct MySqlCourseInfo {
#[sqlx(rename = "idCursos")]
#[serde(rename = "idCursos")]
pub id_cursos: i32,
#[sqlx(rename = "NombreCurso")]
#[serde(rename = "NombreCurso")]
pub nombre_curso: String,
#[sqlx(rename = "NivelCurso")]
#[serde(rename = "NivelCurso", skip_serializing_if = "Option::is_none")]
pub nivel_curso: Option<i32>,
#[sqlx(rename = "idPlanDeEstudios")]
#[serde(rename = "idPlanDeEstudios")]
pub id_plan_de_estudios: i32,
#[sqlx(rename = "NombrePlan")]
#[serde(rename = "NombrePlan")]
pub nombre_plan: String,
#[sqlx(rename = "Duracion")]
#[serde(rename = "Duracion", skip_serializing_if = "Option::is_none")]
pub duracion: Option<i32>, // Duration in hours (40=regular, 80=intensive)
}
// Excel import - pending fix
@@ -17,6 +17,7 @@ use uuid::Uuid;
#[derive(Debug, Deserialize)]
pub struct TestTemplateFilters {
pub mysql_course_id: Option<i32>, // Filter by MySQL course ID
pub level: Option<CourseLevel>,
pub course_type: Option<CourseType>,
pub test_type: Option<TestType>,
@@ -36,12 +37,12 @@ pub async fn create_test_template(
let template: TestTemplate = sqlx::query_as(
r#"
INSERT INTO test_templates (
organization_id, created_by, name, description, level, course_type,
test_type, duration_minutes, passing_score, total_points,
organization_id, created_by, name, description, mysql_course_id,
level, course_type, test_type, duration_minutes, passing_score, total_points,
instructions, template_data, tags
)
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13)
RETURNING id, organization_id, created_by, name, description, level, course_type,
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14)
RETURNING id, organization_id, mysql_course_id, name, description, level, course_type,
test_type, duration_minutes, passing_score, total_points, instructions,
template_data, tags, is_active, usage_count, created_at, updated_at
"#
@@ -50,8 +51,9 @@ pub async fn create_test_template(
.bind(claims.sub)
.bind(&payload.name)
.bind(&payload.description)
.bind(&payload.level)
.bind(&payload.course_type)
.bind(payload.mysql_course_id)
.bind(payload.level.as_ref())
.bind(payload.course_type.as_ref())
.bind(&payload.test_type)
.bind(payload.duration_minutes)
.bind(payload.passing_score)
@@ -78,6 +80,12 @@ pub async fn list_test_templates(
let mut query = String::from("SELECT * FROM test_templates WHERE organization_id = $1");
let mut param_count = 1;
// Filter by mysql_course_id
if filters.mysql_course_id.is_some() {
param_count += 1;
query.push_str(&format!(" AND mysql_course_id = ${}", param_count));
}
// Filter by level
if filters.level.is_some() {
param_count += 1;
@@ -116,6 +124,10 @@ pub async fn list_test_templates(
// Build query with dynamic binds
let mut sql_query = sqlx::query_as::<_, TestTemplate>(&query).bind(org_ctx.id);
if let Some(mysql_course_id) = &filters.mysql_course_id {
sql_query = sql_query.bind(mysql_course_id);
}
if let Some(level) = &filters.level {
sql_query = sql_query.bind(level);
}
@@ -220,22 +232,23 @@ pub async fn update_test_template(
let template: TestTemplate = sqlx::query_as(
r#"
UPDATE test_templates
SET
SET
name = COALESCE($3, name),
description = COALESCE($4, description),
level = COALESCE($5, level),
course_type = COALESCE($6, course_type),
test_type = COALESCE($7, test_type),
duration_minutes = COALESCE($8, duration_minutes),
passing_score = COALESCE($9, passing_score),
total_points = COALESCE($10, total_points),
instructions = COALESCE($11, instructions),
template_data = COALESCE($12, template_data),
tags = COALESCE($13, tags),
is_active = COALESCE($14, is_active),
mysql_course_id = COALESCE($5, mysql_course_id),
level = COALESCE($6, level),
course_type = COALESCE($7, course_type),
test_type = COALESCE($8, test_type),
duration_minutes = COALESCE($9, duration_minutes),
passing_score = COALESCE($10, passing_score),
total_points = COALESCE($11, total_points),
instructions = COALESCE($12, instructions),
template_data = COALESCE($13, template_data),
tags = COALESCE($14, tags),
is_active = COALESCE($15, is_active),
updated_at = NOW()
WHERE id = $1 AND organization_id = $2
RETURNING id, organization_id, created_by, name, description, level, course_type,
RETURNING id, organization_id, mysql_course_id, name, description, level, course_type,
test_type, duration_minutes, passing_score, total_points, instructions,
template_data, tags, is_active, usage_count, created_at, updated_at
"#
@@ -244,6 +257,7 @@ pub async fn update_test_template(
.bind(org_ctx.id)
.bind(payload.name)
.bind(payload.description)
.bind(payload.mysql_course_id)
.bind(payload.level)
.bind(payload.course_type)
.bind(payload.test_type)
@@ -615,70 +629,186 @@ pub struct ApplyTemplatePayload {
// ==================== RAG Question Generation ====================
/// POST /test-templates/generate-with-rag - Generate questions using RAG from MySQL question bank
/// POST /test-templates/generate-with-rag - Generate questions using RAG from imported MySQL question bank
/// Uses semantic search with pgvector embeddings when available, falls back to course_id filtering
pub async fn generate_questions_with_rag(
Org(org_ctx): Org,
claims: Claims,
State(pool): State<PgPool>,
Json(payload): Json<RagGenerationPayload>,
) -> Result<Json<Vec<TestTemplateQuestion>>, (StatusCode, String)> {
use common::ai::{self, generate_embedding};
use reqwest::Client;
use serde_json::json;
// 1. Fetch questions from external MySQL database (RAG context)
let mysql_url = std::env::var("MYSQL_DATABASE_URL")
.map_err(|_| (StatusCode::INTERNAL_SERVER_ERROR, "MYSQL_DATABASE_URL not configured".to_string()))?;
// Create MySQL pool connection
let mysql_pool = sqlx::MySqlPool::connect(&mysql_url)
.await
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("Failed to connect to MySQL: {}", e)))?;
// Fetch questions from MySQL bank filtered by course if provided
let mysql_questions: Vec<MySqlQuestion> = if let Some(course_id) = payload.course_id {
sqlx::query_as(
r#"
SELECT
bp.descripcion,
bp.idTipoPregunta AS id_tipo_pregunta,
c.NombreCurso AS nombre_curso,
pe.Nombre as plan_nombre
FROM bancopreguntas bp
JOIN curso c ON bp.idCursos = c.idCursos
JOIN plandeestudios pe ON bp.idPlanDeEstudios = pe.idPlanDeEstudios
WHERE bp.idCursos = ? AND bp.activo = 1
LIMIT 20
"#
)
.bind(course_id)
.fetch_all(&mysql_pool)
.await
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("Failed to fetch questions: {}", e)))?
} else {
sqlx::query_as(
r#"
SELECT
bp.descripcion,
bp.idTipoPregunta AS id_tipo_pregunta,
c.NombreCurso AS nombre_curso,
pe.Nombre as plan_nombre
FROM bancopreguntas bp
JOIN curso c ON bp.idCursos = c.idCursos
JOIN plandeestudios pe ON bp.idPlanDeEstudios = pe.idPlanDeEstudios
WHERE bp.activo = 1
LIMIT 20
"#
)
.fetch_all(&mysql_pool)
.await
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("Failed to fetch questions: {}", e)))?
};
mysql_pool.close().await;
let mut mysql_questions: Vec<QuestionBankForRAG> = Vec::new();
// If topic is provided, use semantic search; otherwise use course_id filtering
if let Some(topic) = &payload.topic {
// Try semantic search with embeddings
// Create client that accepts invalid certificates (for dev with self-signed certs)
let client = reqwest::Client::builder()
.danger_accept_invalid_certs(true)
.danger_accept_invalid_hostnames(true)
.build()
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("HTTP client error: {}", e)))?;
let ollama_url = ai::get_ollama_url();
let model = ai::get_embedding_model();
match generate_embedding(&client, &ollama_url, &model, topic).await {
Ok(response) => {
let pgvector = ai::embedding_to_pgvector(&response.embedding);
// Semantic search in question_bank
mysql_questions = sqlx::query_as(
r#"
SELECT
qb.question_text as descripcion,
qb.options,
COALESCE(
(qb.source_metadata->>'idPlanDeEstudios')::integer,
0
) as id_plan_de_estudios,
COALESCE(
qb.source_metadata->>'plan_nombre',
''
) as plan_nombre,
COALESCE(
(qb.source_metadata->>'nivel_curso')::integer,
NULL
) as nivel_curso,
1 - (qb.embedding <=> $1::vector) AS similarity
FROM question_bank qb
WHERE qb.organization_id = $2
AND qb.embedding IS NOT NULL
ORDER BY qb.embedding <=> $1::vector
LIMIT $3
"#
)
.bind(&pgvector)
.bind(org_ctx.id)
.bind(payload.num_questions.unwrap_or(5) * 3) // Get more for diversity
.fetch_all(&pool)
.await
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("Semantic search failed: {}", e)))?;
tracing::info!("Semantic search found {} similar questions", mysql_questions.len());
}
Err(e) => {
tracing::warn!("Semantic search failed, falling back to keyword search: {}", e);
// Fall back to text search
mysql_questions = sqlx::query_as(
r#"
SELECT
qb.question_text as descripcion,
qb.options,
COALESCE(
(qb.source_metadata->>'idPlanDeEstudios')::integer,
0
) as id_plan_de_estudios,
COALESCE(
qb.source_metadata->>'plan_nombre',
''
) as plan_nombre,
COALESCE(
(qb.source_metadata->>'nivel_curso')::integer,
NULL
) as nivel_curso
FROM question_bank qb
WHERE qb.organization_id = $1
AND qb.question_text ILIKE $2
LIMIT $3
"#
)
.bind(org_ctx.id)
.bind(&format!("%{}%", topic))
.bind(payload.num_questions.unwrap_or(5) * 3)
.fetch_all(&pool)
.await
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("Keyword search failed: {}", e)))?;
}
}
} else if let Some(course_id) = payload.course_id {
// Fetch questions from imported MySQL questions in PostgreSQL question_bank
// Filter by course_id if provided (mysql_course_id from imported metadata)
mysql_questions = sqlx::query_as(
r#"
SELECT
qb.question_text as descripcion,
qb.options,
COALESCE(
(qb.source_metadata->>'idPlanDeEstudios')::integer,
0
) as id_plan_de_estudios,
COALESCE(
qb.source_metadata->>'plan_nombre',
''
) as plan_nombre,
COALESCE(
(qb.source_metadata->>'nivel_curso')::integer,
NULL
) as nivel_curso
FROM question_bank qb
WHERE qb.organization_id = $1
AND qb.source = 'imported-mysql'
AND (qb.source_metadata->>'idCursos')::integer = $2
LIMIT 20
"#
)
.bind(org_ctx.id)
.bind(course_id)
.fetch_all(&pool)
.await
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("Failed to fetch questions: {}", e)))?;
} else {
// Fetch all imported MySQL questions for this organization
mysql_questions = sqlx::query_as(
r#"
SELECT
qb.question_text as descripcion,
qb.options,
COALESCE(
(qb.source_metadata->>'idPlanDeEstudios')::integer,
0
) as id_plan_de_estudios,
COALESCE(
qb.source_metadata->>'plan_nombre',
''
) as plan_nombre,
COALESCE(
(qb.source_metadata->>'nivel_curso')::integer,
NULL
) as nivel_curso
FROM question_bank qb
WHERE qb.organization_id = $1
AND qb.source = 'imported-mysql'
LIMIT 20
"#
)
.bind(org_ctx.id)
.fetch_all(&pool)
.await
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("Failed to fetch questions: {}", e)))?;
}
if mysql_questions.is_empty() && payload.course_id.is_some() {
return Err((StatusCode::NOT_FOUND, "No questions found in MySQL bank for this course".to_string()));
return Err((StatusCode::NOT_FOUND, "No questions found in imported question bank for this course. Please import questions from MySQL first.".to_string()));
}
// Determine course_type and level from imported data
let course_type = mysql_questions
.first()
.map(|q| get_course_type_from_plan(&q.plan_nombre))
.unwrap_or(CourseType::Regular);
let level = mysql_questions
.first()
.map(|q| get_course_level_from_mysql(q.nivel_curso, &q.plan_nombre, ""))
.unwrap_or(CourseLevel::Intermediate);
tracing::info!("Determined course_type: {:?}, level: {:?} from imported data", course_type, level);
// 2. Build RAG context from MySQL questions (lightweight format)
let rag_context: String = mysql_questions
.iter()
@@ -715,19 +845,25 @@ pub async fn generate_questions_with_rag(
Create {} ORIGINAL multiple-choice questions about: {}
Return ONLY a JSON array with this structure:
IMPORTANT - Return ONLY a JSON array with this EXACT structure:
[
{{
"question_text": "Question text",
"question_text": "The tourist got lost in the ______ of the city.",
"question_type": "multiple-choice",
"options": ["A", "B", "C", "D"],
"options": ["downtown", "countryside", "mountains", "desert"],
"correct_answer": 0,
"explanation": "Why this is correct",
"explanation": "Downtown is the main area of a city where tourists typically visit.",
"points": 1,
"skill_assessed": "reading"
}}
]
RULES FOR OPTIONS:
- Each option must be ONLY the answer text (1-3 words max)
- Do NOT include letters like "A.", "B.", "a)", "b)"
- Do NOT include "Option 1:", "Answer:", or any prefix
- Just the pure answer text (e.g., "downtown", "Paris", "True")
Skills: reading, listening, speaking, writing. Distribute across all 4."#,
rag_context,
num_questions,
@@ -777,21 +913,118 @@ pub async fn generate_questions_with_rag(
.and_then(|content| content.as_str())
.and_then(|content| serde_json::from_str::<serde_json::Value>(content).ok())
.and_then(|data| {
if let Some(questions) = data.get("questions").or(data.get("items")) {
questions.as_array().cloned()
} else if let Some(arr) = data.as_array() {
Some(arr.clone())
} else {
None
// Try multiple formats:
// 1. Standard array format: [...]
if let Some(arr) = data.as_array() {
return Some(arr.clone());
}
// 2. Wrapped format: {questions: [...]} or {items: [...]}
if let Some(questions) = data.get("questions").or(data.get("items")) {
return questions.as_array().cloned();
}
// 3. Object format with numbered keys: {q1: {...}, q2: {...}, ...}
if let Some(obj) = data.as_object() {
let questions: Vec<serde_json::Value> = obj.values().cloned().collect();
if !questions.is_empty() {
return Some(questions);
}
}
None
})
.unwrap_or_default();
// Prefix patterns that the model sometimes prepends to an option despite the prompt rules.
// They are compiled once here rather than on every `clean_option` call.
//
// The "letter/number + dot" forms require at least one whitespace character after the dot,
// so genuine answers such as "3.5" or "U.S." are no longer truncated to "5" / "S.".
let option_prefix_patterns: Vec<regex::Regex> = [
    r"^[A-Za-z]\.\s+",           // "A. ", "B. "
    r"^[A-Za-z]\)\s*",           // "A) ", "b)"
    r"^\d+\.\s+",                // "1. ", "2. "
    r"^\d+\)\s*",                // "1) ", "2)"
    r"^Option\s+[A-Za-z]\.?\s*", // "Option A. ", "Option B "
    r"^Answer\s*[:\.]?\s*",      // "Answer: ", "Answer. "
]
.iter()
.filter_map(|pattern| regex::Regex::new(pattern).ok())
.collect();

// Helper to clean a single option: trims it and strips each known prefix (first match only,
// applied in the order listed above), then trims again.
let clean_option = |opt: &str| -> String {
    let mut cleaned = opt.trim().to_string();
    for re in &option_prefix_patterns {
        cleaned = re.replace(&cleaned, "").into_owned();
    }
    cleaned.trim().to_string()
};
// Randomises the order of `options` and reports where the correct answer ended up.
// Returns the input untouched when there is nothing to shuffle, no answer index was given,
// or the index does not point inside `options`.
let shuffle_options = |options: Vec<String>, correct_answer: Option<i64>| -> (Vec<String>, Option<i64>) {
    use rand::seq::SliceRandom;
    use rand::thread_rng;

    let answer_pos = match correct_answer {
        Some(raw) if !options.is_empty() => raw as usize,
        _ => return (options, correct_answer),
    };

    // Remember the answer by its text so it can be located again after reordering.
    let answer_text = match options.get(answer_pos) {
        Some(text) => text.clone(),
        None => return (options, correct_answer),
    };

    // Shuffle a permutation of positions, then materialise the options in that order.
    let mut order: Vec<usize> = (0..options.len()).collect();
    order.shuffle(&mut thread_rng());

    let mut reordered: Vec<String> = Vec::with_capacity(order.len());
    for &source in &order {
        reordered.push(options[source].clone());
    }

    let relocated = reordered
        .iter()
        .position(|candidate| *candidate == answer_text)
        .map(|pos| pos as i64);

    (reordered, relocated)
};
// Convert to TestTemplateQuestion format
let generated_questions: Vec<TestTemplateQuestion> = questions_data
.iter()
.enumerate()
.map(|(idx, q)| {
// Get original options and correct answer
let original_options: Vec<String> = q
.get("options")
.and_then(|v| v.as_array())
.map(|arr| {
arr.iter()
.filter_map(|v| v.as_str())
.map(|s| clean_option(s))
.collect()
})
.unwrap_or_default();
let original_correct_idx: Option<usize> = q
.get("correct_answer")
.or(q.get("correct"))
.and_then(|v| v.as_i64())
.map(|idx| idx as usize);
// Shuffle options if we have valid data
let (options, correct_answer) = if !original_options.is_empty() && original_correct_idx.is_some() {
let correct_idx = original_correct_idx.unwrap();
if correct_idx < original_options.len() {
let (shuffled, new_correct_idx) = shuffle_options(original_options.clone(), Some(correct_idx as i64));
(Some(json!(shuffled)), new_correct_idx.map(|idx| json!(idx)))
} else {
(Some(json!(original_options)), q.get("correct_answer").or(q.get("correct")).cloned())
}
} else {
(Some(json!(original_options)), q.get("correct_answer").or(q.get("correct")).cloned())
};
TestTemplateQuestion {
id: Uuid::new_v4(),
template_id: Uuid::nil(),
@@ -799,14 +1032,15 @@ pub async fn generate_questions_with_rag(
question_order: idx as i32,
question_type: q.get("question_type").and_then(|v| v.as_str()).unwrap_or("multiple-choice").to_string(),
question_text: q.get("question_text").and_then(|v| v.as_str()).unwrap_or("Question").to_string(),
options: q.get("options").cloned(),
correct_answer: q.get("correct_answer").or(q.get("correct")).cloned(),
options,
correct_answer,
explanation: q.get("explanation").and_then(|v| v.as_str()).map(String::from),
points: q.get("points").and_then(|v| v.as_i64()).unwrap_or(1) as i32,
metadata: Some(json!({
"generated_by": "rag-ai",
"source": "mysql-bank",
"generated_at": chrono::Utc::now().to_rfc3339(),
"options_shuffled": true,
})),
created_at: chrono::Utc::now(),
}
@@ -874,15 +1108,64 @@ pub async fn generate_questions_with_rag(
#[derive(Debug, Deserialize)]
pub struct RagGenerationPayload {
pub course_id: Option<i32>, // MySQL course ID
pub course_id: Option<i32>, // MySQL course ID from imported metadata
pub topic: Option<String>,
pub num_questions: Option<i32>,
}
/// Row shape shared by the `question_bank` queries that collect RAG context
/// in `generate_questions_with_rag`.
#[derive(Debug, sqlx::FromRow)]
struct QuestionBankForRAG {
    /// `question_bank.question_text`, selected under the alias `descripcion`.
    descripcion: String,
    /// `question_bank.options` as stored; may be NULL.
    options: Option<serde_json::Value>,
    /// `source_metadata->>'idPlanDeEstudios'` cast to integer; the queries COALESCE it to 0.
    id_plan_de_estudios: i32,
    /// `source_metadata->>'plan_nombre'`; the queries COALESCE it to an empty string.
    plan_nombre: String,
    /// `source_metadata->>'nivel_curso'` cast to integer; NULL when the key is absent.
    nivel_curso: Option<i32>,
    /// Cosine similarity (`1 - (embedding <=> query)`), selected only by the semantic-search
    /// query; `#[sqlx(default)]` leaves it `None` for the other queries.
    ///
    /// Must be `f64`: the pgvector distance operator yields double precision (FLOAT8), and
    /// sqlx refuses to decode FLOAT8 into `f32`, which made the semantic path fail on decode.
    #[sqlx(default)]
    similarity: Option<f64>,
}
/// Row shape for questions read from the external MySQL question bank (`bancopreguntas`).
///
/// NOTE(review): the RAG handler in this file collects `QuestionBankForRAG` rows from
/// PostgreSQL; confirm whether this struct is still referenced anywhere.
#[derive(Debug, sqlx::FromRow)]
struct MySqlQuestion {
/// Question text (`bp.descripcion`).
descripcion: String,
/// Question type id (`bp.idTipoPregunta`).
id_tipo_pregunta: i32,
/// Course name (`c.NombreCurso`).
nombre_curso: String,
/// Study plan name (`pe.Nombre`).
plan_nombre: String,
/// Presumably the course level (`NivelCurso`) — verify against the query that fills this struct.
nivel_curso: Option<i32>,
/// Presumably the study plan id (`idPlanDeEstudios`) — verify against the query that fills this struct.
id_plan_de_estudios: i32,
}
/// Classifies a study plan as intensive or regular from its name.
///
/// The match is a case-insensitive substring test for "intensive" or "intensivo";
/// any other name is treated as a regular course.
fn get_course_type_from_plan(plan_name: &str) -> CourseType {
    let normalized = plan_name.to_lowercase();
    let is_intensive = ["intensive", "intensivo"]
        .iter()
        .any(|&keyword| normalized.contains(keyword));

    if is_intensive {
        CourseType::Intensive
    } else {
        CourseType::Regular
    }
}
/// Determines the course level from imported MySQL data.
///
/// * `nivel_curso` - numeric level (`NivelCurso`); values 1-12 map to two-level bands and
///   anything above 12 is `Advanced`.
/// * `plan_nombre` - study plan name, used as a fallback when `nivel_curso` is missing or
///   not a positive number.
/// * `_nombre_curso` - currently unused; kept so existing call sites keep compiling.
///
/// The fallback is a case-insensitive keyword match on the plan name and defaults to
/// `Advanced` when no keyword is found.
fn get_course_level_from_mysql(nivel_curso: Option<i32>, plan_nombre: &str, _nombre_curso: &str) -> CourseLevel {
    // Try to determine level from nivel_curso field first
    match nivel_curso {
        Some(1..=2) => return CourseLevel::Beginner,
        Some(3..=4) => return CourseLevel::Beginner_1,
        Some(5..=6) => return CourseLevel::Beginner_2,
        Some(7..=8) => return CourseLevel::Intermediate,
        Some(9..=10) => return CourseLevel::Intermediate_1,
        Some(11..=12) => return CourseLevel::Intermediate_2,
        Some(nivel) if nivel > 12 => return CourseLevel::Advanced,
        // Zero or negative levels are invalid data: do not report them as Advanced,
        // fall through to the plan-name heuristic instead (same as a missing level).
        _ => {}
    }

    // Fallback: try to extract level from plan name. English and Spanish spellings are both
    // checked ("basic" also covers "basico"; the accented form needs its own entry).
    let plan_lower = plan_nombre.to_lowercase();
    let mentions = |keywords: &[&str]| keywords.iter().any(|&keyword| plan_lower.contains(keyword));

    if mentions(&["basic", "básico", "beginner", "principiante"]) {
        CourseLevel::Beginner
    } else if mentions(&["intermediate", "intermedio"]) {
        CourseLevel::Intermediate
    } else {
        CourseLevel::Advanced
    }
}
+23 -6
View File
@@ -10,6 +10,7 @@ mod handlers_rubrics;
mod handlers_test_templates;
mod handlers_question_bank;
mod handlers_admin;
mod handlers_embeddings;
mod webhooks;
use axum::{
@@ -343,9 +344,13 @@ async fn main() {
"/question-bank/import-mysql",
post(handlers_question_bank::import_from_mysql),
)
.route(
"/question-bank/mysql-plans",
get(handlers_question_bank::get_mysql_plans),
)
.route(
"/question-bank/mysql-courses",
get(handlers_question_bank::list_mysql_courses),
get(handlers_question_bank::get_mysql_courses_by_plan),
)
.route(
"/question-bank/import-mysql-all",
@@ -355,11 +360,23 @@ async fn main() {
"/question-bank/ai-generate",
post(handlers_question_bank::ai_generate_question),
)
// Excel import - pendiente de fix
// .route(
// "/question-bank/import-excel",
// post(handlers_question_bank::import_from_excel),
// )
// Embedding routes for semantic search
.route(
"/question-bank/embeddings/generate",
post(handlers_embeddings::generate_question_embeddings),
)
.route(
"/question-bank/semantic-search",
get(handlers_embeddings::semantic_search),
)
.route(
"/question-bank/similar/{id}",
get(handlers_embeddings::find_similar_questions),
)
.route(
"/question-bank/{id}/embedding/regenerate",
post(handlers_embeddings::regenerate_question_embedding),
)
// Admin routes
.route(
"/admin/token-usage",
@@ -0,0 +1,135 @@
-- PGVector Embeddings for Knowledge Base (LMS)
-- Enables semantic search for AI tutor chat with RAG
-- Every statement in this section uses IF NOT EXISTS, so re-running it is a no-op.
-- Enable pgvector extension (should already be enabled from CMS)
CREATE EXTENSION IF NOT EXISTS vector;
-- Add embedding column to knowledge_base table
-- Using 768 dimensions for nomic-embed-text model
-- The search functions in this migration declare their query parameter as vector(768) too,
-- so the dimension must be changed in both places together.
ALTER TABLE knowledge_base
ADD COLUMN IF NOT EXISTS embedding vector(768);
-- Add embedding_updated_at timestamp
ALTER TABLE knowledge_base
ADD COLUMN IF NOT EXISTS embedding_updated_at TIMESTAMPTZ;
-- Create index for fast semantic search (IVFFlat for >10k rows)
-- Adjust lists parameter based on expected data size:
-- lists = rows / 1000 for < 1M rows
-- vector_cosine_ops matches the cosine-distance operator (<=>) used by the search functions.
-- NOTE(review): pgvector advises building IVFFlat indexes once the table already holds data;
-- here the column has just been added and is empty — consider a REINDEX after backfilling.
CREATE INDEX IF NOT EXISTS idx_knowledge_base_embeddings
ON knowledge_base
USING ivfflat (embedding vector_cosine_ops)
WITH (lists = 100);
-- Create index for filtering by embedding status
CREATE INDEX IF NOT EXISTS idx_knowledge_base_embedding_updated
ON knowledge_base (embedding_updated_at);
-- Function to search knowledge base by semantic similarity within one course.
--   p_course_id        course whose chunks are searched
--   p_query_embedding  embedding of the search text (768 dimensions, like the column)
--   p_limit            maximum number of rows returned (default 10)
--   p_threshold        minimum cosine similarity a chunk must reach (default 0.5)
-- Rows without an embedding are skipped; results are ordered from most to least similar.
CREATE OR REPLACE FUNCTION search_knowledge_semantic(
    p_course_id UUID,
    p_query_embedding vector(768),
    p_limit INTEGER DEFAULT 10,
    p_threshold REAL DEFAULT 0.5
)
RETURNS TABLE (
    id UUID,
    course_id UUID,
    lesson_id UUID,
    block_id UUID,
    content_chunk TEXT,
    similarity REAL,
    metadata JSONB
) AS $$
BEGIN
    RETURN QUERY
    SELECT
        kb.id,
        kb.course_id,
        kb.lesson_id,
        kb.block_id,
        kb.content_chunk,
        -- The <=> operator yields double precision, and RETURN QUERY requires each column to
        -- match the declared result type exactly, so cast to the declared REAL.
        (1 - (kb.embedding <=> p_query_embedding))::REAL AS similarity,
        kb.metadata
    FROM knowledge_base kb
    WHERE kb.course_id = p_course_id
      AND kb.embedding IS NOT NULL
      AND 1 - (kb.embedding <=> p_query_embedding) >= p_threshold
    ORDER BY kb.embedding <=> p_query_embedding
    LIMIT p_limit;
END;
$$ LANGUAGE plpgsql STABLE;
-- Function to search knowledge base across all courses (for admin/global search).
--   p_query_embedding  embedding of the search text (768 dimensions, like the column)
--   p_limit            maximum number of rows returned (default 20)
--   p_threshold        minimum cosine similarity a chunk must reach (default 0.6)
-- Course and lesson names come from LEFT JOINs, so they are NULL when the row is missing.
-- NOTE(review): there is no organization filter here; callers must restrict access themselves.
-- NOTE(review): course_name / lesson_title are declared VARCHAR; if courses.name or
-- lessons.title are TEXT, RETURN QUERY will reject them — confirm the column types.
CREATE OR REPLACE FUNCTION search_knowledge_global(
    p_query_embedding vector(768),
    p_limit INTEGER DEFAULT 20,
    p_threshold REAL DEFAULT 0.6
)
RETURNS TABLE (
    id UUID,
    course_id UUID,
    course_name VARCHAR,
    lesson_id UUID,
    lesson_title VARCHAR,
    content_chunk TEXT,
    similarity REAL
) AS $$
BEGIN
    RETURN QUERY
    SELECT
        kb.id,
        kb.course_id,
        c.name AS course_name,
        kb.lesson_id,
        l.title AS lesson_title,
        kb.content_chunk,
        -- The <=> operator yields double precision, and RETURN QUERY requires each column to
        -- match the declared result type exactly, so cast to the declared REAL.
        (1 - (kb.embedding <=> p_query_embedding))::REAL AS similarity
    FROM knowledge_base kb
    LEFT JOIN courses c ON c.id = kb.course_id
    LEFT JOIN lessons l ON l.id = kb.lesson_id
    WHERE kb.embedding IS NOT NULL
      AND 1 - (kb.embedding <=> p_query_embedding) >= p_threshold
    ORDER BY kb.embedding <=> p_query_embedding
    LIMIT p_limit;
END;
$$ LANGUAGE plpgsql STABLE;
-- Function to get contextual chunks for a specific lesson.
-- Combines semantic search with exact lesson matching:
--   * every embedded chunk of lesson p_lesson_id qualifies, whatever its similarity;
--   * chunks of other lessons qualify only with a cosine similarity of at least 0.6.
-- Chunks of the requested lesson are listed first, then the rest, each group ordered from
-- most to least similar; at most p_limit rows (default 5) are returned.
CREATE OR REPLACE FUNCTION get_lesson_context(
    p_lesson_id UUID,
    p_query_embedding vector(768),
    p_limit INTEGER DEFAULT 5
)
RETURNS TABLE (
    id UUID,
    content_chunk TEXT,
    similarity REAL,
    is_exact_lesson BOOLEAN,
    metadata JSONB
) AS $$
BEGIN
    RETURN QUERY
    SELECT
        kb.id,
        kb.content_chunk,
        -- The <=> operator yields double precision, and RETURN QUERY requires each column to
        -- match the declared result type exactly, so cast to the declared REAL.
        (1 - (kb.embedding <=> p_query_embedding))::REAL AS similarity,
        -- lesson_id may be NULL; COALESCE turns the resulting NULL comparison into FALSE.
        COALESCE(kb.lesson_id = p_lesson_id, FALSE) AS is_exact_lesson,
        kb.metadata
    FROM knowledge_base kb
    WHERE kb.embedding IS NOT NULL
      AND (kb.lesson_id = p_lesson_id OR 1 - (kb.embedding <=> p_query_embedding) >= 0.6)
    ORDER BY
        -- Without COALESCE a NULL comparison would sort first under DESC (NULLS FIRST is the
        -- default for descending order), ranking lesson-less chunks above exact matches.
        COALESCE(kb.lesson_id = p_lesson_id, FALSE) DESC,
        kb.embedding <=> p_query_embedding
    LIMIT p_limit;
END;
$$ LANGUAGE plpgsql STABLE;
-- Catalog comments for the objects created by this migration.
-- The dimension quoted for the embedding column must match its vector(768) declaration.
COMMENT ON COLUMN knowledge_base.embedding IS 'Semantic embedding vector for RAG search (nomic-embed-text, 768 dimensions)';
COMMENT ON COLUMN knowledge_base.embedding_updated_at IS 'Timestamp when embedding was last generated';
COMMENT ON FUNCTION search_knowledge_semantic IS 'Search knowledge base by semantic similarity within a course';
COMMENT ON FUNCTION search_knowledge_global IS 'Search knowledge base across all courses (global admin search)';
COMMENT ON FUNCTION get_lesson_context IS 'Get contextual chunks for a lesson, prioritizing exact lesson match';
+85 -21
View File
@@ -2608,28 +2608,92 @@ pub async fn chat_with_tutor(
}
}
// 2.2 Knowledge Base Retrieval (RAG)
let search_results = sqlx::query(
r#"
SELECT content_chunk
FROM knowledge_base
WHERE organization_id = $1
AND search_vector @@ plainto_tsquery('english', $2)
LIMIT 3
"#,
)
.bind(org_ctx.id)
.bind(&payload.message)
.fetch_all(&pool)
.await
.unwrap_or_default();
// 2.2 Knowledge Base Retrieval (RAG) - Hybrid Search
// First try semantic search with embeddings (more accurate)
// Fall back to full-text search if embeddings not available
use common::ai::{self, generate_embedding};
let mut kb_context = String::new();
if !search_results.is_empty() {
kb_context.push_str("\n--- CONTEXTO ADICIONAL DE LA BASE DE CONOCIMIENTOS ---\n");
for row in search_results {
let chunk: String = row.get("content_chunk");
kb_context.push_str(&format!("Relevant Snippet: {}\n\n", chunk));
// Try semantic search with embeddings first
// Create client that accepts invalid certificates (for dev with self-signed certs)
let client = reqwest::Client::builder()
.danger_accept_invalid_certs(true)
.danger_accept_invalid_hostnames(true)
.build()
.map_err(|e| {
tracing::warn!("Failed to create HTTP client for embeddings: {}", e);
(StatusCode::INTERNAL_SERVER_ERROR, format!("HTTP client error: {}", e))
})?;
let ollama_url = ai::get_ollama_url();
let model = ai::get_embedding_model();
match generate_embedding(&client, &ollama_url, &model, &payload.message).await {
Ok(response) => {
let pgvector = ai::embedding_to_pgvector(&response.embedding);
// Semantic search with pgvector
let search_results = sqlx::query(
r#"
SELECT content_chunk, 1 - (embedding <=> $1::vector) AS similarity
FROM knowledge_base
WHERE organization_id = $2
AND embedding IS NOT NULL
ORDER BY embedding <=> $1::vector
LIMIT 5
"#,
)
.bind(&pgvector)
.bind(org_ctx.id)
.fetch_all(&pool)
.await
.unwrap_or_default();
// Filter by similarity threshold (0.5)
let relevant_results: Vec<_> = search_results
.into_iter()
.filter(|row| {
let similarity: f64 = row.get("similarity");
similarity >= 0.5
})
.collect();
if !relevant_results.is_empty() {
kb_context.push_str("\n--- CONTEXTO DE LA BASE DE CONOCIMIENTOS (Búsqueda Semántica) ---\n");
for row in relevant_results {
let chunk: String = row.get("content_chunk");
kb_context.push_str(&format!("Relevant Snippet: {}\n\n", chunk));
}
}
}
Err(e) => {
tracing::warn!("Semantic search failed, falling back to full-text search: {}", e);
// Fall back to full-text search
let search_results = sqlx::query(
r#"
SELECT content_chunk
FROM knowledge_base
WHERE organization_id = $1
AND search_vector @@ plainto_tsquery('english', $2)
LIMIT 3
"#,
)
.bind(org_ctx.id)
.bind(&payload.message)
.fetch_all(&pool)
.await
.unwrap_or_default();
if !search_results.is_empty() {
kb_context.push_str("\n--- CONTEXTO DE LA BASE DE CONOCIMIENTOS (Búsqueda Full-Text) ---\n");
for row in search_results {
let chunk: String = row.get("content_chunk");
kb_context.push_str(&format!("Relevant Snippet: {}\n\n", chunk));
}
}
}
}
@@ -0,0 +1,287 @@
//! Handlers for PGVector embeddings in Knowledge Base (LMS)
//! Enables semantic search for AI tutor chat with RAG
use axum::{
Json,
extract::{Path, Query, State},
http::StatusCode,
};
use common::ai::{self, generate_embedding};
use common::middleware::Org;
use reqwest::Client;
use serde::{Deserialize, Serialize};
use sqlx::PgPool;
use uuid::Uuid;
// ==================== Query Parameters ====================
/// Query-string parameters accepted by `semantic_search_knowledge`.
#[derive(Debug, Deserialize)]
pub struct KnowledgeSearchFilters {
/// Free-text search query; it is embedded and compared against the stored chunk embeddings.
pub query: String,
/// When present, only chunks of this course are returned.
pub course_id: Option<Uuid>,
/// When present, only chunks of this lesson are returned.
pub lesson_id: Option<Uuid>,
/// Maximum number of results; the handler falls back to 10 when omitted.
pub limit: Option<i32>,
/// Minimum similarity a chunk must reach; the handler falls back to 0.5 when omitted.
pub threshold: Option<f64>,
}
/// One knowledge-base chunk returned by `semantic_search_knowledge`, with its similarity score.
#[derive(Debug, Serialize, Deserialize, sqlx::FromRow)]
pub struct KnowledgeSearchResult {
/// Primary key of the `knowledge_base` row.
pub id: Uuid,
/// Course the chunk belongs to.
pub course_id: Uuid,
/// Lesson the chunk belongs to, if any.
pub lesson_id: Option<Uuid>,
/// Content block the chunk belongs to, if any.
pub block_id: Option<Uuid>,
/// The stored text chunk.
pub content_chunk: String,
/// `1 - (embedding <=> query_embedding)` as computed by the search query.
pub similarity: f64, // PostgreSQL vector similarity returns double precision
/// Free-form metadata stored with the chunk, if any.
pub metadata: Option<serde_json::Value>,
}
/// Summary returned by `generate_knowledge_embeddings` for one batch run.
#[derive(Debug, Serialize, Deserialize)]
pub struct GenerateKnowledgeEmbeddingsResult {
/// Entries counted as successfully embedded and stored.
pub processed: i32,
/// Entries for which embedding generation or the database update was counted as failed.
pub failed: i32,
/// Wall-clock time spent in the handler, in milliseconds.
pub duration_ms: u64,
}
// ==================== Generate Embeddings ====================
/// POST /api/knowledge-base/embeddings/generate - Generate embeddings for knowledge base entries
///
/// Processes one batch of up to 100 entries of the caller's organization that have no
/// embedding yet (newest first). Each entry's `content_chunk` is embedded and written back
/// together with `embedding_updated_at`. A failure on one entry is logged and counted; it does
/// not abort the batch. Call repeatedly until `processed` and `failed` are both zero.
///
/// # Errors
/// Returns `500` when the HTTP client cannot be built or the initial SELECT fails.
pub async fn generate_knowledge_embeddings(
    Org(org_ctx): Org,
    State(pool): State<PgPool>,
) -> Result<Json<GenerateKnowledgeEmbeddingsResult>, (StatusCode, String)> {
    let start = std::time::Instant::now();

    // Create client that accepts invalid certificates (for dev with self-signed certs)
    // NOTE(review): this disables TLS certificate and hostname verification unconditionally;
    // confirm this is not reachable in production or gate it behind configuration.
    let client = reqwest::Client::builder()
        .danger_accept_invalid_certs(true)
        .danger_accept_invalid_hostnames(true)
        .build()
        .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("HTTP client error: {}", e)))?;
    let ollama_url = ai::get_ollama_url();
    let model = ai::get_embedding_model();

    // Get knowledge base entries without embeddings
    let entries: Vec<KnowledgeBaseEntry> = sqlx::query_as(
        r#"
        SELECT * FROM knowledge_base
        WHERE organization_id = $1
        AND (embedding IS NULL OR embedding_updated_at IS NULL)
        ORDER BY created_at DESC
        LIMIT 100
        "#
    )
    .bind(org_ctx.id)
    .fetch_all(&pool)
    .await
    .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?;

    let mut processed = 0;
    let mut failed = 0;

    for entry in entries {
        // Generate embedding from content chunk
        match generate_embedding(&client, &ollama_url, &model, &entry.content_chunk).await {
            Ok(response) => {
                let pgvector = ai::embedding_to_pgvector(&response.embedding);

                // Update entry with embedding. `execute` + `rows_affected` is used instead of
                // `RETURNING 1` decoded as `(i64,)`: the literal is INT4, which sqlx will not
                // decode into `i64`, so every successful update used to be counted as failed.
                let update = sqlx::query(
                    r#"
                    UPDATE knowledge_base
                    SET embedding = $1::vector,
                    embedding_updated_at = NOW()
                    WHERE id = $2
                    "#
                )
                .bind(&pgvector)
                .bind(entry.id)
                .execute(&pool)
                .await;

                match update {
                    Ok(done) if done.rows_affected() > 0 => processed += 1,
                    Ok(_) => {
                        // The row disappeared between the SELECT and the UPDATE.
                        tracing::warn!(
                            "Knowledge entry {} no longer exists; embedding not stored",
                            entry.id
                        );
                        failed += 1;
                    }
                    Err(e) => {
                        tracing::error!(
                            "Failed to store embedding for knowledge entry {}: {}",
                            entry.id,
                            e
                        );
                        failed += 1;
                    }
                }
            }
            Err(e) => {
                tracing::error!(
                    "Failed to generate embedding for knowledge entry {}: {}",
                    entry.id,
                    e
                );
                failed += 1;
            }
        }
    }

    let duration_ms = start.elapsed().as_millis() as u64;
    tracing::info!(
        "Generated knowledge embeddings: {} processed, {} failed in {}ms",
        processed,
        failed,
        duration_ms
    );

    Ok(Json(GenerateKnowledgeEmbeddingsResult {
        processed,
        failed,
        duration_ms,
    }))
}
/// POST /api/knowledge-base/{id}/embedding/regenerate - Regenerate embedding for a specific entry
///
/// Looks the entry up within the caller's organization, embeds its `content_chunk` again and
/// overwrites the stored vector together with `embedding_updated_at`.
///
/// # Errors
/// * `404` when the entry does not exist in the caller's organization.
/// * `500` when the HTTP client, the embedding call or a database statement fails.
pub async fn regenerate_knowledge_embedding(
    Org(org_ctx): Org,
    Path(entry_id): Path<Uuid>,
    State(pool): State<PgPool>,
) -> Result<StatusCode, (StatusCode, String)> {
    // Every internal failure is reported the same way; only the message differs.
    let internal = |message: String| (StatusCode::INTERNAL_SERVER_ERROR, message);

    // Create client that accepts invalid certificates
    let http = reqwest::Client::builder()
        .danger_accept_invalid_certs(true)
        .danger_accept_invalid_hostnames(true)
        .build()
        .map_err(|e| internal(format!("HTTP client error: {}", e)))?;
    let (endpoint, embedding_model) = (ai::get_ollama_url(), ai::get_embedding_model());

    // Load the entry, scoped to the caller's organization
    let lookup: Option<KnowledgeBaseEntry> = sqlx::query_as(
        "SELECT * FROM knowledge_base WHERE id = $1 AND organization_id = $2"
    )
    .bind(entry_id)
    .bind(org_ctx.id)
    .fetch_optional(&pool)
    .await
    .map_err(|e| internal(e.to_string()))?;

    let entry = match lookup {
        Some(found) => found,
        None => {
            return Err((StatusCode::NOT_FOUND, "Knowledge base entry not found".to_string()));
        }
    };

    // Embed the chunk text and convert it to the pgvector text representation
    let embedded = generate_embedding(&http, &endpoint, &embedding_model, &entry.content_chunk)
        .await
        .map_err(|e| internal(format!("AI error: {}", e)))?;
    let vector_literal = ai::embedding_to_pgvector(&embedded.embedding);

    // Persist the new vector
    let store = sqlx::query(
        r#"
        UPDATE knowledge_base
        SET embedding = $1::vector,
        embedding_updated_at = NOW()
        WHERE id = $2
        "#
    )
    .bind(&vector_literal)
    .bind(entry_id);

    store
        .execute(&pool)
        .await
        .map_err(|e| internal(e.to_string()))?;

    Ok(StatusCode::OK)
}
// ==================== Semantic Search ====================
/// GET /api/knowledge-base/semantic-search - Search knowledge base by semantic similarity
///
/// Embeds `filters.query`, then returns the chunks of the caller's organization whose cosine
/// similarity to it is at least `threshold` (default 0.5), most similar first. `course_id` and
/// `lesson_id` narrow the search when present. `limit` defaults to 10 and is clamped to
/// `1..=100`, so a negative or oversized value from the query string can neither make the
/// database reject the statement nor request an unbounded result set.
///
/// # Errors
/// Returns `500` when the HTTP client, the embedding call or the database query fails.
pub async fn semantic_search_knowledge(
    Org(org_ctx): Org,
    State(pool): State<PgPool>,
    Query(filters): Query<KnowledgeSearchFilters>,
) -> Result<Json<Vec<KnowledgeSearchResult>>, (StatusCode, String)> {
    // Upper bound for the number of rows a single request may ask for.
    const MAX_LIMIT: i32 = 100;

    // Create client that accepts invalid certificates
    // NOTE(review): this disables TLS certificate and hostname verification unconditionally;
    // confirm this is not reachable in production or gate it behind configuration.
    let client = reqwest::Client::builder()
        .danger_accept_invalid_certs(true)
        .danger_accept_invalid_hostnames(true)
        .build()
        .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("HTTP client error: {}", e)))?;
    let ollama_url = ai::get_ollama_url();
    let model = ai::get_embedding_model();

    // Generate embedding for query
    let embedding_response = generate_embedding(&client, &ollama_url, &model, &filters.query)
        .await
        .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("AI error: {}", e)))?;
    let pgvector = ai::embedding_to_pgvector(&embedding_response.embedding);

    let limit = filters.limit.unwrap_or(10).clamp(1, MAX_LIMIT);
    let threshold = filters.threshold.unwrap_or(0.5);

    // Build query with optional filters. $1..$3 are fixed; each optional filter takes the next
    // placeholder number, and the binds below must be added in exactly the same order.
    let mut query = String::from(
        r#"
        SELECT
        id,
        course_id,
        lesson_id,
        block_id,
        content_chunk,
        1 - (embedding <=> $1::vector) AS similarity,
        metadata
        FROM knowledge_base
        WHERE organization_id = $2
        AND embedding IS NOT NULL
        AND 1 - (embedding <=> $1::vector) >= $3
        "#
    );
    let mut param_idx = 3;
    if filters.course_id.is_some() {
        param_idx += 1;
        query.push_str(&format!(" AND course_id = ${}", param_idx));
    }
    if filters.lesson_id.is_some() {
        param_idx += 1;
        query.push_str(&format!(" AND lesson_id = ${}", param_idx));
    }
    param_idx += 1;
    query.push_str(&format!(" ORDER BY embedding <=> $1::vector LIMIT ${}", param_idx));

    let mut sql_query = sqlx::query_as::<_, KnowledgeSearchResult>(&query)
        .bind(&pgvector)
        .bind(org_ctx.id)
        .bind(threshold);
    if let Some(course_id) = filters.course_id {
        sql_query = sql_query.bind(course_id);
    }
    if let Some(lesson_id) = filters.lesson_id {
        sql_query = sql_query.bind(lesson_id);
    }
    sql_query = sql_query.bind(limit);

    let results = sql_query
        .fetch_all(&pool)
        .await
        .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?;

    Ok(Json(results))
}
// ==================== Helper Structs ====================
/// Row loaded with `SELECT * FROM knowledge_base` by the embedding handlers in this file.
/// The handlers read only `id` and `content_chunk`.
#[derive(Debug, sqlx::FromRow, Clone)]
struct KnowledgeBaseEntry {
/// Primary key; used to address the row when the embedding is written back.
id: Uuid,
/// Owning organization.
organization_id: Uuid,
/// Course the chunk belongs to.
course_id: Uuid,
/// Lesson the chunk belongs to, if any.
lesson_id: Option<Uuid>,
/// Content block the chunk belongs to, if any.
block_id: Option<Uuid>,
/// Text that gets embedded.
content_chunk: String,
/// Presumably the position of the chunk within its source — verify against the writer of this table.
chunk_order: i32,
/// Free-form metadata stored with the chunk, if any.
metadata: Option<serde_json::Value>,
// Only decoded so the row maps cleanly; the handlers sort by it in SQL but never read it here.
#[allow(dead_code)]
created_at: chrono::DateTime<chrono::Utc>,
}
+14
View File
@@ -6,6 +6,7 @@ mod handlers_discussions;
mod handlers_notes;
mod handlers_payments;
mod handlers_peer_review;
mod handlers_embeddings;
mod lti;
mod jwks;
mod predictive;
@@ -149,6 +150,19 @@ async fn main() {
"/notifications/{id}/read",
post(handlers::mark_notification_as_read),
)
// Knowledge Base Embedding Routes for Semantic RAG
.route(
"/knowledge-base/embeddings/generate",
post(handlers_embeddings::generate_knowledge_embeddings),
)
.route(
"/knowledge-base/semantic-search",
get(handlers_embeddings::semantic_search_knowledge),
)
.route(
"/knowledge-base/{id}/embedding/regenerate",
post(handlers_embeddings::regenerate_knowledge_embedding),
)
// Discussion Forums Routes
.route(
"/courses/{id}/discussions",