From 024bd6e46d71c1704ef602359f54b7deb9089598 Mon Sep 17 00:00:00 2001 From: Nurfog Date: Tue, 7 Apr 2026 13:38:22 -0400 Subject: [PATCH] feat: enhance asset import functionality and unit tracking - Added WHISPER_URL environment variable to docker-compose for audio transcription service. - Updated Nginx configuration to increase timeout settings for API requests. - Enhanced asset ingestion process to extract unit numbers from ZIP entry paths, supporting various naming conventions. - Implemented logic to split intensive courses into two regular courses during asset import. - Added new fields to the Asset and QuestionBank models to track unit numbers and source asset links. - Introduced backward-compatible fallbacks for fetching study plans and courses from legacy MySQL database. - Improved error handling and progress tracking during ZIP file uploads in the frontend. - Created a new SQL migration to add unit_number and source_asset_id columns to the assets and question_bank tables, along with necessary indexes for performance. 
--- .env.example | 4 + deploy.sh | 22 +- docker-compose.yml | 1 + nginx/studio.conf | 8 +- .../20260407000000_add_unit_asset_link.sql | 12 + services/cms-service/src/handlers_assets.rs | 337 +++++++++++++----- .../cms-service/src/handlers_question_bank.rs | 136 ++++++- shared/common/src/models.rs | 3 + web/studio/Dockerfile | 9 +- web/studio/src/app/admin/materials/page.tsx | 227 +++++++++++- web/studio/src/lib/api.ts | 29 +- 11 files changed, 687 insertions(+), 101 deletions(-) create mode 100644 services/cms-service/migrations/20260407000000_add_unit_asset_link.sql diff --git a/.env.example b/.env.example index 8e7edb7..7c923cb 100644 --- a/.env.example +++ b/.env.example @@ -60,10 +60,14 @@ EXTERNAL_ID_TIPO_NOTA=1 # AWS S3 Configuration (audio storage) # Bucket: openccb-802726101181-us-east-2-an # ---------------------------------------- +ASSETS_STORAGE=s3 AWS_ACCESS_KEY_ID= AWS_SECRET_ACCESS_KEY= AWS_REGION=us-east-2 S3_BUCKET=openccb-802726101181-us-east-2-an +S3_ENDPOINT= +S3_PUBLIC_BASE_URL= +S3_FORCE_PATH_STYLE=false # ---------------------------------------- # Mercado Pago Configuration diff --git a/deploy.sh b/deploy.sh index f04624b..93bbae4 100755 --- a/deploy.sh +++ b/deploy.sh @@ -79,8 +79,14 @@ trap cleanup EXIT # Copiar archivos esenciales echo " πŸ“‹ Copiando archivos esenciales..." 
cp -r docker-compose.yml "$PROD_DIR/" 2>/dev/null || echo " ⚠️ docker-compose.yml no existe" -# NO copiar .env local - tiene configuraciones incorrectas para producciΓ³n -echo " ℹ️ .env local NO se copia - se generarΓ‘ uno correcto en el servidor" +# Usar .env local como fuente de verdad para producciΓ³n (si existe) +if [ -f ".env" ]; then + cp .env "$PROD_DIR/.env" + echo " βœ… .env local copiado (fuente de producciΓ³n)" +else + echo " ⚠️ .env local no existe; se usarΓ‘ .env.example como fallback" +fi +# .env.example se mantiene como plantilla/documentaciΓ³n cp -r .env.example "$PROD_DIR/" 2>/dev/null || true # NO copiar ubuntu.pem - solo se usa localmente para SSH @@ -347,17 +353,19 @@ echo " PROTOCOL: \$PROTOCOL" echo "" # ======================================== -# GENERAR .ENV CORRECTO PARA PRODUCCION +# RESOLVER .ENV PARA PRODUCCION # ======================================== -echo "Generando configuracion .env para produccion..." +echo "Resolviendo configuracion .env para produccion..." if [ ! -f ".env" ]; then - echo " Creando .env desde .env.example..." + echo " .env no existe en remoto; creando desde .env.example..." if [ -f ".env.example" ]; then cp .env.example .env else touch .env fi +else + echo " Usando .env existente (valores de produccion)" fi # Generar DB_PASSWORD seguro @@ -435,6 +443,10 @@ echo "NEXT_PUBLIC_CMS_API_URL=\$CMS_URL" >> .env echo "NEXT_PUBLIC_LMS_API_URL=\$LMS_URL" >> .env # Configurar S3 para almacenamiento de audio +if ! grep -q "^ASSETS_STORAGE=" .env || grep -q "^ASSETS_STORAGE=$" .env; then + sed -i "/^ASSETS_STORAGE=/d" .env 2>/dev/null || true + echo "ASSETS_STORAGE=s3" >> .env +fi if ! 
grep -q "^S3_BUCKET=" .env || grep -q "^S3_BUCKET=$" .env; then sed -i "/^S3_BUCKET=/d" .env 2>/dev/null || true echo "S3_BUCKET=openccb-802726101181-us-east-2-an" >> .env diff --git a/docker-compose.yml b/docker-compose.yml index b35d884..d507059 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -88,6 +88,7 @@ services: - DATABASE_URL=${CMS_DATABASE_URL} - MYSQL_DATABASE_URL=${MYSQL_DATABASE_URL} - SAM_DIAGNOSTICO_DATABASE_URL=${SAM_DIAGNOSTICO_DATABASE_URL} + - WHISPER_URL=${PROD_WHISPER_URL:-https://whisper.t-800.norteamericano.cl} - LMS_INTERNAL_URL=http://experience:3002 - NEXT_PUBLIC_LMS_API_URL=${NEXT_PUBLIC_LMS_API_URL} volumes: diff --git a/nginx/studio.conf b/nginx/studio.conf index 7281286..02b29b8 100644 --- a/nginx/studio.conf +++ b/nginx/studio.conf @@ -18,10 +18,10 @@ location /cms-api/ { proxy_set_header X-Forwarded-Proto $http_x_forwarded_proto; proxy_set_header Connection ""; proxy_http_version 1.1; - proxy_connect_timeout 120s; - proxy_send_timeout 3600s; - proxy_read_timeout 3600s; - send_timeout 3600s; + proxy_connect_timeout 300s; + proxy_send_timeout 7200s; + proxy_read_timeout 7200s; + send_timeout 7200s; } location /lms-api/ { diff --git a/services/cms-service/migrations/20260407000000_add_unit_asset_link.sql b/services/cms-service/migrations/20260407000000_add_unit_asset_link.sql new file mode 100644 index 0000000..7c7defe --- /dev/null +++ b/services/cms-service/migrations/20260407000000_add_unit_asset_link.sql @@ -0,0 +1,12 @@ +-- Add unit_number to assets: tracks which syllabus unit a file belongs to within a ZIP +ALTER TABLE assets ADD COLUMN IF NOT EXISTS unit_number INTEGER; + +-- Link question_bank RAG chunks to their source audio/video asset +-- This allows test creation (AI + manual) to attach the audio to exercises +ALTER TABLE question_bank ADD COLUMN IF NOT EXISTS source_asset_id UUID REFERENCES assets(id) ON DELETE SET NULL; +ALTER TABLE question_bank ADD COLUMN IF NOT EXISTS unit_number INTEGER; + +-- Index for fast 
lookup by unit +CREATE INDEX IF NOT EXISTS idx_assets_unit_number ON assets(organization_id, sam_plan_id, unit_number); +CREATE INDEX IF NOT EXISTS idx_qb_source_asset ON question_bank(source_asset_id) WHERE source_asset_id IS NOT NULL; +CREATE INDEX IF NOT EXISTS idx_qb_unit_number ON question_bank(organization_id, unit_number) WHERE unit_number IS NOT NULL; diff --git a/services/cms-service/src/handlers_assets.rs b/services/cms-service/src/handlers_assets.rs index 916bcec..1158960 100644 --- a/services/cms-service/src/handlers_assets.rs +++ b/services/cms-service/src/handlers_assets.rs @@ -16,6 +16,7 @@ use serde::{Deserialize, Serialize}; use serde_json::json; use sqlx::PgPool; use uuid::Uuid; +use std::collections::HashMap; use std::env; use std::path::Path as StdPath; use tokio::process::Command; @@ -585,6 +586,9 @@ pub async fn ingest_asset_for_rag( &client, &ollama_url, &model, + None, + None, + asset.unit_number, ) .await?; @@ -601,6 +605,38 @@ pub async fn ingest_asset_for_rag( /// - file: ZIP requerido /// - course_id: UUID opcional /// - ingest_rag: true/false opcional (default false) +/// Extracts a unit number from a ZIP entry path using the top-level folder name. +/// Supports: "Unit 1/...", "Unidad 1/...", "unit-01/...", "01/...", "1/..." 
+fn extract_unit_number(entry_name: &str) -> Option { + let parts: Vec<&str> = entry_name.splitn(2, '/').collect(); + if parts.len() < 2 { + return None; // file at ZIP root β€” no unit folder + } + let folder = parts[0].trim(); + if folder.is_empty() { + return None; + } + let lower = folder.to_lowercase(); + // Strip common textual prefixes, then parse leading digits + let stripped = lower + .trim_start_matches("unidad") + .trim_start_matches("unit") + .trim_start_matches('u') + .trim_start_matches(|c: char| !c.is_ascii_digit()); + let digits: String = stripped.chars().take_while(|c| c.is_ascii_digit()).collect(); + digits.parse().ok() +} + +struct ZipEntryData { + entry_name: String, + safe_filename: String, + content: Vec, + unit_number: Option, + guessed_mimetype: String, + is_audio_video: bool, + is_flv: bool, +} + pub async fn import_assets_zip( Org(org_ctx): Org, claims: Claims, @@ -613,6 +649,9 @@ pub async fn import_assets_zip( let mut sam_plan_id: Option = None; let mut sam_course_id: Option = None; let mut ingest_rag = false; + let mut split_to_regular = false; + let mut sam_course_id_r1: Option = None; + let mut sam_course_id_r2: Option = None; while let Some(mut field) = multipart .next_field() @@ -678,6 +717,23 @@ pub async fn import_assets_zip( sam_course_id = Some(id); } } + } else if name == "split_to_regular" { + if let Ok(txt) = field.text().await { + let v = txt.trim().to_lowercase(); + split_to_regular = v == "1" || v == "true" || v == "yes"; + } + } else if name == "sam_course_id_r1" { + if let Ok(txt) = field.text().await { + if let Ok(id) = txt.trim().parse::() { + sam_course_id_r1 = Some(id); + } + } + } else if name == "sam_course_id_r2" { + if let Ok(txt) = field.text().await { + if let Ok(id) = txt.trim().parse::() { + sam_course_id_r2 = Some(id); + } + } } } @@ -699,11 +755,84 @@ pub async fn import_assets_zip( return Err((StatusCode::BAD_REQUEST, "No ZIP file uploaded".to_string())); } + // ── Phase 1: collect all ZIP entries into 
memory ────────────────────────── + let mut all_entries: Vec = Vec::new(); + let mut unit_set: std::collections::BTreeSet = Default::default(); + + let len = archive.len(); + for i in 0..len { + let mut file = archive + .by_index(i) + .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("ZIP read error: {}", e)))?; + + if !file.is_file() { + continue; + } + let entry_name = file.name().to_string(); + if entry_name.starts_with("__MACOSX/") || entry_name.ends_with(".DS_Store") { + continue; + } + let safe_filename = StdPath::new(&entry_name) + .file_name() + .and_then(|s| s.to_str()) + .unwrap_or("unnamed") + .to_string(); + + let mut content = Vec::new(); + std::io::Read::read_to_end(&mut file, &mut content) + .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("ZIP entry read failed: {}", e)))?; + + let guessed_mimetype = mime_guess::from_path(&safe_filename) + .first_or_octet_stream() + .to_string(); + let is_flv = is_flv_media(&safe_filename, &guessed_mimetype); + let is_audio_video = is_flv + || guessed_mimetype.starts_with("audio/") + || guessed_mimetype.starts_with("video/"); + + let unit_number = extract_unit_number(&entry_name); + if let Some(u) = unit_number { + unit_set.insert(u); + } + + all_entries.push(ZipEntryData { + entry_name, + safe_filename, + content, + unit_number, + guessed_mimetype, + is_audio_video, + is_flv, + }); + } + + // ── Phase 1b: calculate split midpoint (intensive β†’ 2 regular courses) ─── + // For 8-10 units: first half β†’ regular 1, second half β†’ regular 2. + // Mid is the last unit number that goes to regular 1 (ceiling of N/2). 
+ let split_midpoint: Option = if split_to_regular + && sam_course_id_r1.is_some() + && sam_course_id_r2.is_some() + && !unit_set.is_empty() + { + let units: Vec = unit_set.iter().cloned().collect(); + let mid_idx = (units.len() + 1) / 2; // ceiling: 8 β†’ 4, 9 β†’ 5, 10 β†’ 5 + Some(units[mid_idx - 1]) + } else { + None + }; + + // Sort: audio/video first so their asset IDs are known when text is ingested + all_entries.sort_by_key(|e| if e.is_audio_video { 0usize } else { 1 }); + + // ── Phase 2: process entries ─────────────────────────────────────────────── let mut imported_assets = 0usize; let mut rag_ingested_assets = 0usize; let mut rag_chunks_ingested = 0usize; let mut failed_entries: Vec = Vec::new(); + // unit_number β†’ (asset_id, public_url): populated from audio/video assets + let mut unit_audio_map: HashMap = HashMap::new(); + let rag_client = if ingest_rag { Some( reqwest::Client::builder() @@ -718,41 +847,28 @@ pub async fn import_assets_zip( let ollama_url = ai::get_ollama_url(); let model = ai::get_embedding_model(); - let len = archive.len(); - for i in 0..len { - let (entry_name, safe_filename, content): (String, String, Vec) = { - let mut file = archive - .by_index(i) - .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("ZIP read error: {}", e)))?; + for entry in all_entries { + let ZipEntryData { + entry_name, + safe_filename, + content, + unit_number, + guessed_mimetype, + is_audio_video, + is_flv, + } = entry; - if !file.is_file() { - continue; + // Determine effective sam_course_id based on split midpoint + let effective_sam_course_id = match (split_midpoint, unit_number) { + (Some(mid), Some(u)) => { + if u <= mid { sam_course_id_r1 } else { sam_course_id_r2 } } - - let entry_name = file.name().to_string(); - if entry_name.starts_with("__MACOSX/") || entry_name.ends_with(".DS_Store") { - continue; - } - - let safe_filename = std::path::Path::new(&entry_name) - .file_name() - .and_then(|s| s.to_str()) - .unwrap_or("unnamed") - 
.to_string(); - - let mut content = Vec::new(); - std::io::Read::read_to_end(&mut file, &mut content) - .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("ZIP entry read failed: {}", e)))?; - - (entry_name, safe_filename, content) + _ => sam_course_id, }; let asset_id = Uuid::new_v4(); - let guessed_mimetype = mime_guess::from_path(&safe_filename) - .first_or_octet_stream() - .to_string(); - let (storage_path, stored_filename, mimetype) = if is_flv_media(&safe_filename, &guessed_mimetype) { + let (storage_path, stored_filename, mimetype) = if is_flv { let temp_storage_filename = format!("{}.flv", asset_id); let temp_storage_path = format!("uploads/{}", temp_storage_filename); tokio::fs::create_dir_all("uploads") @@ -801,7 +917,7 @@ pub async fn import_assets_zip( .unwrap_or("") .to_string(); - let (db_storage_path, _asset_url) = if !storage_filename_for_s3.is_empty() { + let (db_storage_path, asset_public_url) = if !storage_filename_for_s3.is_empty() { if let Some((s3_path, public_url)) = maybe_push_local_file_to_s3( &storage_path, &storage_filename_for_s3, @@ -834,8 +950,8 @@ pub async fn import_assets_zip( let insert_result = sqlx::query( r#" - INSERT INTO assets (id, organization_id, uploaded_by, course_id, english_level, sam_plan_id, sam_course_id, filename, storage_path, mimetype, size_bytes) - VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11) + INSERT INTO assets (id, organization_id, uploaded_by, course_id, english_level, sam_plan_id, sam_course_id, unit_number, filename, storage_path, mimetype, size_bytes) + VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12) "#, ) .bind(asset_id) @@ -844,7 +960,8 @@ pub async fn import_assets_zip( .bind(course_id) .bind(&english_level) .bind(sam_plan_id) - .bind(sam_course_id) + .bind(effective_sam_course_id) + .bind(unit_number) .bind(&stored_filename) .bind(&db_storage_path) .bind(&mimetype) @@ -859,6 +976,13 @@ pub async fn import_assets_zip( imported_assets += 1; + // Track audio/video asset per unit 
for cross-linking with text RAG chunks + if is_audio_video { + if let Some(u) = unit_number { + unit_audio_map.entry(u).or_insert((asset_id, asset_public_url.clone())); + } + } + if ingest_rag { let asset = Asset { id: asset_id, @@ -867,7 +991,8 @@ pub async fn import_assets_zip( course_id, english_level: english_level.clone(), sam_plan_id, - sam_course_id, + sam_course_id: effective_sam_course_id, + unit_number, filename: stored_filename.clone(), storage_path: db_storage_path.clone(), mimetype: mimetype.clone(), @@ -875,6 +1000,16 @@ pub async fn import_assets_zip( created_at: chrono::Utc::now(), }; + // For text/PDF entries, look up the audio asset from the same unit + let (linked_audio_id, linked_audio_url) = if !is_audio_video { + match unit_number.and_then(|u| unit_audio_map.get(&u)) { + Some((aid, aurl)) => (Some(*aid), Some(aurl.clone())), + None => (None, None), + } + } else { + (None, None) + }; + match extract_asset_text(&asset).await { Ok(extracted) => { let trimmed = extracted.trim(); @@ -889,7 +1024,7 @@ pub async fn import_assets_zip( continue; } - let source_kind = if mimetype.starts_with("audio/") || mimetype.starts_with("video/") { + let source_kind = if is_audio_video { "audio-transcription" } else if mimetype.contains("pdf") { "pdf" @@ -897,7 +1032,7 @@ pub async fn import_assets_zip( "text" }; - let skill = if mimetype.starts_with("audio/") || mimetype.starts_with("video/") { + let skill = if is_audio_video { Some("listening") } else { Some("reading") @@ -915,6 +1050,9 @@ pub async fn import_assets_zip( client, &ollama_url, &model, + linked_audio_id, + linked_audio_url, + unit_number, ) .await { @@ -1007,6 +1145,9 @@ async fn ingest_chunks_to_question_bank( client: &reqwest::Client, ollama_url: &str, model: &str, + source_asset_id: Option, + audio_url: Option, + unit_number: Option, ) -> Result<(), (StatusCode, String)> { for (idx, chunk) in chunks.iter().enumerate() { let metadata = json!({ @@ -1017,6 +1158,7 @@ async fn 
ingest_chunks_to_question_bank( "source_kind": source_kind, "chunk_index": idx + 1, "chunk_total": chunks.len(), + "unit_number": unit_number, }); let inserted_id: Uuid = sqlx::query_scalar( @@ -1031,10 +1173,13 @@ async fn ingest_chunks_to_question_bank( skill_assessed, source, source_metadata, + source_asset_id, + audio_url, + unit_number, is_active, is_archived ) - VALUES ($1, $2, $3, 'short-answer', $4, 'medium', $5, 'imported-material', $6, true, false) + VALUES ($1, $2, $3, 'short-answer', $4, 'medium', $5, 'imported-material', $6, $7, $8, $9, true, false) RETURNING id "#, ) @@ -1044,6 +1189,9 @@ async fn ingest_chunks_to_question_bank( .bind("RAG material chunk from uploaded asset") .bind(skill) .bind(&metadata) + .bind(source_asset_id) + .bind(&audio_url) + .bind(unit_number) .fetch_one(pool) .await .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("Insert failed: {}", e)))?; @@ -1139,54 +1287,81 @@ async fn extract_pdf_text_from_bytes(bytes: Vec) -> Result, filename: &str) -> Result { - let whisper_url = std::env::var("WHISPER_URL") - .unwrap_or_else(|_| "http://localhost:8000".to_string()); - let client = reqwest::Client::new(); - - let form = reqwest::multipart::Form::new() - .part( - "file", - reqwest::multipart::Part::bytes(file_data).file_name(filename.to_string()), - ) - .text("model", "whisper-1") - .text("response_format", "json"); - - let response = client - .post(format!("{}/v1/audio/transcriptions", whisper_url)) - .multipart(form) - .send() - .await - .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("Whisper request failed: {}", e)))?; - - if !response.status().is_success() { - let status = response.status(); - let body = response.text().await.unwrap_or_default(); - return Err(( - StatusCode::BAD_GATEWAY, - format!("Whisper API error {}: {}", status, body), - )); + let mut whisper_urls: Vec = Vec::new(); + if let Ok(url) = std::env::var("WHISPER_URL") { + let trimmed = url.trim(); + if !trimmed.is_empty() { + 
whisper_urls.push(trimmed.trim_end_matches('/').to_string()); + } } - let transcription: serde_json::Value = response - .json() - .await - .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("Invalid Whisper response: {}", e)))?; - - let text = transcription - .get("text") - .and_then(|v| v.as_str()) - .unwrap_or("") - .trim() - .to_string(); - - if text.is_empty() { - return Err(( - StatusCode::BAD_REQUEST, - "Whisper no pudo extraer texto del audio/video".to_string(), - )); + // Container-friendly fallbacks for common local deployments. + if whisper_urls.is_empty() { + whisper_urls.push("http://host.docker.internal:8000".to_string()); + whisper_urls.push("http://localhost:8000".to_string()); } - Ok(text) + let client = reqwest::Client::builder() + .timeout(std::time::Duration::from_secs(300)) + .build() + .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("Whisper HTTP client error: {}", e)))?; + + let mut last_error = String::new(); + + for base_url in whisper_urls { + let form = reqwest::multipart::Form::new() + .part( + "file", + reqwest::multipart::Part::bytes(file_data.clone()).file_name(filename.to_string()), + ) + .text("model", "whisper-1") + .text("response_format", "json"); + + let endpoint = format!("{}/v1/audio/transcriptions", base_url); + let response = match client.post(&endpoint).multipart(form).send().await { + Ok(r) => r, + Err(e) => { + last_error = format!("{} ({})", endpoint, e); + continue; + } + }; + + if !response.status().is_success() { + let status = response.status(); + let body = response.text().await.unwrap_or_default(); + last_error = format!("{} -> {}: {}", endpoint, status, body); + continue; + } + + let transcription: serde_json::Value = response + .json() + .await + .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("Invalid Whisper response: {}", e)))?; + + let text = transcription + .get("text") + .and_then(|v| v.as_str()) + .unwrap_or("") + .trim() + .to_string(); + + if text.is_empty() { + return Err(( + 
StatusCode::BAD_REQUEST, + "Whisper no pudo extraer texto del audio/video".to_string(), + )); + } + + return Ok(text); + } + + Err(( + StatusCode::BAD_GATEWAY, + format!( + "Whisper request failed en todos los endpoints configurados. Ultimo error: {}", + last_error + ), + )) } fn chunk_text(text: &str, max_chars: usize) -> Vec { diff --git a/services/cms-service/src/handlers_question_bank.rs b/services/cms-service/src/handlers_question_bank.rs index 305db7f..e896ca4 100644 --- a/services/cms-service/src/handlers_question_bank.rs +++ b/services/cms-service/src/handlers_question_bank.rs @@ -864,7 +864,7 @@ pub async fn get_mysql_plans( State(pool): State, ) -> Result>, (StatusCode, String)> { // Read from SAM mirror in PostgreSQL with SAM-native fields. - let plans: Vec = sqlx::query_as( + let mut plans: Vec = sqlx::query_as( r#" SELECT idPlanDeEstudios AS id_plan_de_estudios, @@ -879,6 +879,111 @@ pub async fn get_mysql_plans( .await .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("Failed to fetch plans: {}", e)))?; + // Backward-compatible fallback: if SAM mirror is empty, use legacy metadata mirror. + if plans.is_empty() { + plans = sqlx::query_as( + r#" + SELECT + mysql_id AS id_plan_de_estudios, + name AS nombre_plan + FROM mysql_study_plans + WHERE organization_id = $1 AND is_active = TRUE + ORDER BY name + "#, + ) + .bind(org_ctx.id) + .fetch_all(&pool) + .await + .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("Failed to fetch legacy plans: {}", e)))?; + } + + // Last-resort auto-sync: if still empty, pull metadata from MySQL and persist it. 
+ if plans.is_empty() { + match connect_mysql_pool("MYSQL_DATABASE_URL").await { + Ok(mysql_pool) => { + let mysql_plans: Result, sqlx::Error> = sqlx::query_as( + r#" + SELECT DISTINCT + pe.idPlanDeEstudios AS id_plan_de_estudios, + pe.Nombre AS nombre_plan + FROM plandeestudios pe + WHERE pe.Activo = 1 + ORDER BY pe.Nombre + "#, + ) + .fetch_all(&mysql_pool) + .await; + + let mysql_courses: Result, sqlx::Error> = sqlx::query_as( + r#" + SELECT DISTINCT + c.idCursos AS id_cursos, + c.NombreCurso AS nombre_curso, + c.NivelCurso AS nivel_curso, + pe.idPlanDeEstudios AS id_plan_de_estudios, + pe.Nombre AS nombre_plan, + c.Duracion AS duracion + FROM curso c + JOIN plandeestudios pe ON c.idPlanDeEstudios = pe.idPlanDeEstudios + WHERE c.Activo = 1 + AND pe.Activo = 1 + ORDER BY pe.Nombre, c.NivelCurso + "#, + ) + .fetch_all(&mysql_pool) + .await; + + match (mysql_plans, mysql_courses) { + (Ok(p), Ok(c)) => { + if let Err(err) = save_mysql_courses_and_plans(&pool, org_ctx.id, p, c).await { + tracing::warn!("Auto-sync MySQL metadata failed: {}", err); + } + } + (Err(e), _) => tracing::warn!("Auto-sync plans query failed: {}", e), + (_, Err(e)) => tracing::warn!("Auto-sync courses query failed: {}", e), + } + + mysql_pool.close().await; + } + Err(e) => { + tracing::warn!("Auto-sync could not connect to MySQL: {:?}", e); + } + } + + // Reload plans after auto-sync attempt. 
+ plans = sqlx::query_as( + r#" + SELECT + idPlanDeEstudios AS id_plan_de_estudios, + Nombre AS nombre_plan + FROM sam_study_plans + WHERE organization_id = $1 AND Activo = TRUE + ORDER BY Nombre + "#, + ) + .bind(org_ctx.id) + .fetch_all(&pool) + .await + .unwrap_or_default(); + + if plans.is_empty() { + plans = sqlx::query_as( + r#" + SELECT + mysql_id AS id_plan_de_estudios, + name AS nombre_plan + FROM mysql_study_plans + WHERE organization_id = $1 AND is_active = TRUE + ORDER BY name + "#, + ) + .bind(org_ctx.id) + .fetch_all(&pool) + .await + .unwrap_or_default(); + } + } + Ok(Json(plans)) } @@ -889,7 +994,7 @@ pub async fn get_mysql_courses_by_plan( Query(filters): Query, ) -> Result>, (StatusCode, String)> { // Read from SAM mirror in PostgreSQL with SAM-native fields. - let courses: Vec = sqlx::query_as( + let mut courses: Vec = sqlx::query_as( r#" SELECT c.idCursos AS id_cursos, @@ -915,6 +1020,33 @@ pub async fn get_mysql_courses_by_plan( .await .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("Failed to fetch courses: {}", e)))?; + // Backward-compatible fallback: if SAM mirror is empty, use legacy metadata mirror. 
+ if courses.is_empty() { + courses = sqlx::query_as( + r#" + SELECT + c.mysql_id AS id_cursos, + c.name AS nombre_curso, + c.level AS nivel_curso, + sp.mysql_id AS id_plan_de_estudios, + sp.name AS nombre_plan, + c.duracion::double precision AS duracion + FROM mysql_courses c + JOIN mysql_study_plans sp ON c.study_plan_id = sp.id + WHERE c.organization_id = $1 + AND c.is_active = TRUE + AND sp.is_active = TRUE + AND sp.mysql_id = $2 + ORDER BY c.level + "#, + ) + .bind(org_ctx.id) + .bind(filters.plan_id) + .fetch_all(&pool) + .await + .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("Failed to fetch legacy courses: {}", e)))?; + } + Ok(Json(courses)) } diff --git a/shared/common/src/models.rs b/shared/common/src/models.rs index b84dc26..09a26aa 100644 --- a/shared/common/src/models.rs +++ b/shared/common/src/models.rs @@ -347,6 +347,7 @@ pub struct Asset { pub english_level: Option, pub sam_plan_id: Option, pub sam_course_id: Option, + pub unit_number: Option, pub filename: String, pub storage_path: String, pub mimetype: String, @@ -1402,6 +1403,8 @@ pub struct QuestionBank { pub updated_at: chrono::DateTime, pub embedding: Option, // PGVector embedding for semantic search pub embedding_updated_at: Option>, + pub source_asset_id: Option, // audio/video asset that originated this RAG chunk + pub unit_number: Option, // syllabus unit number from ZIP folder structure } #[derive(Debug, Serialize, Deserialize, Clone)] diff --git a/web/studio/Dockerfile b/web/studio/Dockerfile index cda4dc0..0bcc987 100644 --- a/web/studio/Dockerfile +++ b/web/studio/Dockerfile @@ -37,8 +37,13 @@ FROM node:20-slim AS runner WORKDIR /app ENV NODE_ENV production -# Install system dependencies for Rust binary -RUN apt-get update && apt-get install -y openssl ca-certificates && rm -rf /var/lib/apt/lists/* +# Install system dependencies for Rust binary and asset processing +RUN apt-get update && apt-get install -y \ + openssl \ + ca-certificates \ + ffmpeg \ + poppler-utils \ + && 
rm -rf /var/lib/apt/lists/* # Install sharp for Next.js image optimization RUN --mount=type=cache,target=/root/.npm npm install sharp diff --git a/web/studio/src/app/admin/materials/page.tsx b/web/studio/src/app/admin/materials/page.tsx index e5271a1..1844ddd 100644 --- a/web/studio/src/app/admin/materials/page.tsx +++ b/web/studio/src/app/admin/materials/page.tsx @@ -2,7 +2,7 @@ import React, { useMemo, useState } from 'react'; import { cmsApi, questionBankApi, MySqlPlan, MySqlCourse } from '@/lib/api'; -import { Upload, Database, FileArchive, CheckCircle2, AlertTriangle } from 'lucide-react'; +import { Upload, Database, FileArchive, CheckCircle2, AlertTriangle, Scissors } from 'lucide-react'; export default function AdminSharedMaterialsPage() { const [zipFile, setZipFile] = useState(null); @@ -12,7 +12,16 @@ export default function AdminSharedMaterialsPage() { const [courses, setCourses] = useState([]); const [selectedPlanId, setSelectedPlanId] = useState(''); const [selectedCourseId, setSelectedCourseId] = useState(''); + const [splitToRegular, setSplitToRegular] = useState(false); + const [regularPlanId, setRegularPlanId] = useState(''); + const [regularCourses, setRegularCourses] = useState([]); + const [selectedCourseIdR1, setSelectedCourseIdR1] = useState(''); + const [selectedCourseIdR2, setSelectedCourseIdR2] = useState(''); const [loading, setLoading] = useState(false); + const [uploadProgress, setUploadProgress] = useState(0); + const [phase, setPhase] = useState<'idle' | 'uploading' | 'processing' | 'done' | 'error'>('idle'); + const [startedAt, setStartedAt] = useState(null); + const [elapsedSeconds, setElapsedSeconds] = useState(0); const [result, setResult] = useState<{ imported_assets: number; rag_ingested_assets: number; @@ -22,6 +31,32 @@ export default function AdminSharedMaterialsPage() { const canUpload = useMemo(() => Boolean(zipFile) && !loading, [zipFile, loading]); + // Detect if the selected course is "intensive" (no trailing digit) and 
auto-detect + // the two corresponding regular courses (same name + " 1" and " 2"). + const selectedCourseName = useMemo( + () => courses.find((c) => c.idCursos === selectedCourseId)?.NombreCurso ?? '', + [courses, selectedCourseId], + ); + const isIntensiveCourse = useMemo( + () => Boolean(selectedCourseId) && !/\s*[12]$/.test(selectedCourseName.trim()), + [selectedCourseId, selectedCourseName], + ); + // Suggested regular course names: replace "INTENSIVE" with nothing or trim trailing "INTENSIVE" + const regularBaseName = useMemo(() => { + const name = selectedCourseName.trim(); + return name.replace(/\s*INTENSIVE\s*$/i, '').trim(); + }, [selectedCourseName]); + const regularCourse1 = useMemo( + () => regularCourses.find((c) => c.NombreCurso.trim() === `${regularBaseName} 1`) + ?? regularCourses.find((c) => /\s1$/.test(c.NombreCurso.trim())), + [regularCourses, regularBaseName], + ); + const regularCourse2 = useMemo( + () => regularCourses.find((c) => c.NombreCurso.trim() === `${regularBaseName} 2`) + ?? 
regularCourses.find((c) => /\s2$/.test(c.NombreCurso.trim())), + [regularCourses, regularBaseName], + ); + React.useEffect(() => { questionBankApi.getMySQLPlans().then(setPlans).catch(() => setPlans([])); }, []); @@ -30,10 +65,72 @@ export default function AdminSharedMaterialsPage() { if (!selectedPlanId) { setCourses([]); setSelectedCourseId(''); + setSplitToRegular(false); + setRegularPlanId(''); + setRegularCourses([]); return; } questionBankApi.getMySQLCoursesByPlan(selectedPlanId).then(setCourses).catch(() => setCourses([])); - }, [selectedPlanId]); + // Auto-detect sibling regular plan (swap INTENSIVO <-> REGULAR in plan name) + const intensivePlan = plans.find((p) => p.idPlanDeEstudios === selectedPlanId); + if (intensivePlan) { + const regularPlanName = intensivePlan.NombrePlan.replace(/INTENSIVO/i, 'REGULAR').trim(); + const sibling = plans.find((p) => p.NombrePlan.toUpperCase() === regularPlanName.toUpperCase()); + if (sibling) { + setRegularPlanId(sibling.idPlanDeEstudios); + questionBankApi.getMySQLCoursesByPlan(sibling.idPlanDeEstudios).then(setRegularCourses).catch(() => setRegularCourses([])); + } else { + setRegularPlanId(''); + setRegularCourses([]); + } + } + }, [selectedPlanId, plans]); + + // Load courses for manually selected regular plan + React.useEffect(() => { + if (!regularPlanId) return; + questionBankApi.getMySQLCoursesByPlan(regularPlanId).then(setRegularCourses).catch(() => setRegularCourses([])); + }, [regularPlanId]); + + // Auto-fill regular course IDs when intensive course is selected and split is on + React.useEffect(() => { + if (splitToRegular && isIntensiveCourse) { + setSelectedCourseIdR1(regularCourse1?.idCursos ?? ''); + setSelectedCourseIdR2(regularCourse2?.idCursos ?? 
''); + } + }, [splitToRegular, isIntensiveCourse, regularCourse1, regularCourse2]); + + React.useEffect(() => { + if (!loading || !startedAt) { + return; + } + + const timer = window.setInterval(() => { + const seconds = Math.floor((Date.now() - startedAt) / 1000); + setElapsedSeconds(seconds); + }, 1000); + + return () => { + window.clearInterval(timer); + }; + }, [loading, startedAt]); + + const formatElapsed = (seconds: number): string => { + const mins = Math.floor(seconds / 60); + const secs = seconds % 60; + return `${String(mins).padStart(2, '0')}:${String(secs).padStart(2, '0')}`; + }; + + const statusText = + phase === 'uploading' + ? `Subiendo ZIP... ${uploadProgress}%` + : phase === 'processing' + ? 'Procesando contenido en servidor (esto puede tardar varios minutos para ZIPs grandes)...' + : phase === 'done' + ? 'Importacion completada' + : phase === 'error' + ? 'Importacion con error' + : 'Sin proceso activo'; const handleUpload = async () => { if (!zipFile) { @@ -43,6 +140,10 @@ export default function AdminSharedMaterialsPage() { try { setLoading(true); + setPhase('uploading'); + setUploadProgress(0); + setStartedAt(Date.now()); + setElapsedSeconds(0); setResult(null); const response = await cmsApi.importAssetsZip( zipFile, @@ -51,10 +152,19 @@ export default function AdminSharedMaterialsPage() { englishLevel || undefined, selectedPlanId || undefined, selectedCourseId || undefined, + (pct) => { + setUploadProgress(pct); + setPhase(pct >= 100 ? 'processing' : 'uploading'); + }, + splitToRegular, + selectedCourseIdR1 || undefined, + selectedCourseIdR2 || undefined, ); setResult(response); + setPhase('done'); alert('Importacion ZIP finalizada.'); } catch (error) { + setPhase('error'); console.error('ZIP import failed:', error); const msg = error instanceof Error ? error.message : 'Error al importar ZIP'; alert(msg); @@ -79,7 +189,11 @@ export default function AdminSharedMaterialsPage() {

Importar ZIP de Materiales

-

Se cargan a biblioteca compartida (sin curso especifico).

+

+ Organiza el ZIP en carpetas por unidad: Unit 1/,{' '} + Unit 2/, etc. Los audios/videos se vinculan + automaticamente a los ejercicios de su unidad. +

@@ -115,6 +229,7 @@ export default function AdminSharedMaterialsPage() { const value = e.target.value ? Number(e.target.value) : ''; setSelectedPlanId(value); setSelectedCourseId(''); + setSplitToRegular(false); }} className="w-full rounded-lg border border-slate-300 bg-white px-3 py-2 text-sm" > @@ -132,6 +247,7 @@ export default function AdminSharedMaterialsPage() { onChange={(e) => { const value = e.target.value ? Number(e.target.value) : ''; setSelectedCourseId(value); + setSplitToRegular(false); const selected = courses.find((c) => c.idCursos === value); if (selected?.NivelCurso !== undefined && selected?.NivelCurso !== null) { const n = selected.NivelCurso; @@ -153,6 +269,88 @@ export default function AdminSharedMaterialsPage() { + {/* Split to regular courses β€” only shown for intensive courses */} + {isIntensiveCourse && ( +
+ +

+ Las unidades 1..N/2 van al curso regular 1 y N/2+1..N al regular 2. + Para 8-10 unidades esto resulta en 4-5 unidades por curso regular. +

+ + {splitToRegular && ( +
+ {/* Plan regular — may be auto-detected or chosen manually */}
+ + +
+ +
+
+ + +
+
+ + +
+
+
+ )} +
+ )} +