feat: enhance asset import functionality and unit tracking

- Added WHISPER_URL environment variable to docker-compose for audio transcription service.
- Updated Nginx configuration to increase timeout settings for API requests.
- Enhanced asset ingestion process to extract unit numbers from ZIP entry paths, supporting various naming conventions.
- Implemented logic to split intensive courses into two regular courses during asset import.
- Added new fields to the Asset and QuestionBank models to track unit numbers and source asset links.
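- Introduced backward-compatible fallbacks for fetching study plans and courses: first from the legacy metadata mirror tables, then, as a last resort for plans, via an auto-sync from the legacy MySQL database.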
- Improved error handling and progress tracking during ZIP file uploads in the frontend.
- Created a new SQL migration to add unit_number and source_asset_id columns to the assets and question_bank tables, along with necessary indexes for performance.
This commit is contained in:
2026-04-07 13:38:22 -04:00
parent 7f9b9d69ae
commit 024bd6e46d
11 changed files with 687 additions and 101 deletions
+4
View File
@@ -60,10 +60,14 @@ EXTERNAL_ID_TIPO_NOTA=1
# AWS S3 Configuration (audio storage)
# Bucket: openccb-802726101181-us-east-2-an
# ----------------------------------------
ASSETS_STORAGE=s3
AWS_ACCESS_KEY_ID=
AWS_SECRET_ACCESS_KEY=
AWS_REGION=us-east-2
S3_BUCKET=openccb-802726101181-us-east-2-an
S3_ENDPOINT=
S3_PUBLIC_BASE_URL=
S3_FORCE_PATH_STYLE=false
# ----------------------------------------
# Mercado Pago Configuration
+17 -5
View File
@@ -79,8 +79,14 @@ trap cleanup EXIT
# Copiar archivos esenciales
echo " 📋 Copiando archivos esenciales..."
cp -r docker-compose.yml "$PROD_DIR/" 2>/dev/null || echo " ⚠️ docker-compose.yml no existe"
# NO copiar .env local - tiene configuraciones incorrectas para producción
echo " ️ .env local NO se copia - se generará uno correcto en el servidor"
# Usar .env local como fuente de verdad para producción (si existe)
if [ -f ".env" ]; then
cp .env "$PROD_DIR/.env"
echo " ✅ .env local copiado (fuente de producción)"
else
echo " ⚠️ .env local no existe; se usará .env.example como fallback"
fi
# .env.example se mantiene como plantilla/documentación
cp -r .env.example "$PROD_DIR/" 2>/dev/null || true
# NO copiar ubuntu.pem - solo se usa localmente para SSH
@@ -347,17 +353,19 @@ echo " PROTOCOL: \$PROTOCOL"
echo ""
# ========================================
# GENERAR .ENV CORRECTO PARA PRODUCCION
# RESOLVER .ENV PARA PRODUCCION
# ========================================
echo "Generando configuracion .env para produccion..."
echo "Resolviendo configuracion .env para produccion..."
if [ ! -f ".env" ]; then
echo " Creando .env desde .env.example..."
echo " .env no existe en remoto; creando desde .env.example..."
if [ -f ".env.example" ]; then
cp .env.example .env
else
touch .env
fi
else
echo " Usando .env existente (valores de produccion)"
fi
# Generar DB_PASSWORD seguro
@@ -435,6 +443,10 @@ echo "NEXT_PUBLIC_CMS_API_URL=\$CMS_URL" >> .env
echo "NEXT_PUBLIC_LMS_API_URL=\$LMS_URL" >> .env
# Configurar S3 para almacenamiento de audio
if ! grep -q "^ASSETS_STORAGE=" .env || grep -q "^ASSETS_STORAGE=$" .env; then
sed -i "/^ASSETS_STORAGE=/d" .env 2>/dev/null || true
echo "ASSETS_STORAGE=s3" >> .env
fi
if ! grep -q "^S3_BUCKET=" .env || grep -q "^S3_BUCKET=$" .env; then
sed -i "/^S3_BUCKET=/d" .env 2>/dev/null || true
echo "S3_BUCKET=openccb-802726101181-us-east-2-an" >> .env
+1
View File
@@ -88,6 +88,7 @@ services:
- DATABASE_URL=${CMS_DATABASE_URL}
- MYSQL_DATABASE_URL=${MYSQL_DATABASE_URL}
- SAM_DIAGNOSTICO_DATABASE_URL=${SAM_DIAGNOSTICO_DATABASE_URL}
- WHISPER_URL=${PROD_WHISPER_URL:-https://whisper.t-800.norteamericano.cl}
- LMS_INTERNAL_URL=http://experience:3002
- NEXT_PUBLIC_LMS_API_URL=${NEXT_PUBLIC_LMS_API_URL}
volumes:
+4 -4
View File
@@ -18,10 +18,10 @@ location /cms-api/ {
proxy_set_header X-Forwarded-Proto $http_x_forwarded_proto;
proxy_set_header Connection "";
proxy_http_version 1.1;
proxy_connect_timeout 120s;
proxy_send_timeout 3600s;
proxy_read_timeout 3600s;
send_timeout 3600s;
proxy_connect_timeout 300s;
proxy_send_timeout 7200s;
proxy_read_timeout 7200s;
send_timeout 7200s;
}
location /lms-api/ {
@@ -0,0 +1,12 @@
-- Migration: unit tracking for imported assets and asset links for question_bank rows.
-- Every statement uses IF NOT EXISTS, so re-running the script is a no-op.

-- assets.unit_number: the syllabus unit a file belongs to within an imported ZIP.
-- Nullable: left NULL when no unit is recorded for the asset.
ALTER TABLE assets ADD COLUMN IF NOT EXISTS unit_number INTEGER;
-- question_bank.source_asset_id: links a RAG chunk to its source audio/video asset,
-- so test creation (AI + manual) can attach the audio to exercises.
-- ON DELETE SET NULL: deleting the asset keeps the chunk and only clears the link.
ALTER TABLE question_bank ADD COLUMN IF NOT EXISTS source_asset_id UUID REFERENCES assets(id) ON DELETE SET NULL;
-- question_bank.unit_number: the unit recorded for the chunk (nullable).
ALTER TABLE question_bank ADD COLUMN IF NOT EXISTS unit_number INTEGER;
-- Indexes for lookup by unit and by source asset.
-- The two question_bank indexes are partial: rows with NULL in the indexed column are left out.
CREATE INDEX IF NOT EXISTS idx_assets_unit_number ON assets(organization_id, sam_plan_id, unit_number);
CREATE INDEX IF NOT EXISTS idx_qb_source_asset ON question_bank(source_asset_id) WHERE source_asset_id IS NOT NULL;
CREATE INDEX IF NOT EXISTS idx_qb_unit_number ON question_bank(organization_id, unit_number) WHERE unit_number IS NOT NULL;
+256 -81
View File
@@ -16,6 +16,7 @@ use serde::{Deserialize, Serialize};
use serde_json::json;
use sqlx::PgPool;
use uuid::Uuid;
use std::collections::HashMap;
use std::env;
use std::path::Path as StdPath;
use tokio::process::Command;
@@ -585,6 +586,9 @@ pub async fn ingest_asset_for_rag(
&client,
&ollama_url,
&model,
None,
None,
asset.unit_number,
)
.await?;
@@ -601,6 +605,38 @@ pub async fn ingest_asset_for_rag(
/// - file: ZIP requerido
/// - course_id: UUID opcional
/// - ingest_rag: true/false opcional (default false)
/// Derives the unit number of a ZIP entry from the name of its top-level folder.
///
/// The folder is the text before the first `/` in `entry_name`. Any leading
/// non-digit characters (for example "Unit ", "Unidad ", "unit-" or "U") are
/// skipped and the first run of ASCII digits is parsed, so "Unit 1/...",
/// "Unidad 1/...", "unit-01/...", "01/..." and "1/..." all yield a unit number.
///
/// Returns `None` when the entry sits at the ZIP root (no `/`), when the folder
/// name contains no digits, or when the digit run does not fit in an `i32`.
fn extract_unit_number(entry_name: &str) -> Option<i32> {
    // No '/' at all means the file lives at the ZIP root: there is no unit folder.
    let (top_folder, _rest) = entry_name.split_once('/')?;

    // Skip whatever textual prefix precedes the number, then keep the digit run.
    // An empty run (no digits) or an out-of-range value fails to parse -> None.
    top_folder
        .chars()
        .skip_while(|ch| !ch.is_ascii_digit())
        .take_while(|ch| ch.is_ascii_digit())
        .collect::<String>()
        .parse()
        .ok()
}
struct ZipEntryData {
entry_name: String,
safe_filename: String,
content: Vec<u8>,
unit_number: Option<i32>,
guessed_mimetype: String,
is_audio_video: bool,
is_flv: bool,
}
pub async fn import_assets_zip(
Org(org_ctx): Org,
claims: Claims,
@@ -613,6 +649,9 @@ pub async fn import_assets_zip(
let mut sam_plan_id: Option<i32> = None;
let mut sam_course_id: Option<i32> = None;
let mut ingest_rag = false;
let mut split_to_regular = false;
let mut sam_course_id_r1: Option<i32> = None;
let mut sam_course_id_r2: Option<i32> = None;
while let Some(mut field) = multipart
.next_field()
@@ -678,6 +717,23 @@ pub async fn import_assets_zip(
sam_course_id = Some(id);
}
}
} else if name == "split_to_regular" {
if let Ok(txt) = field.text().await {
let v = txt.trim().to_lowercase();
split_to_regular = v == "1" || v == "true" || v == "yes";
}
} else if name == "sam_course_id_r1" {
if let Ok(txt) = field.text().await {
if let Ok(id) = txt.trim().parse::<i32>() {
sam_course_id_r1 = Some(id);
}
}
} else if name == "sam_course_id_r2" {
if let Ok(txt) = field.text().await {
if let Ok(id) = txt.trim().parse::<i32>() {
sam_course_id_r2 = Some(id);
}
}
}
}
@@ -699,11 +755,84 @@ pub async fn import_assets_zip(
return Err((StatusCode::BAD_REQUEST, "No ZIP file uploaded".to_string()));
}
// ── Phase 1: collect all ZIP entries into memory ──────────────────────────
let mut all_entries: Vec<ZipEntryData> = Vec::new();
let mut unit_set: std::collections::BTreeSet<i32> = Default::default();
let len = archive.len();
for i in 0..len {
let mut file = archive
.by_index(i)
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("ZIP read error: {}", e)))?;
if !file.is_file() {
continue;
}
let entry_name = file.name().to_string();
if entry_name.starts_with("__MACOSX/") || entry_name.ends_with(".DS_Store") {
continue;
}
let safe_filename = StdPath::new(&entry_name)
.file_name()
.and_then(|s| s.to_str())
.unwrap_or("unnamed")
.to_string();
let mut content = Vec::new();
std::io::Read::read_to_end(&mut file, &mut content)
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("ZIP entry read failed: {}", e)))?;
let guessed_mimetype = mime_guess::from_path(&safe_filename)
.first_or_octet_stream()
.to_string();
let is_flv = is_flv_media(&safe_filename, &guessed_mimetype);
let is_audio_video = is_flv
|| guessed_mimetype.starts_with("audio/")
|| guessed_mimetype.starts_with("video/");
let unit_number = extract_unit_number(&entry_name);
if let Some(u) = unit_number {
unit_set.insert(u);
}
all_entries.push(ZipEntryData {
entry_name,
safe_filename,
content,
unit_number,
guessed_mimetype,
is_audio_video,
is_flv,
});
}
// ── Phase 1b: calculate split midpoint (intensive → 2 regular courses) ───
// For 8-10 units: first half → regular 1, second half → regular 2.
// Mid is the last unit number that goes to regular 1 (ceiling of N/2).
let split_midpoint: Option<i32> = if split_to_regular
&& sam_course_id_r1.is_some()
&& sam_course_id_r2.is_some()
&& !unit_set.is_empty()
{
let units: Vec<i32> = unit_set.iter().cloned().collect();
let mid_idx = (units.len() + 1) / 2; // ceiling: 8 → 4, 9 → 5, 10 → 5
Some(units[mid_idx - 1])
} else {
None
};
// Sort: audio/video first so their asset IDs are known when text is ingested
all_entries.sort_by_key(|e| if e.is_audio_video { 0usize } else { 1 });
// ── Phase 2: process entries ───────────────────────────────────────────────
let mut imported_assets = 0usize;
let mut rag_ingested_assets = 0usize;
let mut rag_chunks_ingested = 0usize;
let mut failed_entries: Vec<String> = Vec::new();
// unit_number → (asset_id, public_url): populated from audio/video assets
let mut unit_audio_map: HashMap<i32, (Uuid, String)> = HashMap::new();
let rag_client = if ingest_rag {
Some(
reqwest::Client::builder()
@@ -718,41 +847,28 @@ pub async fn import_assets_zip(
let ollama_url = ai::get_ollama_url();
let model = ai::get_embedding_model();
let len = archive.len();
for i in 0..len {
let (entry_name, safe_filename, content): (String, String, Vec<u8>) = {
let mut file = archive
.by_index(i)
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("ZIP read error: {}", e)))?;
for entry in all_entries {
let ZipEntryData {
entry_name,
safe_filename,
content,
unit_number,
guessed_mimetype,
is_audio_video,
is_flv,
} = entry;
if !file.is_file() {
continue;
// Determine effective sam_course_id based on split midpoint
let effective_sam_course_id = match (split_midpoint, unit_number) {
(Some(mid), Some(u)) => {
if u <= mid { sam_course_id_r1 } else { sam_course_id_r2 }
}
let entry_name = file.name().to_string();
if entry_name.starts_with("__MACOSX/") || entry_name.ends_with(".DS_Store") {
continue;
}
let safe_filename = std::path::Path::new(&entry_name)
.file_name()
.and_then(|s| s.to_str())
.unwrap_or("unnamed")
.to_string();
let mut content = Vec::new();
std::io::Read::read_to_end(&mut file, &mut content)
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("ZIP entry read failed: {}", e)))?;
(entry_name, safe_filename, content)
_ => sam_course_id,
};
let asset_id = Uuid::new_v4();
let guessed_mimetype = mime_guess::from_path(&safe_filename)
.first_or_octet_stream()
.to_string();
let (storage_path, stored_filename, mimetype) = if is_flv_media(&safe_filename, &guessed_mimetype) {
let (storage_path, stored_filename, mimetype) = if is_flv {
let temp_storage_filename = format!("{}.flv", asset_id);
let temp_storage_path = format!("uploads/{}", temp_storage_filename);
tokio::fs::create_dir_all("uploads")
@@ -801,7 +917,7 @@ pub async fn import_assets_zip(
.unwrap_or("")
.to_string();
let (db_storage_path, _asset_url) = if !storage_filename_for_s3.is_empty() {
let (db_storage_path, asset_public_url) = if !storage_filename_for_s3.is_empty() {
if let Some((s3_path, public_url)) = maybe_push_local_file_to_s3(
&storage_path,
&storage_filename_for_s3,
@@ -834,8 +950,8 @@ pub async fn import_assets_zip(
let insert_result = sqlx::query(
r#"
INSERT INTO assets (id, organization_id, uploaded_by, course_id, english_level, sam_plan_id, sam_course_id, filename, storage_path, mimetype, size_bytes)
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11)
INSERT INTO assets (id, organization_id, uploaded_by, course_id, english_level, sam_plan_id, sam_course_id, unit_number, filename, storage_path, mimetype, size_bytes)
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12)
"#,
)
.bind(asset_id)
@@ -844,7 +960,8 @@ pub async fn import_assets_zip(
.bind(course_id)
.bind(&english_level)
.bind(sam_plan_id)
.bind(sam_course_id)
.bind(effective_sam_course_id)
.bind(unit_number)
.bind(&stored_filename)
.bind(&db_storage_path)
.bind(&mimetype)
@@ -859,6 +976,13 @@ pub async fn import_assets_zip(
imported_assets += 1;
// Track audio/video asset per unit for cross-linking with text RAG chunks
if is_audio_video {
if let Some(u) = unit_number {
unit_audio_map.entry(u).or_insert((asset_id, asset_public_url.clone()));
}
}
if ingest_rag {
let asset = Asset {
id: asset_id,
@@ -867,7 +991,8 @@ pub async fn import_assets_zip(
course_id,
english_level: english_level.clone(),
sam_plan_id,
sam_course_id,
sam_course_id: effective_sam_course_id,
unit_number,
filename: stored_filename.clone(),
storage_path: db_storage_path.clone(),
mimetype: mimetype.clone(),
@@ -875,6 +1000,16 @@ pub async fn import_assets_zip(
created_at: chrono::Utc::now(),
};
// For text/PDF entries, look up the audio asset from the same unit
let (linked_audio_id, linked_audio_url) = if !is_audio_video {
match unit_number.and_then(|u| unit_audio_map.get(&u)) {
Some((aid, aurl)) => (Some(*aid), Some(aurl.clone())),
None => (None, None),
}
} else {
(None, None)
};
match extract_asset_text(&asset).await {
Ok(extracted) => {
let trimmed = extracted.trim();
@@ -889,7 +1024,7 @@ pub async fn import_assets_zip(
continue;
}
let source_kind = if mimetype.starts_with("audio/") || mimetype.starts_with("video/") {
let source_kind = if is_audio_video {
"audio-transcription"
} else if mimetype.contains("pdf") {
"pdf"
@@ -897,7 +1032,7 @@ pub async fn import_assets_zip(
"text"
};
let skill = if mimetype.starts_with("audio/") || mimetype.starts_with("video/") {
let skill = if is_audio_video {
Some("listening")
} else {
Some("reading")
@@ -915,6 +1050,9 @@ pub async fn import_assets_zip(
client,
&ollama_url,
&model,
linked_audio_id,
linked_audio_url,
unit_number,
)
.await
{
@@ -1007,6 +1145,9 @@ async fn ingest_chunks_to_question_bank(
client: &reqwest::Client,
ollama_url: &str,
model: &str,
source_asset_id: Option<Uuid>,
audio_url: Option<String>,
unit_number: Option<i32>,
) -> Result<(), (StatusCode, String)> {
for (idx, chunk) in chunks.iter().enumerate() {
let metadata = json!({
@@ -1017,6 +1158,7 @@ async fn ingest_chunks_to_question_bank(
"source_kind": source_kind,
"chunk_index": idx + 1,
"chunk_total": chunks.len(),
"unit_number": unit_number,
});
let inserted_id: Uuid = sqlx::query_scalar(
@@ -1031,10 +1173,13 @@ async fn ingest_chunks_to_question_bank(
skill_assessed,
source,
source_metadata,
source_asset_id,
audio_url,
unit_number,
is_active,
is_archived
)
VALUES ($1, $2, $3, 'short-answer', $4, 'medium', $5, 'imported-material', $6, true, false)
VALUES ($1, $2, $3, 'short-answer', $4, 'medium', $5, 'imported-material', $6, $7, $8, $9, true, false)
RETURNING id
"#,
)
@@ -1044,6 +1189,9 @@ async fn ingest_chunks_to_question_bank(
.bind("RAG material chunk from uploaded asset")
.bind(skill)
.bind(&metadata)
.bind(source_asset_id)
.bind(&audio_url)
.bind(unit_number)
.fetch_one(pool)
.await
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("Insert failed: {}", e)))?;
@@ -1139,54 +1287,81 @@ async fn extract_pdf_text_from_bytes(bytes: Vec<u8>) -> Result<String, (StatusCo
}
async fn transcribe_media_bytes(file_data: Vec<u8>, filename: &str) -> Result<String, (StatusCode, String)> {
let whisper_url = std::env::var("WHISPER_URL")
.unwrap_or_else(|_| "http://localhost:8000".to_string());
let client = reqwest::Client::new();
let form = reqwest::multipart::Form::new()
.part(
"file",
reqwest::multipart::Part::bytes(file_data).file_name(filename.to_string()),
)
.text("model", "whisper-1")
.text("response_format", "json");
let response = client
.post(format!("{}/v1/audio/transcriptions", whisper_url))
.multipart(form)
.send()
.await
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("Whisper request failed: {}", e)))?;
if !response.status().is_success() {
let status = response.status();
let body = response.text().await.unwrap_or_default();
return Err((
StatusCode::BAD_GATEWAY,
format!("Whisper API error {}: {}", status, body),
));
let mut whisper_urls: Vec<String> = Vec::new();
if let Ok(url) = std::env::var("WHISPER_URL") {
let trimmed = url.trim();
if !trimmed.is_empty() {
whisper_urls.push(trimmed.trim_end_matches('/').to_string());
}
}
let transcription: serde_json::Value = response
.json()
.await
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("Invalid Whisper response: {}", e)))?;
let text = transcription
.get("text")
.and_then(|v| v.as_str())
.unwrap_or("")
.trim()
.to_string();
if text.is_empty() {
return Err((
StatusCode::BAD_REQUEST,
"Whisper no pudo extraer texto del audio/video".to_string(),
));
// Container-friendly fallbacks for common local deployments.
if whisper_urls.is_empty() {
whisper_urls.push("http://host.docker.internal:8000".to_string());
whisper_urls.push("http://localhost:8000".to_string());
}
Ok(text)
let client = reqwest::Client::builder()
.timeout(std::time::Duration::from_secs(300))
.build()
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("Whisper HTTP client error: {}", e)))?;
let mut last_error = String::new();
for base_url in whisper_urls {
let form = reqwest::multipart::Form::new()
.part(
"file",
reqwest::multipart::Part::bytes(file_data.clone()).file_name(filename.to_string()),
)
.text("model", "whisper-1")
.text("response_format", "json");
let endpoint = format!("{}/v1/audio/transcriptions", base_url);
let response = match client.post(&endpoint).multipart(form).send().await {
Ok(r) => r,
Err(e) => {
last_error = format!("{} ({})", endpoint, e);
continue;
}
};
if !response.status().is_success() {
let status = response.status();
let body = response.text().await.unwrap_or_default();
last_error = format!("{} -> {}: {}", endpoint, status, body);
continue;
}
let transcription: serde_json::Value = response
.json()
.await
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("Invalid Whisper response: {}", e)))?;
let text = transcription
.get("text")
.and_then(|v| v.as_str())
.unwrap_or("")
.trim()
.to_string();
if text.is_empty() {
return Err((
StatusCode::BAD_REQUEST,
"Whisper no pudo extraer texto del audio/video".to_string(),
));
}
return Ok(text);
}
Err((
StatusCode::BAD_GATEWAY,
format!(
"Whisper request failed en todos los endpoints configurados. Ultimo error: {}",
last_error
),
))
}
fn chunk_text(text: &str, max_chars: usize) -> Vec<String> {
@@ -864,7 +864,7 @@ pub async fn get_mysql_plans(
State(pool): State<PgPool>,
) -> Result<Json<Vec<MySqlPlanInfo>>, (StatusCode, String)> {
// Read from SAM mirror in PostgreSQL with SAM-native fields.
let plans: Vec<MySqlPlanInfo> = sqlx::query_as(
let mut plans: Vec<MySqlPlanInfo> = sqlx::query_as(
r#"
SELECT
idPlanDeEstudios AS id_plan_de_estudios,
@@ -879,6 +879,111 @@ pub async fn get_mysql_plans(
.await
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("Failed to fetch plans: {}", e)))?;
// Backward-compatible fallback: if SAM mirror is empty, use legacy metadata mirror.
if plans.is_empty() {
plans = sqlx::query_as(
r#"
SELECT
mysql_id AS id_plan_de_estudios,
name AS nombre_plan
FROM mysql_study_plans
WHERE organization_id = $1 AND is_active = TRUE
ORDER BY name
"#,
)
.bind(org_ctx.id)
.fetch_all(&pool)
.await
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("Failed to fetch legacy plans: {}", e)))?;
}
// Last-resort auto-sync: if still empty, pull metadata from MySQL and persist it.
if plans.is_empty() {
match connect_mysql_pool("MYSQL_DATABASE_URL").await {
Ok(mysql_pool) => {
let mysql_plans: Result<Vec<MySqlPlanInfo>, sqlx::Error> = sqlx::query_as(
r#"
SELECT DISTINCT
pe.idPlanDeEstudios AS id_plan_de_estudios,
pe.Nombre AS nombre_plan
FROM plandeestudios pe
WHERE pe.Activo = 1
ORDER BY pe.Nombre
"#,
)
.fetch_all(&mysql_pool)
.await;
let mysql_courses: Result<Vec<MySqlCourseInfo>, sqlx::Error> = sqlx::query_as(
r#"
SELECT DISTINCT
c.idCursos AS id_cursos,
c.NombreCurso AS nombre_curso,
c.NivelCurso AS nivel_curso,
pe.idPlanDeEstudios AS id_plan_de_estudios,
pe.Nombre AS nombre_plan,
c.Duracion AS duracion
FROM curso c
JOIN plandeestudios pe ON c.idPlanDeEstudios = pe.idPlanDeEstudios
WHERE c.Activo = 1
AND pe.Activo = 1
ORDER BY pe.Nombre, c.NivelCurso
"#,
)
.fetch_all(&mysql_pool)
.await;
match (mysql_plans, mysql_courses) {
(Ok(p), Ok(c)) => {
if let Err(err) = save_mysql_courses_and_plans(&pool, org_ctx.id, p, c).await {
tracing::warn!("Auto-sync MySQL metadata failed: {}", err);
}
}
(Err(e), _) => tracing::warn!("Auto-sync plans query failed: {}", e),
(_, Err(e)) => tracing::warn!("Auto-sync courses query failed: {}", e),
}
mysql_pool.close().await;
}
Err(e) => {
tracing::warn!("Auto-sync could not connect to MySQL: {:?}", e);
}
}
// Reload plans after auto-sync attempt.
plans = sqlx::query_as(
r#"
SELECT
idPlanDeEstudios AS id_plan_de_estudios,
Nombre AS nombre_plan
FROM sam_study_plans
WHERE organization_id = $1 AND Activo = TRUE
ORDER BY Nombre
"#,
)
.bind(org_ctx.id)
.fetch_all(&pool)
.await
.unwrap_or_default();
if plans.is_empty() {
plans = sqlx::query_as(
r#"
SELECT
mysql_id AS id_plan_de_estudios,
name AS nombre_plan
FROM mysql_study_plans
WHERE organization_id = $1 AND is_active = TRUE
ORDER BY name
"#,
)
.bind(org_ctx.id)
.fetch_all(&pool)
.await
.unwrap_or_default();
}
}
Ok(Json(plans))
}
@@ -889,7 +994,7 @@ pub async fn get_mysql_courses_by_plan(
Query(filters): Query<MySqlCoursesFilters>,
) -> Result<Json<Vec<MySqlCourseInfo>>, (StatusCode, String)> {
// Read from SAM mirror in PostgreSQL with SAM-native fields.
let courses: Vec<MySqlCourseInfo> = sqlx::query_as(
let mut courses: Vec<MySqlCourseInfo> = sqlx::query_as(
r#"
SELECT
c.idCursos AS id_cursos,
@@ -915,6 +1020,33 @@ pub async fn get_mysql_courses_by_plan(
.await
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("Failed to fetch courses: {}", e)))?;
// Backward-compatible fallback: if SAM mirror is empty, use legacy metadata mirror.
if courses.is_empty() {
courses = sqlx::query_as(
r#"
SELECT
c.mysql_id AS id_cursos,
c.name AS nombre_curso,
c.level AS nivel_curso,
sp.mysql_id AS id_plan_de_estudios,
sp.name AS nombre_plan,
c.duracion::double precision AS duracion
FROM mysql_courses c
JOIN mysql_study_plans sp ON c.study_plan_id = sp.id
WHERE c.organization_id = $1
AND c.is_active = TRUE
AND sp.is_active = TRUE
AND sp.mysql_id = $2
ORDER BY c.level
"#,
)
.bind(org_ctx.id)
.bind(filters.plan_id)
.fetch_all(&pool)
.await
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("Failed to fetch legacy courses: {}", e)))?;
}
Ok(Json(courses))
}
+3
View File
@@ -347,6 +347,7 @@ pub struct Asset {
pub english_level: Option<String>,
pub sam_plan_id: Option<i32>,
pub sam_course_id: Option<i32>,
pub unit_number: Option<i32>,
pub filename: String,
pub storage_path: String,
pub mimetype: String,
@@ -1402,6 +1403,8 @@ pub struct QuestionBank {
pub updated_at: chrono::DateTime<chrono::Utc>,
pub embedding: Option<String>, // PGVector embedding for semantic search
pub embedding_updated_at: Option<chrono::DateTime<chrono::Utc>>,
pub source_asset_id: Option<Uuid>, // audio/video asset that originated this RAG chunk
pub unit_number: Option<i32>, // syllabus unit number from ZIP folder structure
}
#[derive(Debug, Serialize, Deserialize, Clone)]
+7 -2
View File
@@ -37,8 +37,13 @@ FROM node:20-slim AS runner
WORKDIR /app
ENV NODE_ENV production
# Install system dependencies for Rust binary
RUN apt-get update && apt-get install -y openssl ca-certificates && rm -rf /var/lib/apt/lists/*
# Install system dependencies for Rust binary and asset processing
RUN apt-get update && apt-get install -y \
openssl \
ca-certificates \
ffmpeg \
poppler-utils \
&& rm -rf /var/lib/apt/lists/*
# Install sharp for Next.js image optimization
RUN --mount=type=cache,target=/root/.npm npm install sharp
+223 -4
View File
@@ -2,7 +2,7 @@
import React, { useMemo, useState } from 'react';
import { cmsApi, questionBankApi, MySqlPlan, MySqlCourse } from '@/lib/api';
import { Upload, Database, FileArchive, CheckCircle2, AlertTriangle } from 'lucide-react';
import { Upload, Database, FileArchive, CheckCircle2, AlertTriangle, Scissors } from 'lucide-react';
export default function AdminSharedMaterialsPage() {
const [zipFile, setZipFile] = useState<File | null>(null);
@@ -12,7 +12,16 @@ export default function AdminSharedMaterialsPage() {
const [courses, setCourses] = useState<MySqlCourse[]>([]);
const [selectedPlanId, setSelectedPlanId] = useState<number | ''>('');
const [selectedCourseId, setSelectedCourseId] = useState<number | ''>('');
const [splitToRegular, setSplitToRegular] = useState(false);
const [regularPlanId, setRegularPlanId] = useState<number | ''>('');
const [regularCourses, setRegularCourses] = useState<MySqlCourse[]>([]);
const [selectedCourseIdR1, setSelectedCourseIdR1] = useState<number | ''>('');
const [selectedCourseIdR2, setSelectedCourseIdR2] = useState<number | ''>('');
const [loading, setLoading] = useState(false);
const [uploadProgress, setUploadProgress] = useState(0);
const [phase, setPhase] = useState<'idle' | 'uploading' | 'processing' | 'done' | 'error'>('idle');
const [startedAt, setStartedAt] = useState<number | null>(null);
const [elapsedSeconds, setElapsedSeconds] = useState(0);
const [result, setResult] = useState<{
imported_assets: number;
rag_ingested_assets: number;
@@ -22,6 +31,32 @@ export default function AdminSharedMaterialsPage() {
const canUpload = useMemo(() => Boolean(zipFile) && !loading, [zipFile, loading]);
// Detect if the selected course is "intensive" (no trailing digit) and auto-detect
// the two corresponding regular courses (same name + " 1" and " 2").
const selectedCourseName = useMemo(
() => courses.find((c) => c.idCursos === selectedCourseId)?.NombreCurso ?? '',
[courses, selectedCourseId],
);
const isIntensiveCourse = useMemo(
() => Boolean(selectedCourseId) && !/\s*[12]$/.test(selectedCourseName.trim()),
[selectedCourseId, selectedCourseName],
);
// Suggested regular course names: replace "INTENSIVE" with nothing or trim trailing "INTENSIVE"
const regularBaseName = useMemo(() => {
const name = selectedCourseName.trim();
return name.replace(/\s*INTENSIVE\s*$/i, '').trim();
}, [selectedCourseName]);
const regularCourse1 = useMemo(
() => regularCourses.find((c) => c.NombreCurso.trim() === `${regularBaseName} 1`)
?? regularCourses.find((c) => /\s1$/.test(c.NombreCurso.trim())),
[regularCourses, regularBaseName],
);
const regularCourse2 = useMemo(
() => regularCourses.find((c) => c.NombreCurso.trim() === `${regularBaseName} 2`)
?? regularCourses.find((c) => /\s2$/.test(c.NombreCurso.trim())),
[regularCourses, regularBaseName],
);
React.useEffect(() => {
questionBankApi.getMySQLPlans().then(setPlans).catch(() => setPlans([]));
}, []);
@@ -30,10 +65,72 @@ export default function AdminSharedMaterialsPage() {
if (!selectedPlanId) {
setCourses([]);
setSelectedCourseId('');
setSplitToRegular(false);
setRegularPlanId('');
setRegularCourses([]);
return;
}
questionBankApi.getMySQLCoursesByPlan(selectedPlanId).then(setCourses).catch(() => setCourses([]));
}, [selectedPlanId]);
// Auto-detect sibling regular plan (swap INTENSIVO <-> REGULAR in plan name)
const intensivePlan = plans.find((p) => p.idPlanDeEstudios === selectedPlanId);
if (intensivePlan) {
const regularPlanName = intensivePlan.NombrePlan.replace(/INTENSIVO/i, 'REGULAR').trim();
const sibling = plans.find((p) => p.NombrePlan.toUpperCase() === regularPlanName.toUpperCase());
if (sibling) {
setRegularPlanId(sibling.idPlanDeEstudios);
questionBankApi.getMySQLCoursesByPlan(sibling.idPlanDeEstudios).then(setRegularCourses).catch(() => setRegularCourses([]));
} else {
setRegularPlanId('');
setRegularCourses([]);
}
}
}, [selectedPlanId, plans]);
// Load courses for manually selected regular plan
React.useEffect(() => {
if (!regularPlanId) return;
questionBankApi.getMySQLCoursesByPlan(regularPlanId).then(setRegularCourses).catch(() => setRegularCourses([]));
}, [regularPlanId]);
// Auto-fill regular course IDs when intensive course is selected and split is on
React.useEffect(() => {
if (splitToRegular && isIntensiveCourse) {
setSelectedCourseIdR1(regularCourse1?.idCursos ?? '');
setSelectedCourseIdR2(regularCourse2?.idCursos ?? '');
}
}, [splitToRegular, isIntensiveCourse, regularCourse1, regularCourse2]);
React.useEffect(() => {
if (!loading || !startedAt) {
return;
}
const timer = window.setInterval(() => {
const seconds = Math.floor((Date.now() - startedAt) / 1000);
setElapsedSeconds(seconds);
}, 1000);
return () => {
window.clearInterval(timer);
};
}, [loading, startedAt]);
const formatElapsed = (seconds: number): string => {
const mins = Math.floor(seconds / 60);
const secs = seconds % 60;
return `${String(mins).padStart(2, '0')}:${String(secs).padStart(2, '0')}`;
};
const statusText =
phase === 'uploading'
? `Subiendo ZIP... ${uploadProgress}%`
: phase === 'processing'
? 'Procesando contenido en servidor (esto puede tardar varios minutos para ZIPs grandes)...'
: phase === 'done'
? 'Importacion completada'
: phase === 'error'
? 'Importacion con error'
: 'Sin proceso activo';
const handleUpload = async () => {
if (!zipFile) {
@@ -43,6 +140,10 @@ export default function AdminSharedMaterialsPage() {
try {
setLoading(true);
setPhase('uploading');
setUploadProgress(0);
setStartedAt(Date.now());
setElapsedSeconds(0);
setResult(null);
const response = await cmsApi.importAssetsZip(
zipFile,
@@ -51,10 +152,19 @@ export default function AdminSharedMaterialsPage() {
englishLevel || undefined,
selectedPlanId || undefined,
selectedCourseId || undefined,
(pct) => {
setUploadProgress(pct);
setPhase(pct >= 100 ? 'processing' : 'uploading');
},
splitToRegular,
selectedCourseIdR1 || undefined,
selectedCourseIdR2 || undefined,
);
setResult(response);
setPhase('done');
alert('Importacion ZIP finalizada.');
} catch (error) {
setPhase('error');
console.error('ZIP import failed:', error);
const msg = error instanceof Error ? error.message : 'Error al importar ZIP';
alert(msg);
@@ -79,7 +189,11 @@ export default function AdminSharedMaterialsPage() {
</div>
<div>
<h2 className="font-bold text-slate-900 dark:text-white">Importar ZIP de Materiales</h2>
<p className="text-xs text-slate-500 dark:text-gray-500">Se cargan a biblioteca compartida (sin curso especifico).</p>
<p className="text-xs text-slate-500 dark:text-gray-500">
Organiza el ZIP en carpetas por unidad: <code className="bg-slate-100 px-1 rounded">Unit 1/</code>,{' '}
<code className="bg-slate-100 px-1 rounded">Unit 2/</code>, etc. Los audios/videos se vinculan
automaticamente a los ejercicios de su unidad.
</p>
</div>
</div>
@@ -115,6 +229,7 @@ export default function AdminSharedMaterialsPage() {
const value = e.target.value ? Number(e.target.value) : '';
setSelectedPlanId(value);
setSelectedCourseId('');
setSplitToRegular(false);
}}
className="w-full rounded-lg border border-slate-300 bg-white px-3 py-2 text-sm"
>
@@ -132,6 +247,7 @@ export default function AdminSharedMaterialsPage() {
onChange={(e) => {
const value = e.target.value ? Number(e.target.value) : '';
setSelectedCourseId(value);
setSplitToRegular(false);
const selected = courses.find((c) => c.idCursos === value);
if (selected?.NivelCurso !== undefined && selected?.NivelCurso !== null) {
const n = selected.NivelCurso;
@@ -153,6 +269,88 @@ export default function AdminSharedMaterialsPage() {
</select>
</div>
{/* Split to regular courses — only shown for intensive courses */}
{isIntensiveCourse && (
<div className="rounded-lg border border-indigo-200 bg-indigo-50 p-4 space-y-4">
<label className="flex items-center gap-3 text-sm text-indigo-900">
<input
type="checkbox"
checked={splitToRegular}
onChange={(e) => setSplitToRegular(e.target.checked)}
/>
<Scissors className="w-4 h-4 flex-shrink-0" />
<span className="font-medium">
Dividir unidades en 2 cursos regulares (intensivo = regular 1 + regular 2)
</span>
</label>
<p className="text-xs text-indigo-700 ml-7">
Las unidades 1..N/2 van al curso regular 1 y N/2+1..N al regular 2.
Para 8-10 unidades esto resulta en 4-5 unidades por curso regular.
</p>
{splitToRegular && (
<div className="ml-7 space-y-4">
{/* Plan regular — may be auto-detected or chosen manually */}
<div className="space-y-1">
<label className="block text-xs font-medium text-indigo-800">
Plan de Estudios Regular
</label>
<select
value={regularPlanId}
onChange={(e) => {
const v = e.target.value ? Number(e.target.value) : '';
setRegularPlanId(v);
setSelectedCourseIdR1('');
setSelectedCourseIdR2('');
}}
className="w-full rounded-lg border border-indigo-300 bg-white px-3 py-2 text-sm"
>
<option value="">Seleccionar plan regular</option>
{plans.map((p) => (
<option key={p.idPlanDeEstudios} value={p.idPlanDeEstudios}>{p.NombrePlan}</option>
))}
</select>
</div>
<div className="grid grid-cols-1 md:grid-cols-2 gap-4">
<div className="space-y-1">
<label className="block text-xs font-medium text-indigo-800">
Curso Regular 1 (unidades 1..N/2)
</label>
<select
value={selectedCourseIdR1}
onChange={(e) => setSelectedCourseIdR1(e.target.value ? Number(e.target.value) : '')}
disabled={!regularPlanId}
className="w-full rounded-lg border border-indigo-300 bg-white px-3 py-2 text-sm disabled:opacity-60"
>
<option value="">Seleccionar</option>
{regularCourses.map((c) => (
<option key={c.idCursos} value={c.idCursos}>{c.NombreCurso}</option>
))}
</select>
</div>
<div className="space-y-1">
<label className="block text-xs font-medium text-indigo-800">
Curso Regular 2 (unidades N/2+1..N)
</label>
<select
value={selectedCourseIdR2}
onChange={(e) => setSelectedCourseIdR2(e.target.value ? Number(e.target.value) : '')}
disabled={!regularPlanId}
className="w-full rounded-lg border border-indigo-300 bg-white px-3 py-2 text-sm disabled:opacity-60"
>
<option value="">Seleccionar</option>
{regularCourses.map((c) => (
<option key={c.idCursos} value={c.idCursos}>{c.NombreCurso}</option>
))}
</select>
</div>
</div>
</div>
)}
</div>
)}
<div className="space-y-2">
<label className="block text-sm font-medium text-slate-700 dark:text-gray-300">Nivel de Ingles para este ZIP</label>
<select
@@ -182,6 +380,27 @@ export default function AdminSharedMaterialsPage() {
<Upload className="w-4 h-4" />
{loading ? 'Importando...' : 'Importar ZIP Compartido'}
</button>
{/* Import status panel — rendered while a request is in flight (loading) and kept visible once the phase is 'done' or 'error'. */}
{(loading || phase === 'done' || phase === 'error') && (
<div className="rounded-lg border border-slate-200 bg-slate-50 p-4 space-y-3">
<div className="flex items-center justify-between text-sm">
<span className="font-medium text-slate-800">Estado del proceso</span>
<span className="text-slate-600">Tiempo: {formatElapsed(elapsedSeconds)}</span>
</div>
<div className="w-full h-2 rounded-full bg-slate-200 overflow-hidden">
{/* Bar width follows uploadProgress, except during the 'processing' phase where it is pinned to 100%. */}
<div
className="h-full bg-indigo-600 transition-all duration-300"
style={{ width: `${phase === 'processing' ? 100 : uploadProgress}%` }}
/>
</div>
<p className="text-sm text-slate-700">{statusText}</p>
<p className="text-xs text-slate-500">
Nota: esta importacion ZIP corre en la misma solicitud (no crea fila en Tasks), por eso aqui ves el estado en vivo.
</p>
</div>
)}
</div>
{result && (
@@ -228,4 +447,4 @@ export default function AdminSharedMaterialsPage() {
)}
</div>
);
}
}
+26 -3
View File
@@ -974,6 +974,10 @@ export const cmsApi = {
englishLevel?: string,
samPlanId?: number,
samCourseId?: number,
onProgress?: (pct: number) => void,
splitToRegular = false,
samCourseIdR1?: number,
samCourseIdR2?: number,
): Promise<AssetZipImportResult> => {
return new Promise((resolve, reject) => {
const formData = new FormData();
@@ -983,6 +987,11 @@ export const cmsApi = {
if (englishLevel) formData.append('english_level', englishLevel);
if (samPlanId) formData.append('sam_plan_id', String(samPlanId));
if (samCourseId) formData.append('sam_course_id', String(samCourseId));
// Split-mode form fields are sent only when splitToRegular is set.
// Each regular-course id is appended only when truthy, so an undefined (or 0) id is omitted from the request.
if (splitToRegular) {
formData.append('split_to_regular', 'true');
if (samCourseIdR1) formData.append('sam_course_id_r1', String(samCourseIdR1));
if (samCourseIdR2) formData.append('sam_course_id_r2', String(samCourseIdR2));
}
const xhr = new XMLHttpRequest();
xhr.open('POST', `${API_BASE_URL}/api/assets/import-zip`);
@@ -996,15 +1005,29 @@ export const cmsApi = {
if (xhr.status >= 200 && xhr.status < 300) {
resolve(JSON.parse(xhr.responseText));
} else {
let msg = 'ZIP import failed';
let msg = `ZIP import failed (HTTP ${xhr.status})`;
try {
msg = JSON.parse(xhr.responseText).message || msg;
} catch { }
const parsed = JSON.parse(xhr.responseText);
msg = parsed.message || parsed.error || msg;
} catch {
const raw = (xhr.responseText || '').trim();
if (raw) {
const compact = raw.replace(/\s+/g, ' ').slice(0, 240);
msg = `${msg}: ${compact}`;
}
}
reject(new Error(msg));
}
};
xhr.onerror = () => reject(new Error('Network error'));
// Report upload progress to the optional onProgress callback as an integer percentage.
// This listens on xhr.upload, so it tracks bytes of the request body sent, not the server's response.
if (onProgress) {
xhr.upload.onprogress = (event) => {
// Ignore events where the total size is unknown; no percentage can be computed.
if (!event.lengthComputable) return;
const pct = Math.round((event.loaded / event.total) * 100);
// Clamp to the 0..100 range before handing the value to the caller.
onProgress(Math.max(0, Math.min(100, pct)));
};
}
xhr.send(formData);
});
},