chore: update code structure for improved readability and maintainability

This commit is contained in:
2026-04-24 10:16:45 -04:00
parent e72f479639
commit 466f74b717
6 changed files with 377 additions and 8 deletions
+247 -3
View File
@@ -62,22 +62,36 @@ pub struct ReviewAiLogResponse {
pub reviewed: bool,
}
/// Returns the weight a risk signal contributes to the weighted risk score.
///
/// `sensitive_data_mention` weighs 3; `url_fabrication`,
/// `citation_without_rag` and `knowledge_disclaimer` weigh 2; every other
/// signal name (including unknown ones) weighs 1.
fn signal_weight(signal: &str) -> i32 {
    // Highest-impact signal is handled first.
    if signal == "sensitive_data_mention" {
        return 3;
    }

    let is_elevated = matches!(
        signal,
        "url_fabrication" | "citation_without_rag" | "knowledge_disclaimer"
    );
    if is_elevated {
        2
    } else {
        1
    }
}
fn risk_signals_from_log(response: &str, has_rag_context: bool, output_tokens: i32) -> Vec<String> {
let mut signals = Vec::new();
let response_lc = response.to_lowercase();
// ── Señal 1: ausencia de contexto RAG ────────────────────────────────────
if !has_rag_context {
signals.push("missing_rag_context".to_string());
}
// ── Señal 2: tokens de salida excesivos ──────────────────────────────────
if output_tokens >= 900 {
signals.push("high_output_tokens".to_string());
}
// ── Señal 3: respuesta muy larga en caracteres ───────────────────────────
if response.chars().count() >= 2200 {
signals.push("long_response".to_string());
}
// ── Señal 4: lenguaje de certeza absoluta ────────────────────────────────
let absolute_claim_markers = [
"siempre",
"nunca",
@@ -85,8 +99,10 @@ fn risk_signals_from_log(response: &str, has_rag_context: bool, output_tokens: i
"sin duda",
"100%",
"completamente seguro",
"es un hecho que",
"está demostrado que",
"es imposible que",
];
if absolute_claim_markers
.iter()
.any(|marker| response_lc.contains(marker))
@@ -94,7 +110,18 @@ fn risk_signals_from_log(response: &str, has_rag_context: bool, output_tokens: i
signals.push("absolute_claim_language".to_string());
}
let citation_like_markers = ["segun el documento", "fuente:", "referencia:", "[1]", "[2]"];
// ── Señal 5: citas/referencias sin contexto RAG ──────────────────────────
let citation_like_markers = [
"segun el documento",
"según el documento",
"fuente:",
"referencia:",
"[1]",
"[2]",
"[3]",
"(ver bibliografía)",
"de acuerdo con la fuente",
];
if !has_rag_context
&& citation_like_markers
.iter()
@@ -103,9 +130,95 @@ fn risk_signals_from_log(response: &str, has_rag_context: bool, output_tokens: i
signals.push("citation_without_rag".to_string());
}
// ── Señal 6: IA admite desconocer (paradoja: responde igualmente) ────────
let disclaimer_markers = [
"no tengo información sobre",
"no puedo confirmar",
"no tengo acceso a",
"desconozco",
"no estoy seguro de si",
"no cuento con información",
"no tengo conocimiento de",
"mis datos de entrenamiento",
"más allá de mi conocimiento",
];
if disclaimer_markers
.iter()
.any(|marker| response_lc.contains(marker))
{
signals.push("knowledge_disclaimer".to_string());
}
// ── Señal 7: URLs fabricadas sin RAG ─────────────────────────────────────
if !has_rag_context && (response_lc.contains("https://") || response_lc.contains("http://")) {
signals.push("url_fabrication".to_string());
}
// ── Señal 8: datos sensibles/personales mencionados ──────────────────────
let sensitive_markers = [
"contraseña",
"password",
"cédula",
"cedula",
"número de tarjeta",
"numero de tarjeta",
"cvv",
" pin ",
"número de cuenta",
"numero de cuenta",
"datos bancarios",
"información personal",
"número de identificación",
];
if sensitive_markers
.iter()
.any(|marker| response_lc.contains(marker))
{
signals.push("sensitive_data_mention".to_string());
}
// ── Señal 9: certeza alta sin RAG ("la respuesta es…") ───────────────────
let high_certainty_markers = [
"la respuesta correcta es",
"la solución correcta es",
"la respuesta es ",
"el resultado exacto es",
"está comprobado que",
"es correcto afirmar que",
"queda demostrado que",
];
if !has_rag_context
&& high_certainty_markers
.iter()
.any(|marker| response_lc.contains(marker))
{
signals.push("high_certainty_no_rag".to_string());
}
// ── Señal 10: contenido repetido (posible loop/alucinación) ──────────────
{
let sentences: Vec<&str> = response
.split(['.', '\n'])
.map(str::trim)
.filter(|s| s.chars().count() >= 40)
.collect();
let mut seen = std::collections::HashMap::<&str, usize>::new();
for s in &sentences {
*seen.entry(s).or_insert(0) += 1;
}
if seen.values().any(|&count| count >= 3) {
signals.push("repeated_content".to_string());
}
}
signals
}
/// Computes the weighted risk score: the sum of `signal_weight` over every
/// detected signal. An empty slice scores 0.
pub fn weighted_risk_score(signals: &[String]) -> i32 {
    let mut total = 0;
    for signal in signals {
        total += signal_weight(signal);
    }
    total
}
fn build_excerpt(text: &str, max_chars: usize) -> String {
let mut out = String::new();
for ch in text.chars().take(max_chars) {
@@ -189,6 +302,7 @@ pub async fn list_ai_audit_logs(
if risk_signals.is_empty() {
continue;
}
let risk_score = weighted_risk_score(&risk_signals);
items.push(AiAuditItem {
id: row.get("id"),
@@ -198,7 +312,7 @@ pub async fn list_ai_audit_logs(
model: row.get("model"),
output_tokens,
has_rag_context,
risk_score: risk_signals.len() as i32,
risk_score,
risk_signals,
response_excerpt: build_excerpt(&response, 240),
reviewed: row.get("reviewed"),
@@ -268,3 +382,133 @@ pub async fn review_ai_audit_log(
reviewed: payload.reviewed,
}))
}
// ─────────────────────────────────────────────────────────────────────────────
// Métricas de auditoría IA
// ─────────────────────────────────────────────────────────────────────────────
/// Query-string filters accepted by `get_ai_audit_metrics`.
#[derive(Debug, Deserialize)]
pub struct AiAuditMetricsFilters {
/// Look-back window in days. When absent the handler uses 30; the value is
/// clamped to `1..=365` before it is used.
pub days: Option<i32>,
}
/// Aggregate AI-audit metrics returned by `get_ai_audit_metrics`.
#[derive(Debug, Serialize)]
pub struct AiAuditMetrics {
/// Effective look-back window in days (after defaulting and clamping).
pub days: i32,
/// Number of `chat` rows in `ai_usage_logs` for the organization in the window.
pub total_chat_logs: i64,
/// Number of those logs for which at least one risk signal was detected.
pub total_flagged: i64,
/// Number of chat logs in the window whose `request_metadata.audit_reviewed`
/// is true (counted over all chat logs, not only flagged ones).
pub total_reviewed: i64,
/// `total_flagged / total_chat_logs * 100`, or 0.0 when there are no chat logs.
pub flagged_pct: f64,
/// `total_reviewed / total_flagged * 100`, or 0.0 when nothing was flagged.
pub reviewed_pct: f64,
/// Number of flagged logs in which each signal name was detected.
pub signal_counts: std::collections::HashMap<String, i64>,
/// Flagged logs bucketed by their weighted risk score.
pub weighted_score_distribution: WeightedScoreDist,
}
/// Count of flagged logs per weighted-risk-score bucket, as filled in by
/// `get_ai_audit_metrics`.
#[derive(Debug, Serialize)]
pub struct WeightedScoreDist {
pub low: i64, // score 1–2
pub medium: i64, // score 3–5
pub high: i64, // score ≥ 6
}
/// Returns aggregate AI-audit metrics for the caller's organization.
///
/// Only `chat` rows of `ai_usage_logs` created within the last `days` days are
/// considered (`days` defaults to 30 and is clamped to `1..=365`). Totals come
/// from an aggregate query; risk signals are recomputed in memory for every
/// row with `risk_signals_from_log`, and flagged rows are bucketed by their
/// `weighted_risk_score` (1–2 low, 3–5 medium, anything else high).
///
/// # Errors
/// Propagates the error returned by `ensure_audit_reviewer_role`, and returns
/// `500 Internal Server Error` when either database query fails.
pub async fn get_ai_audit_metrics(
    Org(org_ctx): Org,
    claims: Claims,
    State(pool): State<PgPool>,
    Query(filters): Query<AiAuditMetricsFilters>,
) -> Result<Json<AiAuditMetrics>, (StatusCode, String)> {
    ensure_audit_reviewer_role(&claims)?;

    let days = filters.days.unwrap_or(30).clamp(1, 365);

    // Aggregate totals computed by the database.
    let totals = sqlx::query(
        r#"
        SELECT
            COUNT(*)::BIGINT AS total_chat,
            SUM(CASE WHEN COALESCE((request_metadata->>'audit_reviewed')::boolean, false) THEN 1 ELSE 0 END)::BIGINT AS reviewed
        FROM ai_usage_logs
        WHERE organization_id = $1
          AND request_type = 'chat'
          AND created_at >= NOW() - ($2 || ' days')::interval
        "#,
    )
    .bind(org_ctx.id)
    .bind(days)
    .fetch_one(&pool)
    .await
    .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("Error métricas de auditoría: {}", e)))?;

    let total_chat_logs: i64 = totals.get("total_chat");
    // SUM over zero rows yields NULL, and decoding NULL into a plain `i64`
    // makes `Row::get` panic. Decode as `Option` and treat NULL as 0 so an
    // organization with no chat logs in the window gets an all-zero report.
    let total_reviewed: i64 = totals.get::<Option<i64>, _>("reviewed").unwrap_or(0);

    // Scan the responses to compute the risk signals in memory.
    let rows = sqlx::query(
        r#"
        SELECT output_tokens, response, request_metadata
        FROM ai_usage_logs
        WHERE organization_id = $1
          AND request_type = 'chat'
          AND created_at >= NOW() - ($2 || ' days')::interval
        "#,
    )
    .bind(org_ctx.id)
    .bind(days)
    .fetch_all(&pool)
    .await
    .map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("Error escaneando logs: {}", e)))?;

    let mut signal_counts: std::collections::HashMap<String, i64> = std::collections::HashMap::new();
    let mut total_flagged: i64 = 0;
    let mut dist = WeightedScoreDist { low: 0, medium: 0, high: 0 };

    for row in &rows {
        // A missing or non-boolean `has_rag` entry counts as "no RAG context".
        let metadata: Option<Value> = row.get("request_metadata");
        let has_rag = metadata
            .as_ref()
            .and_then(|m| m.get("has_rag"))
            .and_then(|v| v.as_bool())
            .unwrap_or(false);
        let output_tokens: i32 = row.get("output_tokens");
        let response: String = row.get::<Option<String>, _>("response").unwrap_or_default();

        let signals = risk_signals_from_log(&response, has_rag, output_tokens);
        if signals.is_empty() {
            continue;
        }

        total_flagged += 1;
        match weighted_risk_score(&signals) {
            1..=2 => dist.low += 1,
            3..=5 => dist.medium += 1,
            _ => dist.high += 1,
        }
        for signal in signals {
            *signal_counts.entry(signal).or_default() += 1;
        }
    }

    let flagged_pct = if total_chat_logs > 0 {
        (total_flagged as f64 / total_chat_logs as f64) * 100.0
    } else {
        0.0
    };
    // NOTE(review): `total_reviewed` is counted over all chat logs while the
    // denominator is flagged logs only, so this can exceed 100 if a reviewed
    // log is no longer flagged — confirm whether that is intended.
    let reviewed_pct = if total_flagged > 0 {
        (total_reviewed as f64 / total_flagged as f64) * 100.0
    } else {
        0.0
    };

    Ok(Json(AiAuditMetrics {
        days,
        total_chat_logs,
        total_flagged,
        total_reviewed,
        flagged_pct,
        reviewed_pct,
        signal_counts,
        weighted_score_distribution: dist,
    }))
}
+4
View File
@@ -250,6 +250,10 @@ async fn main() {
"/ai/audit/logs/{id}/review",
post(handlers_ai_audit::review_ai_audit_log),
)
.route(
"/ai/audit/metrics",
get(handlers_ai_audit::get_ai_audit_metrics),
)
.route(
"/ai/data-ethics/summary",
get(handlers_data_ethics::get_data_ethics_summary),