Use CDF, not PDF with artificial 'blocked' data
TODO: Figure out proper multivariate CDF
This commit is contained in:
parent
22163cc030
commit
97d4622cd4
131
src/analysis.rs
131
src/analysis.rs
|
@ -2,7 +2,7 @@ use crate::{BridgeInfo, BridgeInfoType};
|
||||||
use lox_library::proto::trust_promotion::UNTRUSTED_INTERVAL;
|
use lox_library::proto::trust_promotion::UNTRUSTED_INTERVAL;
|
||||||
use nalgebra::{Cholesky, DMatrix, DVector};
|
use nalgebra::{Cholesky, DMatrix, DVector};
|
||||||
use rand::Rng;
|
use rand::Rng;
|
||||||
use statrs::distribution::{Continuous, MultivariateNormal, Normal};
|
use statrs::distribution::{ContinuousCDF, MultivariateNormal, Normal};
|
||||||
use std::{
|
use std::{
|
||||||
cmp::{max, min},
|
cmp::{max, min},
|
||||||
collections::{BTreeMap, HashSet},
|
collections::{BTreeMap, HashSet},
|
||||||
|
@ -333,52 +333,29 @@ impl Analyzer for NormalAnalyzer {
|
||||||
let alpha = 1.0 - confidence;
|
let alpha = 1.0 - confidence;
|
||||||
|
|
||||||
let (mean_vec, sd_vec, cov_mat) = Self::stats(&[bridge_ips, negative_reports]);
|
let (mean_vec, sd_vec, cov_mat) = Self::stats(&[bridge_ips, negative_reports]);
|
||||||
|
let bridge_ips_mean = mean_vec[0];
|
||||||
let negative_reports_mean = mean_vec[1];
|
let negative_reports_mean = mean_vec[1];
|
||||||
let bridge_ips_sd = sd_vec[0];
|
let bridge_ips_sd = sd_vec[0];
|
||||||
let negative_reports_sd = sd_vec[1];
|
let negative_reports_sd = sd_vec[1];
|
||||||
|
|
||||||
// Artificially create data for alternative hypothesis
|
/*
|
||||||
let num_days = bridge_ips.len() as usize;
|
let mvn = MultivariateNormal::new(mean_vec, cov_mat).unwrap();
|
||||||
let mut bridge_ips_blocked = vec![0; num_days];
|
let pdf = mvn.pdf(&DVector::from_vec(vec![
|
||||||
let mut negative_reports_blocked = vec![0; num_days];
|
bridge_ips_today as f64,
|
||||||
let bridge_ips_deviation = (2.0 * bridge_ips_sd).round() as u32;
|
negative_reports_today as f64,
|
||||||
for i in 0..num_days {
|
]));
|
||||||
// Suppose bridge stats will go down by 2 SDs
|
*/
|
||||||
bridge_ips_blocked[i] = if bridge_ips_deviation > bridge_ips[i] {
|
|
||||||
0
|
|
||||||
} else {
|
|
||||||
bridge_ips[i] - bridge_ips_deviation
|
|
||||||
};
|
|
||||||
// Suppose negative reports will go up by 2 SDs
|
|
||||||
negative_reports_blocked[i] =
|
|
||||||
negative_reports[i] + (2.0 * negative_reports_sd).round() as u32;
|
|
||||||
}
|
|
||||||
let (mean_vec_blocked, _sd_vec_blocked, cov_mat_blocked) =
|
|
||||||
Self::stats(&[&bridge_ips_blocked, &negative_reports_blocked]);
|
|
||||||
|
|
||||||
let mvn = MultivariateNormal::new(mean_vec, cov_mat).unwrap();
|
// Model each variable in isolation. We use 1 - the CDF for
|
||||||
let pdf = mvn.pdf(&DVector::from_vec(vec![
|
// negative reports because more negative reports is worse.
|
||||||
bridge_ips_today as f64,
|
let bip_normal = Normal::new(bridge_ips_mean, bridge_ips_sd).unwrap();
|
||||||
negative_reports_today as f64,
|
let bip_cdf = bip_normal.cdf(bridge_ips_today as f64);
|
||||||
]));
|
|
||||||
|
|
||||||
let mvn_blocked = MultivariateNormal::new(mean_vec_blocked, cov_mat_blocked).unwrap();
|
|
||||||
let pdf_blocked = mvn_blocked.pdf(&DVector::from_vec(vec![
|
|
||||||
bridge_ips_today as f64,
|
|
||||||
negative_reports_today as f64,
|
|
||||||
]));
|
|
||||||
|
|
||||||
// Also model negative reports in isolation
|
|
||||||
let nr_normal = Normal::new(negative_reports_mean, negative_reports_sd).unwrap();
|
let nr_normal = Normal::new(negative_reports_mean, negative_reports_sd).unwrap();
|
||||||
let nr_pdf = nr_normal.pdf(negative_reports_today as f64);
|
let nr_cdf = 1.0 - nr_normal.cdf(negative_reports_today as f64);
|
||||||
let nr_normal_blocked = Normal::new(
|
|
||||||
negative_reports_mean + 2.0 * negative_reports_sd,
|
|
||||||
negative_reports_sd,
|
|
||||||
)
|
|
||||||
.unwrap();
|
|
||||||
let nr_pdf_blocked = nr_normal_blocked.pdf(negative_reports_today as f64);
|
|
||||||
|
|
||||||
(pdf / pdf_blocked).ln() < alpha || (nr_pdf / nr_pdf_blocked).ln() < alpha
|
// For now, just look at each variable in isolation
|
||||||
|
// TODO: How do we do a multivariate normal CDF?
|
||||||
|
bip_cdf < alpha || nr_cdf < alpha
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Evaluate invite-only bridge with lv3+ users submitting positive reports
|
/// Evaluate invite-only bridge with lv3+ users submitting positive reports
|
||||||
|
@ -400,67 +377,33 @@ impl Analyzer for NormalAnalyzer {
|
||||||
|
|
||||||
let (mean_vec, sd_vec, cov_mat) =
|
let (mean_vec, sd_vec, cov_mat) =
|
||||||
Self::stats(&[bridge_ips, negative_reports, positive_reports]);
|
Self::stats(&[bridge_ips, negative_reports, positive_reports]);
|
||||||
|
let bridge_ips_mean = mean_vec[0];
|
||||||
let negative_reports_mean = mean_vec[1];
|
let negative_reports_mean = mean_vec[1];
|
||||||
|
let positive_reports_mean = mean_vec[2];
|
||||||
let bridge_ips_sd = sd_vec[0];
|
let bridge_ips_sd = sd_vec[0];
|
||||||
let negative_reports_sd = sd_vec[1];
|
let negative_reports_sd = sd_vec[1];
|
||||||
let positive_reports_sd = sd_vec[2];
|
let positive_reports_sd = sd_vec[2];
|
||||||
|
|
||||||
// Artificially create data for alternative hypothesis
|
/*
|
||||||
let num_days = bridge_ips.len() as usize;
|
let mvn = MultivariateNormal::new(mean_vec, cov_mat).unwrap();
|
||||||
let mut bridge_ips_blocked = vec![0; num_days];
|
let pdf = mvn.pdf(&DVector::from_vec(vec![
|
||||||
let mut negative_reports_blocked = vec![0; num_days];
|
bridge_ips_today as f64,
|
||||||
let mut positive_reports_blocked = vec![0; num_days];
|
negative_reports_today as f64,
|
||||||
let bridge_ips_deviation = (2.0 * bridge_ips_sd).round() as u32;
|
positive_reports_today as f64,
|
||||||
let positive_reports_deviation = (2.0 * positive_reports_sd).round() as u32;
|
]));
|
||||||
for i in 0..num_days {
|
*/
|
||||||
// Suppose positive reports will go down by 2 SDs
|
|
||||||
positive_reports_blocked[i] = if positive_reports_deviation > positive_reports[i] {
|
|
||||||
0
|
|
||||||
} else {
|
|
||||||
positive_reports[i] - positive_reports_deviation
|
|
||||||
};
|
|
||||||
// Suppose bridge stats will go down by 2 SDs
|
|
||||||
bridge_ips_blocked[i] = if bridge_ips_deviation > bridge_ips[i] {
|
|
||||||
0
|
|
||||||
} else {
|
|
||||||
bridge_ips[i] - bridge_ips_deviation
|
|
||||||
};
|
|
||||||
// Suppose each user who would have submitted a positive report but
|
|
||||||
// didn't submits a negative report instead.
|
|
||||||
negative_reports_blocked[i] =
|
|
||||||
negative_reports[i] + positive_reports[i] - positive_reports_blocked[i];
|
|
||||||
}
|
|
||||||
let (mean_vec_blocked, _sd_vec_blocked, cov_mat_blocked) = Self::stats(&[
|
|
||||||
&bridge_ips_blocked,
|
|
||||||
&negative_reports_blocked,
|
|
||||||
&positive_reports_blocked,
|
|
||||||
]);
|
|
||||||
|
|
||||||
let mvn = MultivariateNormal::new(mean_vec, cov_mat).unwrap();
|
// Model each variable in isolation. We use 1 - the CDF for
|
||||||
let pdf = mvn.pdf(&DVector::from_vec(vec![
|
// negative reports because more negative reports is worse.
|
||||||
bridge_ips_today as f64,
|
let bip_normal = Normal::new(bridge_ips_mean, bridge_ips_sd).unwrap();
|
||||||
negative_reports_today as f64,
|
let bip_cdf = bip_normal.cdf(bridge_ips_today as f64);
|
||||||
positive_reports_today as f64,
|
|
||||||
]));
|
|
||||||
|
|
||||||
let mvn_blocked = MultivariateNormal::new(mean_vec_blocked, cov_mat_blocked).unwrap();
|
|
||||||
let pdf_blocked = mvn_blocked.pdf(&DVector::from_vec(vec![
|
|
||||||
bridge_ips_today as f64,
|
|
||||||
negative_reports_today as f64,
|
|
||||||
positive_reports_today as f64,
|
|
||||||
]));
|
|
||||||
|
|
||||||
// Also model negative reports in isolation
|
|
||||||
let nr_normal = Normal::new(negative_reports_mean, negative_reports_sd).unwrap();
|
let nr_normal = Normal::new(negative_reports_mean, negative_reports_sd).unwrap();
|
||||||
let nr_pdf = nr_normal.pdf(negative_reports_today as f64);
|
let nr_cdf = 1.0 - nr_normal.cdf(negative_reports_today as f64);
|
||||||
// Note we do NOT make this a function of positive signals
|
let pr_normal = Normal::new(positive_reports_mean, positive_reports_sd).unwrap();
|
||||||
let nr_normal_blocked = Normal::new(
|
let pr_cdf = pr_normal.cdf(positive_reports_today as f64);
|
||||||
negative_reports_mean + 2.0 * negative_reports_sd,
|
|
||||||
negative_reports_sd,
|
|
||||||
)
|
|
||||||
.unwrap();
|
|
||||||
let nr_pdf_blocked = nr_normal_blocked.pdf(negative_reports_today as f64);
|
|
||||||
|
|
||||||
(pdf / pdf_blocked).ln() < alpha || (nr_pdf / nr_pdf_blocked).ln() < alpha
|
// For now, just look at each variable in isolation
|
||||||
|
// TODO: How do we do a multivariate normal CDF?
|
||||||
|
bip_cdf < alpha || nr_cdf < alpha || pr_cdf < alpha
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue