lox-simulation/scripts/plot-results.py

#!/usr/bin/env python3
import matplotlib
import matplotlib.pyplot as pyplot
from mpl_toolkits.axes_grid1.inset_locator import zoomed_inset_axes, mark_inset
import math
import csv
import json
import sys
# Pass experiment number as first arg
experiment_num = int(sys.argv[1])
# Pass number of trials as second arg (used to average results)
num_trials = int(sys.argv[2])
# (Pass list of *-bridges.csv files as remaining args)
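# Example invocation (illustrative paths; actual file names depend on how the
# simulations were run):
#   ./plot-results.py 1 10 results/*-bridges.csv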
# Artificially truncate to this many days if we ran for longer
num_days = 500
# Max number of days for Troll Patrol to detect censorship. If it
# doesn't detect it within this time, we count it as a false negative.
max_number_of_days_to_detect = 10
# Use a bigger font size than the matplotlib default
# (currently the same size is used for both experiments)
if experiment_num == 1:
    matplotlib.rcParams.update({'font.size': 14})
else:
    matplotlib.rcParams.update({'font.size': 14})
# Adjust width of experiment 1 figures
width = 7.2
# Get mean of list of numbers
def mean(my_list):
    if len(my_list) == 0:
        return None
    total = 0
    for i in my_list:
        total += i
    return total / len(my_list)
# Get stddev of list of numbers
def std_dev(my_list):
    if len(my_list) == 0:
        return None
    avg = mean(my_list)
    total = 0
    for i in my_list:
        total += (i - avg)**2
    total /= len(my_list)
    return math.sqrt(total)
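# Note: std_dev is the population standard deviation (dividing by N rather
# than N-1), which is how the spread across trials is reported below.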
# Independent variable
if experiment_num == 1:
    # Probability user submits reports
    # (note flooding does not use 0, so plot from index 1)
    ind_var = [0.0, 0.01, 0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
else:
    # Harshness
    ind_var = [0, 1, 2, 3, 4]
# Raw detection times for violin plots
overt = [ [] for i in range(len(ind_var)) ]
flooding = [ [] for i in range(len(ind_var)) ]
# Get {True,False} {Positives,Negatives} for our trials
overt_tp = [ [] for i in range(len(ind_var)) ]
overt_tn = [ [] for i in range(len(ind_var)) ]
overt_fp = [ [] for i in range(len(ind_var)) ]
overt_fn = [ [] for i in range(len(ind_var)) ]
flooding_tp = [ [] for i in range(len(ind_var)) ]
flooding_tn = [ [] for i in range(len(ind_var)) ]
flooding_fp = [ [] for i in range(len(ind_var)) ]
flooding_fn = [ [] for i in range(len(ind_var)) ]
# Remaining arguments should be *-bridges.csv files containing info on bridges
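# Each bridge in each trial is classified as follows (see the loop body):
# no detected blocking is a true negative if the bridge was never blocked and
# a false negative otherwise; a detected blocking is a false positive if the
# bridge was never blocked (or the detection predates the blocking), a false
# negative if detection took more than max_number_of_days_to_detect days, and
# a true positive otherwise.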
for bfile in sys.argv[3:]:
    with open(bfile,'r') as bcsv:
        # Read data on bridges from CSV
        bridges = csv.reader(bcsv, delimiter=',')
        # Get censor_secrecy and ind_var from simulation config
        sfile = bfile[:-(len("bridges.csv"))] + "simulation_config.json"
        with open(sfile,'r') as sjson:
            config = json.load(sjson)
            secrecy = config["censor_secrecy"]
            if experiment_num == 1:
                var = config["prob_user_submits_reports"]
                index = ind_var.index(var)
            else:
                tfile = bfile[:-(len("bridges.csv"))] + "troll_patrol_config.json"
                with open(tfile,'r') as tjson:
                    tconfig = json.load(tjson)
                    # max_threshold used as harshness
                    var = tconfig["max_threshold"]
                    index = ind_var.index(var)
        # Get start date so we can ignore events after 500 days
        startfile = bfile[:-(len("bridges.csv"))] + "start.csv"
        with open(startfile,'r') as startcsv:
            start_dates = csv.reader(startcsv, delimiter=',')
            start_row = next(start_dates)
            start_date = int(start_row[0])
            end_date = start_date + num_days - 1
        # Raw detection times for violin plot
        detection_times = []
        # {True,False} {Positives,Negatives}
        true_pos = 0
        true_neg = 0
        false_pos = 0
        false_neg = 0
        for row in bridges:
            if row[0] == "Full stats per bridge:" or row[0] == "Fingerprint":
                continue
            # row[0] is the bridge fingerprint
            first_distributed = int(row[1])
            first_real_user = int(row[2])
            first_blocked = int(row[3])
            first_detected_blocked = int(row[4])
            # row[5] is first positive report
            # Treat anything after the end date like it didn't happen
            if first_distributed > end_date:
                first_distributed = 0
            if first_real_user > end_date:
                first_real_user = 0
            if first_blocked > end_date:
                first_blocked = 0
            if first_detected_blocked > end_date:
                first_detected_blocked = 0
            # Ignore bridges with no users
            if first_real_user == 0:
                continue
            # Did we identify correctly?
            # Negative classification
            if first_detected_blocked == 0:
                if first_blocked == 0:
                    true_neg += 1
                else:
                    false_neg += 1
            # Positive classification
            else:
                if first_blocked == 0 or first_detected_blocked < first_blocked:
                    false_pos += 1
                # If we didn't detect it in time, consider it a false
                # negative, even if we eventually detected it
                elif first_detected_blocked - first_blocked > max_number_of_days_to_detect:
                    false_neg += 1
                else:
                    true_pos += 1
                    # Add data point to plot in violin plot
                    detection_times.append(first_detected_blocked - first_blocked)
        if secrecy == "Flooding":
            # Add raw data for violin plot
            flooding[index].extend(detection_times)
            flooding_tp[index].append(true_pos)
            flooding_tn[index].append(true_neg)
            flooding_fp[index].append(false_pos)
            flooding_fn[index].append(false_neg)
        else:
            # Add raw data for violin plot
            overt[index].extend(detection_times)
            overt_tp[index].append(true_pos)
            overt_tn[index].append(true_neg)
            overt_fp[index].append(false_pos)
            overt_fn[index].append(false_neg)
# We may not have results for all values of the independent variable. If
# we have a smaller set of values, track them.
ind_var_overt = []
ind_var_flooding = []
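# Precision is TP / (TP + FP) and recall is TP / (TP + FN); both are computed
# per trial and then averaged (with standard deviation) across trials for each
# value of the independent variable.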
# Get precision and recall for each trial
overt_precision_means = []
overt_precision_stddevs = []
overt_recall_means = []
overt_recall_stddevs = []
# Get mean and stddev precision and recall
for i in range(len(ind_var)):
    precisions = []
    recalls = []
    # If we have data, add its index to the list
    if len(overt_tp[i]) > 0:
        ind_var_overt.append(i)
    # Compute precision and recall for each trial
    for j in range(len(overt_tp[i])):
        precisions.append(overt_tp[i][j] / (overt_tp[i][j] + overt_fp[i][j]))
        recalls.append(overt_tp[i][j] / (overt_tp[i][j] + overt_fn[i][j]))
    # Add their means and stddevs to the appropriate lists
    overt_precision_means.append(mean(precisions))
    overt_precision_stddevs.append(std_dev(precisions))
    overt_recall_means.append(mean(recalls))
    overt_recall_stddevs.append(std_dev(recalls))
flooding_precision_means = []
flooding_precision_stddevs = []
flooding_recall_means = []
flooding_recall_stddevs = []
# Get mean and stddev precision and recall
for i in range(len(ind_var)):
    precisions = []
    recalls = []
    # If we have data, add its index to the list
    if len(flooding_tp[i]) > 0:
        ind_var_flooding.append(i)
    # Compute precision and recall for each trial
    for j in range(len(flooding_tp[i])):
        precisions.append(flooding_tp[i][j] / (flooding_tp[i][j] + flooding_fp[i][j]))
        recalls.append(flooding_tp[i][j] / (flooding_tp[i][j] + flooding_fn[i][j]))
    # Add their means and stddevs to the appropriate lists
    flooding_precision_means.append(mean(precisions))
    flooding_precision_stddevs.append(std_dev(precisions))
    flooding_recall_means.append(mean(recalls))
    flooding_recall_stddevs.append(std_dev(recalls))
# Plot our data
# Violin plots
# Overt censor
if experiment_num == 1:
    pyplot.violinplot([overt[i] for i in ind_var_overt], positions=[ind_var[i] for i in ind_var_overt], widths=0.04)
    pyplot.title("Time to Detect Censorship (Overt Censor)")
    pyplot.xlabel("Probability of users submitting reports")
    pyplot.ylabel("Days to detect censorship")
    pyplot.ylim(bottom=0)
    pyplot.savefig("results/figure-2b.png")
    pyplot.cla()
else:
    pyplot.violinplot([overt[i] for i in ind_var_overt], positions=[ind_var[i] for i in ind_var_overt])
    pyplot.title("Time to Detect Censorship (Overt Censor)")
    pyplot.xlabel("Harshness")
    pyplot.xticks(ind_var)
    pyplot.ylabel("Days to detect censorship")
    pyplot.ylim(bottom=0)
    pyplot.savefig("results/figure-3b.png")
    pyplot.cla()
# Flooding censor (should be orange)
if experiment_num == 1:
    #pyplot.figure().set_figwidth(width)
    fv = pyplot.violinplot([flooding[i] for i in ind_var_flooding], positions=[ind_var[i] for i in ind_var_flooding], widths=0.045)
else:
    fv = pyplot.violinplot([flooding[i] for i in ind_var_flooding], positions=[ind_var[i] for i in ind_var_flooding])
# Make it orange regardless of experiment number
for pc in fv["bodies"]:
    pc.set_facecolor("orange")
    pc.set_edgecolor("orange")
for part in ("cbars", "cmins", "cmaxes"):
    fv[part].set_edgecolor("orange")
if experiment_num == 1:
    pyplot.title("Time to Detect Censorship (Flooding Censor)")
    pyplot.xlabel("Probability of users submitting reports")
    pyplot.ylabel("Days to detect censorship")
    pyplot.ylim(bottom=0)
    pyplot.savefig("results/figure-2c.png")
    pyplot.cla()
else:
    pyplot.title("Time to Detect Censorship (Flooding Censor)")
    pyplot.xlabel("Harshness")
    pyplot.xticks(ind_var)
    pyplot.ylabel("Days to detect censorship")
    pyplot.ylim(bottom=0)
    pyplot.savefig("results/figure-3c.png")
    pyplot.cla()
# Precision vs. Recall
if experiment_num == 1:
    # Also plot recall alone
    pyplot.ylim(0,1)
    ax = pyplot
    ax.errorbar([ind_var[i] for i in ind_var_overt], [overt_recall_means[i] for i in ind_var_overt], [overt_recall_stddevs[i] for i in ind_var_overt], linestyle="solid", marker='o', capsize=3)
    ax.errorbar([ind_var[i] for i in ind_var_flooding], [flooding_recall_means[i] for i in ind_var_flooding], [flooding_recall_stddevs[i] for i in ind_var_flooding], linestyle="dotted", marker='v', capsize=3)
    pyplot.xlabel("Probability of users submitting reports")
    pyplot.xlim(0,1)
    pyplot.ylabel("Recall")
    pyplot.ylim(0,1)
    pyplot.title("Proportion of Blocked Bridges Detected")
    pyplot.legend(["Overt censor", "Flooding censor"], loc = "lower right")
    pyplot.savefig("results/figure-2a.png")
    pyplot.cla()
else:
    pyplot.xlim(0,1)
    pyplot.ylim(0,1.02)
    ax = pyplot.axes()
    ax.errorbar([overt_recall_means[i] for i in ind_var_overt], [overt_precision_means[i] for i in ind_var_overt], xerr=[overt_recall_stddevs[i] for i in ind_var_overt], yerr=[overt_precision_stddevs[i] for i in ind_var_overt], marker='o', capsize=3, linestyle="solid")
    ax.errorbar([flooding_recall_means[i] for i in ind_var_flooding], [flooding_precision_means[i] for i in ind_var_flooding], xerr=[flooding_recall_stddevs[i] for i in ind_var_flooding], yerr=[flooding_precision_stddevs[i] for i in ind_var_flooding], marker='v', capsize=3, linestyle="dotted")
    pyplot.xlabel("Recall")
    pyplot.xlim(0,1)
    pyplot.ylabel("Precision")
    pyplot.ylim(0,1.02)
    pyplot.title("Precision vs. Recall")
    pyplot.legend(["Overt censor", "Flooding censor"], loc = "lower left")
    # Zoom in on relevant part
    axins = zoomed_inset_axes(ax, zoom=1.75, bbox_to_anchor=(-0.325, -0.125, 1, 1), bbox_transform=ax.transAxes)
    axins.errorbar([overt_recall_means[i] for i in ind_var_overt], [overt_precision_means[i] for i in ind_var_overt], xerr=[overt_recall_stddevs[i] for i in ind_var_overt], yerr=[overt_precision_stddevs[i] for i in ind_var_overt], marker='o', capsize=3, linestyle="solid")
    axins.errorbar([flooding_recall_means[i] for i in ind_var_flooding], [flooding_precision_means[i] for i in ind_var_flooding], xerr=[flooding_recall_stddevs[i] for i in ind_var_flooding], yerr=[flooding_precision_stddevs[i] for i in ind_var_flooding], marker='v', capsize=3, linestyle="dotted")
    pyplot.xlim(0.75,1)
    pyplot.ylim(0.7,1.02)
    mark_inset(ax, axins, loc1=2, loc2=4)
    pyplot.savefig("results/figure-3a.png")
    pyplot.cla()
# Format mean +- standard deviation with correct sigfigs and rounding.
# I couldn't find an existing solution for this, so here's my awkward approach.
def fmt(data, multiple_trials=True):
    # If we only run one trial, just use the count without standard deviation
    if not multiple_trials:
        return f"{data[0]}"
    # Get mean and standard deviation
    m = mean(data)
    s = std_dev(data)
    if s == 0:
        return f"{round(m)}$\\pm$0"
    # We have max 3600 bridges, so we will certainly never see this many.
    n = 10000
    while round(s / n) < 1:
        n /= 10
    s = round(s / n) * n
    m = round(m / n) * n
    if s >= 1:
        s = int(round(s))
    elif s >= 0.1:
        s = int(round(s*10)) / 10
    # We have a pesky 0.6000000...1 that causes problems. This is to handle that.
    if m >= 1:
        m = int(round(m))
    elif m >= 0.1:
        m = int(round(m*10)) / 10
    return f"{m}$\\pm${s}"
def fmt_pr(m, s, multiple_trials=True):
    # If we only run one trial, round to 3 decimal places and don't
    # include standard deviations
    if not multiple_trials:
        m = int(round(m*1000)) / 1000
        return f"{m}"
    n = 1.0
    while s > 0 and round(s / n) < 1:
        n /= 10
    s = round(s / n) * n
    m = round(m / n) * n
    if s >= 0.1:
        s = int(round(s*10)) / 10
        m = int(round(m*10)) / 10
    elif s >= 0.01:
        s = int(round(s*100)) / 100
        m = int(round(m*100)) / 100
    elif s >= 0.001:
        s = int(round(s*1000)) / 1000
        m = int(round(m*1000)) / 1000
    elif s >= 0.0001:
        s = int(round(s*10000)) / 10000
        m = int(round(m*10000)) / 10000
    elif s >= 0.00001:
        s = int(round(s*100000)) / 100000
        m = int(round(m*100000)) / 100000
    elif s >= 0.000001:
        s = int(round(s*1000000)) / 1000000
        m = int(round(m*1000000)) / 1000000
    return f"{m}$\\pm${s}"
# Output raw data as lines of table
standalone_table_preamble = """\\documentclass{article}
\\usepackage{standalone}
\\usepackage{array}
\\newcolumntype{C}[1]{>{\\centering\\arraybackslash}p{#1}}
\\begin{document}"""
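# Each table is written as a complete standalone LaTeX document, so it can be
# compiled on its own or pulled into a larger document via the standalone
# package.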
# Use appropriate variables for this experiment
if experiment_num == 1:
    ind_var_str = "Prob. users submit reports"
    # Make 2 different tables, one for overt censor and one for flooding censor
    with open("results/experiment-1-table-overt.tex", 'w') as f:
        print(standalone_table_preamble, file=f)
        print("""\\begin{table*}
\\caption[Results of experiment 1 with overt censor]{Results of the first experiment with the \\textbf{overt censor}, specifically the mean and standard deviation number of true positives, true negatives, false positives, and false negatives for each set of trials. The independent variable in this experiment is the probability of users submitting reports.}
\\label{experiment-1-results-overt}
\\centering
\\begin{tabular}[p]{|C{0.1\\textwidth}|C{0.1\\textwidth}|C{0.105\\textwidth}|C{0.1\\textwidth}|C{0.105\\textwidth}|c|c|}""", file=f)
        print("\\hline", file=f)
        print("\\textbf{" + ind_var_str + "} & \\textbf{True positives} & \\textbf{True negatives} & \\textbf{False positives} & \\textbf{False negatives} & \\textbf{Precision} & \\textbf{Recall} \\\\", file=f)
        print("\\hline", file=f)
        print("\\hline", file=f)
        for i in ind_var_overt:
            print(f"{ind_var[i]} & {fmt(overt_tp[i], num_trials>1)} & {fmt(overt_tn[i], num_trials>1)} & {fmt(overt_fp[i], num_trials>1)} & {fmt(overt_fn[i], num_trials>1)} & {fmt_pr(overt_precision_means[i], overt_precision_stddevs[i], num_trials>1)} & {fmt_pr(overt_recall_means[i], overt_recall_stddevs[i], num_trials>1)}\\\\", file=f)
            print("\\hline", file=f)
        print("\\end{tabular}", file=f)
        print("\\end{table*}", file=f)
        print("\\end{document}", file=f)
    with open("results/experiment-1-table-flooding.tex", 'w') as f:
        print(standalone_table_preamble, file=f)
        print("""\\begin{table*}
\\caption[Results of experiment 1 with flooding censor]{Results of the first experiment with the \\textbf{flooding censor}, specifically the mean and standard deviation number of true positives, true negatives, false positives, and false negatives for each set of trials. The independent variable in this experiment is the probability of users submitting reports. When Troll Patrol does not detect that bridges are blocked, Lox does not allow users to migrate to new bridges, so the number of overall bridges in the simulation does not grow. This accounts for the low number of overall bridges when the number of positive classifications (both true and false) is low.}
\\label{experiment-1-results-flooding}
\\centering
\\begin{tabular}[p]{|C{0.1\\textwidth}|C{0.1\\textwidth}|C{0.105\\textwidth}|C{0.1\\textwidth}|C{0.105\\textwidth}|c|c|}""", file=f)
        print("\\hline", file=f)
        print("\\textbf{" + ind_var_str + "} & \\textbf{True positives} & \\textbf{True negatives} & \\textbf{False positives} & \\textbf{False negatives} & \\textbf{Precision} & \\textbf{Recall} \\\\", file=f)
        print("\\hline", file=f)
        print("\\hline", file=f)
        for i in ind_var_flooding:
            print(f"{ind_var[i]} & {fmt(flooding_tp[i], num_trials>1)} & {fmt(flooding_tn[i], num_trials>1)} & {fmt(flooding_fp[i], num_trials>1)} & {fmt(flooding_fn[i], num_trials>1)} & {fmt_pr(flooding_precision_means[i], flooding_precision_stddevs[i], num_trials>1)} & {fmt_pr(flooding_recall_means[i], flooding_recall_stddevs[i], num_trials>1)} \\\\", file=f)
            print("\\hline", file=f)
        print("\\end{tabular}", file=f)
        print("\\end{table*}", file=f)
        print("\\end{document}", file=f)
else:
    # Make 2 tables for experiment 2
    with open("results/experiment-2-table-overt.tex", 'w') as f:
        print(standalone_table_preamble, file=f)
        print("""\\begin{table*}
\\caption[Results of experiment 2 with overt censor]{Results of the second experiment with the \\textbf{overt censor}, specifically the mean and standard deviation number of true positives, true negatives, false positives, and false negatives for each set of trials. The independent variable in this experiment is the harshness of the classifier.}
\\label{experiment-2-results-overt}
\\centering
\\begin{tabular}[t]{|C{0.115\\textwidth}|C{0.1\\textwidth}|C{0.105\\textwidth}|C{0.1\\textwidth}|C{0.105\\textwidth}|c|c|}""", file=f)
        print("\\hline", file=f)
        print("\\textbf{Harshness} & \\textbf{True positives} & \\textbf{True negatives} & \\textbf{False positives} & \\textbf{False negatives} & \\textbf{Precision} & \\textbf{Recall} \\\\", file=f)
        print("\\hline", file=f)
        print("\\hline", file=f)
        for i in ind_var_overt:
            print(f"{ind_var[i]} & {fmt(overt_tp[i], num_trials>1)} & {fmt(overt_tn[i], num_trials>1)} & {fmt(overt_fp[i], num_trials>1)} & {fmt(overt_fn[i], num_trials>1)} & {fmt_pr(overt_precision_means[i], overt_precision_stddevs[i], num_trials>1)} & {fmt_pr(overt_recall_means[i], overt_recall_stddevs[i], num_trials>1)}\\\\", file=f)
            print("\\hline", file=f)
        print("\\end{tabular}", file=f)
        print("\\end{table*}", file=f)
        print("\\end{document}", file=f)
    with open("results/experiment-2-table-flooding.tex", 'w') as f:
        print(standalone_table_preamble, file=f)
        print("""\\begin{table*}
\\caption[Results of experiment 2 with flooding censor]{Results of the second experiment with the \\textbf{flooding censor}, specifically the mean and standard deviation number of true positives, true negatives, false positives, and false negatives for each set of trials. The independent variable in this experiment is the harshness of the classifier.}
\\label{experiment-2-results-flooding}
\\centering
\\begin{tabular}[t]{|C{0.115\\textwidth}|C{0.1\\textwidth}|C{0.105\\textwidth}|C{0.1\\textwidth}|C{0.105\\textwidth}|c|c|}""", file=f)
        print("\\hline", file=f)
        print("\\textbf{Harshness} & \\textbf{True positives} & \\textbf{True negatives} & \\textbf{False positives} & \\textbf{False negatives} & \\textbf{Precision} & \\textbf{Recall} \\\\", file=f)
        print("\\hline", file=f)
        print("\\hline", file=f)
        for i in ind_var_flooding:
            print(f"{ind_var[i]} & {fmt(flooding_tp[i], num_trials>1)} & {fmt(flooding_tn[i], num_trials>1)} & {fmt(flooding_fp[i], num_trials>1)} & {fmt(flooding_fn[i], num_trials>1)} & {fmt_pr(flooding_precision_means[i], flooding_precision_stddevs[i], num_trials>1)} & {fmt_pr(flooding_recall_means[i], flooding_recall_stddevs[i], num_trials>1)} \\\\", file=f)
            print("\\hline", file=f)
        print("\\end{tabular}", file=f)
        print("\\end{table*}", file=f)
        print("\\end{document}", file=f)