plot_coverage complete, producing 5 figures

4e6a1139 · Jörg Martin · 407403b0 · 4e6a1139
Commit 4e6a1139 authored 3 years ago by Jörg Martin
--- a/Experiments/plot_coverage.py
+++ b/Experiments/plot_coverage.py
 """
-Compute metrics for datasets for which there is not necessarily a ground truth.
+Compute the coverage for various coverage factors and compare them with 
-Results will be stored in the results folder
+with the corresponding q or theoretical coverage. Results will be stored
+in various plots in the results/figures folder.
+TODO: Do the latter
 """
 import importlib
 import os
@@ -174,8 +176,19 @@ def compute_coverages(data, eiv, number_of_draws,
 ####
-def plot_data(eiv_coverages, noneiv_coverages, color='r',
+def coverage_diagonal_plot(eiv_coverages, noneiv_coverages, color,
-        against_theoretical = False):
+        against_theoretical = False, label = ''):
+    """
+    Plot numerical coverages against q (used coverage value), if
+    against_theoretical is False, or the theoretical coverage, if
+    `against_theoretical` is True.
+    :param eiv_coverages: The output of `compute_coverages` with `eiv=True`
+    :param noneiv_coverages: The output of `compute_coverages` with `eiv=False`
+    :param color: String, denoting the color.
+    :param against_theoretical: Boolean, see above.
+    :param label: String. Will be combined with "eiv_"/"noneiv_" and included
+    as label in the plot.
+    """
    eiv_numerical_coverage, eiv_theoretical_coverage = eiv_coverages
    noneiv_numerical_coverage, noneiv_theoretical_coverage = noneiv_coverages
    if eiv_numerical_coverage is not None and eiv_theoretical_coverage is not None:
@@ -186,7 +199,7 @@ def plot_data(eiv_coverages, noneiv_coverages, color='r',
        else:
            x_values = np.array(q_range)
        plt.plot(x_values, mean_eiv_numerical_coverage,
-                color=color, linestyle='solid')
+                color=color, linestyle='solid', label=f'eiv_{label}')
        plt.fill_between(x_values,
                mean_eiv_numerical_coverage - std_eiv_numerical_coverage,
                mean_eiv_numerical_coverage + std_eiv_numerical_coverage, 
@@ -199,28 +212,147 @@ def plot_data(eiv_coverages, noneiv_coverages, color='r',
        else:
            x_values = np.array(q_range)
        plt.plot(x_values, mean_noneiv_numerical_coverage,
-                color=color, linestyle='dashed')
+                color=color, linestyle='dashed', label=f'noneiv_{label}')
        plt.fill_between(x_values,
                mean_noneiv_numerical_coverage - std_noneiv_numerical_coverage,
                mean_noneiv_numerical_coverage + std_noneiv_numerical_coverage,
                color=color, alpha=0.3)
+def coverage_residual_plot(eiv_coverages, noneiv_coverages, color,
+        absolute_values=True, against_theoretical=False, label=''):
+    """
+    Plot deviation of numerical coverages to q (used coverage value), if
+    against_theoretical is False, or to the theoretical coverage, if
+    `against_theoretical` is True. On the x axis q is plotted (in contrast to
+            coverage_diagonal_plot).
+    :param eiv_coverages: The output of `compute_coverages` with `eiv=True`
+    :param noneiv_coverages: The output of `compute_coverages` with `eiv=False`
+    :param color: String, denoting the color.
+    :param absolute_values:Boolean. If True (default) the absolute value of the
+    differences will be plotted.
+    :param against_theoretical: Boolean, see above.
+    :param label: String. Will be combined with "eiv_"/"noneiv_" and included
+    as label in the plot.
+    """
+    if not against_theoretical:
+        eiv_coverages = eiv_coverages[0]
+        noneiv_coverages = noneiv_coverages[0]
+        assert len(eiv_coverages.shape) == 2 and\
+                eiv_coverages.shape[0] == len(q_range)
+        assert len(eiv_coverages.shape) == 2 and\
+                eiv_coverages.shape[0] == len(q_range)
+        assert len(noneiv_coverages.shape) == 2
+        eiv_cov_res =  eiv_coverages - q_range.reshape((-1,1))
+        noneiv_cov_res = noneiv_coverages - q_range.reshape((-1,1))
+    else:
+        eiv_cov_res =  eiv_coverages[0] - eiv_coverages[1]
+        noneiv_cov_res =  noneiv_coverages[0] - noneiv_coverages[1]
+    if absolute_values:
+        eiv_cov_res = np.abs(eiv_cov_res)
+        noneiv_cov_res = np.abs(noneiv_cov_res)
+    mean_eiv_cov_res = np.mean(eiv_cov_res, axis=-1)
+    std_eiv_cov_res = np.std(eiv_cov_res, axis=-1)
+    mean_noneiv_cov_res = np.mean(noneiv_cov_res, axis=-1)
+    std_noneiv_cov_res = np.std(noneiv_cov_res, axis=-1)
+    plt.plot(q_range, mean_eiv_cov_res,
+            color=color, linestyle='solid', label=f'eiv_{label}')
+    plt.fill_between(q_range,
+            mean_eiv_cov_res - std_eiv_cov_res,
+            mean_eiv_cov_res + std_eiv_cov_res, 
+            color=color, alpha=0.5)
+    plt.plot(q_range, mean_noneiv_cov_res,
+            color=color, linestyle='dashed', label=f'noneiv_{label}')
+    plt.fill_between(q_range,
+            mean_noneiv_cov_res - std_noneiv_cov_res,
+            mean_noneiv_cov_res + std_noneiv_cov_res,
+            color=color, alpha=0.3)
+# create figures, together with title and axis labels
 plt.figure(1)
 plt.clf()
-datasets = ['energy', 'kin8nm', 'yacht', 'wine']
+plt.title('Coverage for datasets with ground truth')
-colors = ['red', 'blue', 'green', 'purple']
+plt.xlabel('q')
-use_ground_truth = False
+plt.ylabel('coverage')
-against_theoretical = True
+plt.figure(2)
-for data, color in zip(datasets, colors):
+plt.clf()
-    eiv_coverages = compute_coverages(data=data, eiv=True,
+plt.title('Coverage of noisy labels (vs. theory)')
-            number_of_draws=[100,5],
+plt.xlabel('th. coverage')
-            use_ground_truth=use_ground_truth)
+plt.ylabel('coverage')
-    noneiv_coverages = compute_coverages(data=data, eiv=False,
+plt.figure(3)
-            number_of_draws=100,
+plt.clf()
-            use_ground_truth=use_ground_truth)
+plt.title('Coverage of noisy labels (vs. q)')
-    plot_data(eiv_coverages, noneiv_coverages, color=color,
+plt.xlabel('q')
-            against_theoretical=against_theoretical)
+plt.ylabel('coverage')
+plt.figure(4)
+plt.clf()
+plt.title('Theoretical deviation Coverage for datasets with ground truth')
+plt.xlabel('q')
+plt.ylabel('deviation cov from q')
+plt.figure(5)
+plt.clf()
+plt.title('Deviation (theory) of noisy Coverage')
+plt.xlabel('q')
+plt.ylabel('deviation cov from theor. cov')
+# datasets to plot and their coloring
+datasets = ['linear', 'quadratic','yacht','wine','power']
+colors = ['red', 'blue','green','purple','orange','cyan']
+# loop through data
+for use_ground_truth in [False, True]:
+    for data, color in zip(datasets, colors):
+        # compute coverages
+        eiv_coverages = compute_coverages(data=data, eiv=True,
+                number_of_draws=[100,5],
+                use_ground_truth=use_ground_truth)
+        noneiv_coverages = compute_coverages(data=data, eiv=False,
+                number_of_draws=100,
+                use_ground_truth=use_ground_truth)
+        # If no result was produced (no ground truth although required), skip
+        if eiv_coverages[0] is None and eiv_coverages[1] is None:
+            assert noneiv_coverages[0] is None and noneiv_coverages[1] is None
+            break
+        # create plots
+        if use_ground_truth:
+            plt.figure(1)
+            coverage_diagonal_plot(eiv_coverages, noneiv_coverages, color=color,
+                    against_theoretical=False, label=data)
+            plt.figure(4)
+            coverage_residual_plot(eiv_coverages, noneiv_coverages, color=color,
+                    against_theoretical=False, label=data)
+        else:
+            plt.figure(2)
+            coverage_diagonal_plot(eiv_coverages, noneiv_coverages, color=color,
+                    against_theoretical=True, label=data)
+            plt.figure(3)
+            coverage_residual_plot(eiv_coverages, noneiv_coverages, color=color,
+                    against_theoretical=False, label=data)
+            plt.figure(5)
+            coverage_residual_plot(eiv_coverages, noneiv_coverages, color=color,
+                    against_theoretical=True, label=data)
+# add diagonals, where meaningful
+plt.figure(1)
 x_diag = np.linspace(0.0, 1.0)
 plt.plot(x_diag, x_diag, color='k', linestyle='dotted' )
+plt.figure(2)
+x_diag = np.linspace(0.0, 1.0)
+plt.plot(x_diag, x_diag, color='k', linestyle='dotted' )
+plt.figure(3)
+x_diag = np.linspace(0.0, 1.0)
+plt.plot(x_diag, x_diag, color='k', linestyle='dotted' )
+# add legends
+for fig_nr in range(1,6):
+    plt.figure(fig_nr)
+    plt.legend()
+# show plots
 plt.show()