Updated documentation

1c5fc0e4 · Jörg Martin · d259b2f1 · 1c5fc0e4 · 1c5fc0e4 · 1c5fc0e4
Commit 1c5fc0e4 authored 4 years ago by Jörg Martin
--- a/README.md
+++ b/README.md
@@ -39,7 +39,7 @@ In the Anaconda shell then type the following command, with `<path-to-zipped-fol

        pip install <path-to-source-folder>

-3. **From the repository** *(not enabled yet)*: Insert the following in the Anaconda shell and hit enter (if you are behind proxy server you have to add after `install` the option `--proxy=<proxy-url>:<port-number>` with `<proxy-url>` and `<port-number>` replaced accordingly.):
+3. **From the repository** *(not enabled yet)*: Insert the following in the Anaconda shell and hit enter (if you are behind a proxy server you might have to add the option `--proxy=<proxy-url>:<port-number>` after `install`, with `<proxy-url>` and `<port-number>` replaced accordingly):

        pip install git+https://gitlab1.ptb.de/JoergMartin/sample_size_program.git
 	
@@ -76,6 +76,8 @@ When asked if you really want to remove, press `y`. If you created a `.bat` file
 ### Avoiding Anaconda
 The advantage of using Anaconda is that all scientific packages (those needed here are: `matplotlib`, `numpy`, `scipy`) come pre-installed and will, if all goes well, work out of the box. However, as a full environment it might be too bloated for some users. If you want to use your own python interpreter you only have to ensure that `matplotlib`, `numpy` and `scipy` are or can be installed by the `pip` command above, which on some OS (like Windows) can require you to install a C++ building tool. Once you are sure that the packages for scientific computing are or can be installed you can proceed as above. If you are thinking of using the default python interpreter of your system you probably want to create a virtual environment using the `venv` module, especially if you are on a Unix-like system.

+### Using just the algorithm without the GUI
+The package `vpvc_algorithm` can be used as a standalone package for sample size planning based on the VPVC without a GUI. The algorithm itself is implemented in the module `vpvc_algorithm.ssd_framework` and its dependencies. The classes `poi_ssd_framework` and `normal_ssd_framework` are behind the computations done in the GUI. If you want to implement a custom sample size planning environment (for instance for another data distribution and/or a different prior), derive a class from `generic_ssd_framework`, following the instructions in its docstring.

 # License


--- a/vpvc_algorithm/optimize.py
+++ b/vpvc_algorithm/optimize.py
+"""
+A small library for optimizing criteria over integers
+"""
+
 import numpy as np

 def int_midpoint(a,b):

--- a/vpvc_algorithm/parameter_from_quantile.py
+++ b/vpvc_algorithm/parameter_from_quantile.py
@@ -4,6 +4,13 @@ import scipy.optimize


 def gamma_parameters(median, upper_quartile, a_min=0+1e-2, a_max=1e5):
+    """
+    Returns the alpha (shape) and beta (rate) of a gamma distribution with the specified `median` and `upper_quartile`.
+    :param median: positive float
+    :param upper_quartile: positive float, should be larger than `median`
+    :param a_min: Smallest alpha value that will be considered (positive float)
+    :param a_max: Largest alpha value that will be considered (positive float, larger than `a_min`)
+    """
    ppf = scipy.stats.gamma.ppf
    assert median < upper_quartile
    # get alpha
@@ -16,6 +23,13 @@ def gamma_parameters(median, upper_quartile, a_min=0+1e-2, a_max=1e5):
    return (alpha, beta)

 def inv_gamma_parameters(median, upper_quartile, a_min=2+1e-1, a_max=1e5):
+    """
+    Returns the alpha (shape) and beta (scale) of an inverse gamma distribution with the specified `median` and `upper_quartile`.
+    :param median: positive float
+    :param upper_quartile: positive float, should be larger than `median`
+    :param a_min: Smallest alpha value that will be considered (positive float)
+    :param a_max: Largest alpha value that will be considered (positive float, larger than `a_min`)
+    """
    ppf = scipy.stats.invgamma.ppf
    assert median < upper_quartile
    # get alpha
@@ -29,12 +43,22 @@ def inv_gamma_parameters(median, upper_quartile, a_min=2+1e-1, a_max=1e5):


 def normal_parameters(median, upper_quartile):
+    """
+    Returns the mean and variance of a normal distribution with `median` and `upper_quartile`
+    :param median: float
+    :param upper_quartile: float, should be larger than `median`
+    """
+    assert upper_quartile > median
    ppf = scipy.stats.norm.ppf
    mu = median
    var = (upper_quartile/ppf(0.75))**2
    return mu, var

 def normal_inv_gamma_parameters(mu_median, mu_upper_quartile, var_median, var_upper_quartile):
+    """
+    Returns the mean (mu_0), scaling (occasionally called lambda), the shape (alpha) and scale (beta) of a Normal inverse gamma distribution.
+    Combination of `parameter_from_quantile.normal_parameters` and `parameter_from_quantile.inv_gamma_parameters`. See their documentation for further information. *It is assumed that the shape parameter alpha is larger than 2, so that the distribution has a variance*. See also `parameter_from_quantile.max_inv_gamma_upper_quartile`
+    """
    mu_0, mu_var = normal_parameters(mu_median, mu_upper_quartile)
    alpha, beta = inv_gamma_parameters(var_median, var_upper_quartile) 
    assert alpha > 2.0
@@ -43,6 +67,11 @@ def normal_inv_gamma_parameters(mu_median, mu_upper_quartile, var_median, var_up
    return mu_0, lamb, alpha, beta

 def max_inv_gamma_upper_quartile(median, a_min=2+1e-5):
+    """
+    Returns the maximal upper quartile of an inverse gamma distribution with given `median` so that the distribution has a variance (that is, the shape parameter is larger than 2).
+    :param median: float, should be positive
+    :param a_min: For numerical reasons this number is used as the threshold instead of alpha=2 (defaults to 2+1e-5)
+    """
    ppf = lambda q: scipy.stats.invgamma.ppf(q, a_min)
    return ppf(0.75)/ppf(0.5) * median
    
--- a/vpvc_algorithm/ssd_framework.py
+++ b/vpvc_algorithm/ssd_framework.py
+"""
+Sample size planning frameworks based on the variation of the posterior variance criterion (VPVC).
+
+Currently there are three classes implemented:
+    + `generic_ssd_framework`: generic model, can be used for inheriting
+    + `poi_ssd_framework`: For a Poisson data distribution
+    + `normal_ssd_framework`: For a Normal data distribution
+To create your own framework inherit from `generic_ssd_framework` by following the instructions in its class docstring.
+"""
+
 import math

 import numpy as np
@@ -14,13 +24,22 @@ import numpy


 class generic_ssd_framework():
+    """
+    This is a generic class for sample size determination (SSD) based on the posterior variance, mostly thought to be used for inheritance.
+
+    Inheriting
+    ==========
+    In order to inherit from this class you should provide:
+    - A method apv with input n (expectation of the posterior variance under the prior predictive)
+    - A method vpv with input n (variance of the posterior variance)
+    - An attribute optimizer that takes as arguments a criterion, a minimal n and a maximal n as in the module optimize (running `super().__init__()` within the `__init__` of your inherited class sets it to the default, `optimize.brute_force`)
+    To hand over prior knowledge reimplement the `__init__` method. Users might also want to implement, for their convenience, a `make_inference` method as in `poi_ssd_framework` or `normal_ssd_framework`, which is however not mandatory.
+    Usage
+    =====
+    - To do sample size planning based on the VPVC use ssd_vpvc
+    - To do sample size planning using the more classical APVC use ssd_apvc
+    """
    def __init__(self):
-        """
-        This is a generic class for sample size determination (SSD) based on the posterior variance. In order to inherit from this class you should provide:
-        - An attribute optimizer that takes as argument a criterion, a minimal n and a maximimal as in the module optimize
-        - A method apv (average posterior variance)
-        - A method vpv (fluctuations off apv)
-        """
        self.optimizer = optimize.brute_force

    def apv(self, n): 
@@ -45,12 +64,14 @@ class generic_ssd_framework():
        criterion = lambda n: self.apv(n) - epsilon**2 <= 0
        return self.optimizer(criterion, n_min, n_max)

-    def ssd_var(self, epsilon, n_min=1, n_max=n_max):
-        criterion = lambda n: self.vpv(n) - epsilon**2 <= 0
-        return self.optimizer(criterion, n_min, n_max)
-
-
 class poi_ssd_framework(generic_ssd_framework):
+    """
+    Framework for VPVC sample size planning for Poisson distributed data, inherits from generic_ssd_framework. The prior knowledge can be specified via **one** of the following three dictionaries, each of which should map every listed key to a float:
+    :param prior_knowledge: Should contain 'mean' and 'std'
+    :param hyperparameters: Should contain 'alpha' and 'beta'
+    :param quartiles: Should contain 'median' and 'upper_quartile'
+    To do sample size planning use the methods ssd_vpvc or ssd_apvc.
+    """
    def __init__(self, prior_knowledge=None, hyperparameters=None, quartiles=None):
       # Determine alpha and beta or raise error
       if prior_knowledge is not None:
@@ -74,7 +95,7 @@ class poi_ssd_framework(generic_ssd_framework):
               raise KeyError('The quartiles should at least contain "median" and "upper_quartile"')
       # fix brute_force as standard optimizer
       self.optimizer = optimize.brute_force
-                                             
+
    def apv(self, n):
        n_beta = n + self.beta
        return self.alpha/self.beta * 1/n_beta
@@ -130,37 +151,34 @@ class poi_ssd_framework(generic_ssd_framework):
        return post_mean, post_std


-
-                                                 
 class normal_ssd_framework(generic_ssd_framework):
+    """
+    Framework for VPVC sample size planning for Normally distributed data, inherits from generic_ssd_framework. The prior knowledge can be specified via **one** of the following three dictionaries, each of which should map every listed key to a float:
+    :param prior_knowledge: Should contain 'mu_mean' , 'mu_std', 'sigma_squared_mean', 'sigma_squared_std'. 
+    :param hyperparameters: Should contain 'mu_0', 'lambda', 'alpha' and 'beta' 
+    :param quartiles: Should contain 'mu_median', 'mu_upper_quartile', 'sigma_median' and 'sigma_upper_quartile'
+    To do sample size planning use the methods `ssd_vpvc` or `ssd_apvc`.
+    """
    def __init__(self, prior_knowledge=None, hyperparameters=None, quartiles=None):
        # Determine alpha, beta and lambda or raise error
        if prior_knowledge is not None:
            try:
-                mu_std, sigma_squared_mean, sigma_squared_std = prior_knowledge['mu_std'], prior_knowledge['sigma_squared_mean'], prior_knowledge['sigma_squared_std']
+                mu_mean, mu_std, sigma_squared_mean, sigma_squared_std = prior_knowledge['mu_mean'], prior_knowledge['mu_std'], prior_knowledge['sigma_squared_mean'], prior_knowledge['sigma_squared_std']
            except KeyError:
-                raise KeyError('The prior knowledge should at least specify "mu_std", "sigma_squared_mean" and "sigma_squared_std"')
-            try:
-                # not really used in computation below
-                # but can be handy for reference
-                self.mu_0 = prior_knowledge['mu_mean']
-            except KeyError:
-                self.mu_0 = 0.0
+                raise KeyError('The prior knowledge should contain "mu_mean", "mu_std", "sigma_squared_mean" and "sigma_squared_std"')
            assert mu_std>0 
            assert sigma_squared_mean>0 and sigma_squared_std>0
+            self.mu_0 = mu_mean
            self.lamb = mu_std**2/sigma_squared_mean
            self.alpha = 2 + (sigma_squared_mean/sigma_squared_std)**2
            self.beta = (self.alpha-1) * sigma_squared_mean
        if hyperparameters is not None:
            try:
-                self.alpha, self.beta, self.lamb = hyperparameters['alpha'], hyperparameters['beta'], hyperparameters['lambda']
-            except KeyError:
-                raise KeyError('The hyperparameters should at least contain "alpha", "beta" and "lambda and "lambda"')
-            assert alpha>0 and beta>0
-            try:
-                self.mu_0 = hyperparameters['mu_0']
+                self.mu_0, self.alpha, self.beta, self.lamb = hyperparameters['mu_0'], hyperparameters['alpha'], hyperparameters['beta'], hyperparameters['lambda']
            except KeyError:
-                self.mu_0 = 0.0
+                raise KeyError('The hyperparameters should contain "mu_0", "lambda",  "alpha" and "beta"' )
+            # alpha>1.0 is enough for the APVC
+            assert alpha>1.0 and beta>0
        if quartiles is not None:
            try:
                mu_median, mu_upper_quartile = quartiles['mu_median'], quartiles['mu_upper_quartile']

--- a/vpvc_gui.py
+++ b/vpvc_gui.py
+"""
+Run the gui for the sample size planning based on the VPVC.
+Start the gui by importing this module. For a different fontsize hardcode the value below to a different integer.
+
+The graphical user interface is actually managed by the package vpvc_interface, the algorithmic backbone is the package vpvc_algorithm (especially the module vpvc_algorithm.ssd_framework), which can be used as a standalone package (without a GUI) for sample size planning based on the VPVC.
+"""
 from vpvc_interface.gui import run_gui

 run_gui(fontsize=11)

--- a/vpvc_interface/gui.py
+++ b/vpvc_interface/gui.py
+"""
+This module binds together `vpvc_interface.setupmenu`, `vpvc_interface.resultmenu` and `vpvc_algorithm.ssd_framework` to create a gui in tkinter for sample size planning. Use the function `run_gui` to start the gui.
+"""
 from importlib import reload

 import tkinter as tk
@@ -21,6 +24,13 @@ n_max = vpvc_algorithm.ssd_framework.n_max
 fontsize=11
 
 def run_gui(fontsize=fontsize):
+    """
+    Starts the `tkinter` gui for sample size planning using the variation of the posterior variance criterion (VPVC) by putting together various objects from vpvc_interface:
+    - vpvc_interface.setupmenu: Allows to specify the data distribution type, prior knowledge and the precision parameters epsilon and k
+    - vpvc_interface.resultmenu: Comes with a Button to allow for the computation of the sample size, a `matplotlib` plot of the VPVC and a small inference menu.
+    In addition a `status_bar`, showing the content of `status_variable` is displayed at the bottom.
+    :param fontsize: Specify another `fontsize` that shall be used throughout the gui. There is no shortcut for adjusting the fontsize during runtime.
+    """
    # main window
    root = tk.Tk()
    # preliminaries

--- a/vpvc_interface/resultmenu.py
+++ b/vpvc_interface/resultmenu.py
-
+"""
+This module contains classes that are designed to return the results of sample size planning in a GUI.
+"""
 import os
 import tkinter as tk
 from PIL import Image
@@ -12,6 +14,14 @@ from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg, NavigationToolb
 import vpvc_interface.color_settings as color_settings

 class PlotMenu():
+    """
+    A menu to plot the dependency of the VPVC on the sample size n
+    :param master: Master frame
+    :param font: A `tk.tkFont`
+    :param status_variable: Should be a `tk.StringVar`. Errors and Messages will be written into this variable
+    :param figure_par: Size of the figure
+    :param n_max: The maximal sample size shown in the plot
+    """
    def __init__(self, master, font, status_variable, figure_par = {'figsize': (4,4), 'dpi': 80}, n_max = 1e6):
        self.master = master
        self.font = font
@@ -54,6 +64,14 @@ class PlotMenu():


 class ComputeButton():
+    """
+    Button that executes `command` once it is pressed.
+    :param master: Master frame
+    :param font: A `tk.tkFont`
+    :param status_variable: Should be a `tk.StringVar`. Errors and Messages will be written into this variable
+    :param text: A string, the text on the button
+    :param command: Command to execute once the button is pressed. Is supposed to take no arguments.
+    """
    def __init__(self, master, font, status_variable, text, command):
        self.master = master
        self.font = font
@@ -69,6 +87,13 @@ class ComputeButton():


 class InferenceMenu():
+    """
+    A menu to perform Bayesian inference
+    :param master: Master frame
+    :param font: A `tk.tkFont`
+    :param status_variable: Should be a `tk.StringVar`. Errors will be written into this variable
+    :param label_icons: These PNG images will be displayed in the inference menu.
+    """
    def __init__(self, master, font, status_variable, label_icons=
            {'Normal': ('mean.png', 'std.png'),
             'Poisson': ('mean.png',)}):
@@ -168,12 +193,22 @@ class InferenceMenu():


 class ResultMenu():
+    """
+    A wrapper putting resultmenu.ComputeButton, resultmenu.PlotMenu and resultmenu.InferenceMenu together
+    :param master: Master frame
+    :param font: A `tk.tkFont`
+    :param status_variable: Should be a `tk.StringVar`. Errors will be written into this variable
+    :param ssd_command: Command to perform the sample size determination. Is supposed to take one argument (n_max) and return a list of outputs (n, ssd, k, eps, distribution_name). 
+    :param text: will be forwarded to ComputeButton
+    :param figure_par: Will be forwarded to PlotMenu
+    :param n_max: Will be forwarded to PlotMenu and ssd_command
+    """
    borderwidth='20pt'
-    def __init__(self, master, font, status_variable, command, text='Compute sample size', figure_par = {'figsize': (3,3), 'dpi': 100}, n_max=1e6):
+    def __init__(self, master, font, status_variable, ssd_command, text='Compute sample size', figure_par = {'figsize': (3,3), 'dpi': 100}, n_max=1e6):
        self.master = master
        self.font = font
        self.status_variable = status_variable
-        self.command = command
+        self.ssd_command = ssd_command
        self.n_max = n_max
        self.button_ssd = tk.Frame(master)
        self.buttonframe = tk.Frame(self.button_ssd)
@@ -209,7 +244,7 @@ class ResultMenu():

    def Computation(self):
        n_max = self.n_max
-        n, ssd, k, eps, distribution_name = self.command(n_max)
+        n, ssd, k, eps, distribution_name = self.ssd_command(n_max)
        self.samplesizetext.configure(state='normal')
        self.samplesizetext.delete("1.0", tk.END)
        if n == np.inf:
@@ -222,3 +257,6 @@ class ResultMenu():
        self.plotmenu.plot(n=n, ssd=ssd, k=k, eps=eps) 
        self.inferencemenu.draw(distribution_name, ssd, n)

+
+
+
--- a/vpvc_interface/setupmenu.py
+++ b/vpvc_interface/setupmenu.py
+"""
+This module contains classes for entering fundamental information for a sample size planning in a GUI. The information entered here can be used by the classes of the module `resultmenu`
+"""
 import tkinter as tk
 import tkinter.ttk as ttk
 import tkinter.font as TkFont
@@ -10,6 +13,14 @@ import vpvc_algorithm.ssd_framework as ssd_framework
 from vpvc_algorithm.parameter_from_quantile import max_inv_gamma_upper_quartile

 class DistributionMenu():
+    """
+    A drop down menu for specifying the data distribution
+    :param master: Master frame
+    :param font: A `tk.TkFont`
+    :param status_variable: Should be a `tk.StringVar`. Errors and Messages will be written into this variable
+    :param command: A command that will be executed if an entry from the drop down menu is selected
+    :param Distributions: A list of strings - the possible entries of the dropdownmenu
+    """
    def __init__(self, master, font, status_variable, command, Distributions = ['Normal', 'Poisson']):
        self.font = font
        self.status_variable = status_variable
@@ -27,7 +38,14 @@ class DistributionMenu():


 class PriorMenu():
-    def __init__(self, master, font, status_variable, distribution_name, frame_size = {'width': 350, 'height': 160}):
+    """
+    A Menu for inserting the prior knowledge using quartiles
+    :param master: Master Frame
+    :param font: A `tk.tkFont`
+    :param status_variable: Should be a `tk.StringVar`. Errors and Messages will be written into this variable
+    :param distribution_name: A string that should match either 'Poisson' or 'Normal'
+    """
+    def __init__(self, master, font, status_variable, distribution_name):
        self.master = master
        self.font = font
        self.status_variable = status_variable
@@ -172,6 +190,12 @@ class PriorMenu():


 class PrecisionMenu():
+    """
+    A menu for entering the precision parameters epsilon and k 
+    :param master: Master frame
+    :param font: A `tk.tkFont`
+    :param status_variable: Should be a `tk.StringVar`. Errors and Messages will be written into this variable
+    """
    def __init__(self, master, font, status_variable):
        self.master = master
        self.font = font
@@ -204,6 +228,12 @@ class PrecisionMenu():


 class SetupMenu():
+    """
+    A wrapper putting setupmenu.DistributionMenu, setupmenu.PriorMenu and setupmenu.PrecisionMenu together
+    :param master: Master frame
+    :param font: A `tk.tkFont`
+    :param status_variable: Should be a `tk.StringVar`. Errors and Messages will be written into this variable
+    """
    borderwidth = "18pt"
    def __init__(self, master, font, status_variable):
        self.master = master