|
| 1 | +"""shim.py: Shim code to interface between the calling conventions expected by the current |
| 2 | +bravo_sample_sizes() code and the API currently provided by the athena module. |
| 3 | +
|
| 4 | +Over time we expect both the Arlo and the Athena calling conventions to change, so |
| 5 | +this is a very temporary solution. |
| 6 | +
|
| 7 | +TODO: Is it worth finding a way to keep the Audit objects cached? |
| 8 | +Or is it better to make them up for each pairwise estimate as we go? |
| 9 | +""" |
| 10 | + |
| 11 | +import logging |
| 12 | +import math |
| 13 | +from typing import Any |
| 14 | +from athena.audit import Audit # type: ignore |
| 15 | + |
| 16 | + |
def make_election(risk_limit, p_w: float, p_r: float) -> Any:
    """
    Build an athena Audit object for a synthetic two-candidate contest.

    The two vote fractions are reduced to the winner's share of the votes
    cast for just the winner and the runner-up, and that share is applied
    to a fixed-size contest of 100000 ballots.

    Inputs:
        risk_limit      - the risk-limit for this audit
        p_w             - the fraction of vote share for the winner
        p_r             - the fraction of vote share for the loser / runner-up

    Outputs:
        audit           - a "minerva" Audit to which the synthetic election
                          has been added and whose single contest is loaded
    """

    ballot_count = 100000
    contest_id = "ArloContest"

    # Undiluted "two-way" share: the winner's fraction among ballots that
    # went to either the winner or the runner-up.
    two_way_share = p_w / (p_w + p_r)

    # Truncate to whole ballots; the loser receives the remainder so the
    # tally always sums to ballot_count.
    winner_votes = int(ballot_count * two_way_share)
    loser_votes = ballot_count - winner_votes

    election = {
        "name": "ArloElection",
        "total_ballots": ballot_count,
        "contests": {
            contest_id: {
                "contest_ballots": ballot_count,
                "tally": {"A": winner_votes, "LOSER": loser_votes},
                "num_winners": 1,
                "reported_winners": ["A"],
                "contest_type": "PLURALITY",
            },
        },
    }

    audit = Audit("minerva", risk_limit)
    audit.add_election(election)
    audit.load_contest(contest_id)
    return audit
| 55 | + |
| 56 | + |
def get_minerva_test_statistics(
    risk_limit: float, p_w: float, p_r: float, sample_w: int, sample_r: int,
) -> Any:
    """
    Return Minerva p-value
    TODO: refactor to pass in integer vote shares to allow more exact calculations, incorporate or
    track round schedule over time, and handle sampling without replacement.

    Inputs:
        risk_limit      - the risk-limit for this audit
        p_w             - the fraction of vote share for the winner
        p_r             - the fraction of vote share for the loser
        sample_w        - the number of votes for the winner that have already
                          been sampled
        sample_r        - the number of votes for the runner-up that have
                          already been sampled

    Outputs:
        p_value         - p-value for given circumstances, or None when
                          both sample_w and sample_r are zero

    FIXME: need new Minerva-specific test cases - are these exactly right?
    Vs Athena Test cases from https://github.com/gwexploratoryaudits/brla_explore/pull/10/files/988f068e65fd955c8e5d1512865ef5e95a1d7b3c..94693c67aa33a1c642a98336ca5b7fcd32c1ce33#
    test26: pass
    >>> get_minerva_test_statistics(0.1, 0.224472184613, 0.12237580158, 50, 36)
    0.08762086910131112

    test27: fail
    >>> get_minerva_test_statistics(0.1, 0.224472184613, 0.12237580158, 49, 37)
    0.12450655512929908

    FIXME: Should this be 1.0? Or nothing, indicating "None"?
    >>> get_minerva_test_statistics(0.1, 0.224472184613, 0.12237580158, 0, 0)
    >>> get_minerva_test_statistics(0.1, 0.75, 0.25, 7, 0)
    0.05852766346593508
    """

    # calculate the undiluted "two-way" share of votes for the winner
    # (only needed here for the margin reported in the log message)
    p_wr = p_w + p_r
    p_w2 = p_w / p_wr

    audit = make_election(risk_limit, p_w, p_r)

    # With nothing sampled yet there is no round to evaluate, so no risk
    # value is available and None is returned.
    risk = None
    if sample_w or sample_r:
        # Treat everything sampled so far as a single round.
        round_size = sample_w + sample_r
        audit.add_round_schedule([round_size])
        audit.set_observations(round_size, round_size, [sample_w, sample_r])
        risk = audit.status[audit.active_contest].risks[0]

    # Report sample_r under the "sr" label (the runner-up count was
    # previously logged as sample_w by mistake).
    logging.info(
        f"shim get_minerva_test_statistics: margin {(p_w2 - 0.5) * 2} (pw {p_w} pr {p_r}) (sw {sample_w} sr {sample_r}) risk {risk}"
    )

    return risk
| 117 | + |
| 118 | + |
def minerva_sample_sizes(
    risk_limit: float,
    p_w: float,
    p_r: float,
    sample_w: int,
    sample_r: int,
    p_completion: float,
) -> int:
    """
    Return Minerva round size based on completion probability, assuming the election outcome is correct.
    TODO: refactor to pass in integer vote shares to allow more exact calculations, incorporate or
    track round schedule over time, and handle sampling without replacement.

    Inputs:
        risk_limit      - the risk-limit for this audit
        p_w             - the fraction of vote share for the winner
        p_r             - the fraction of vote share for the loser
        sample_w        - the number of votes for the winner that have already
                          been sampled
        sample_r        - the number of votes for the runner-up that have
                          already been sampled
        p_completion    - the desired chance of completion in one round,
                          if the outcome is correct

    Outputs:
        sample_size     - the expected sample size for the given chance
                          of completion in one round

    >>> minerva_sample_sizes(0.1, 0.6, 0.4, 56, 56, 0.7)
    244

    # FIXME: check this
    >>> minerva_sample_sizes(0.1, 0.6, 0.4, 0, 0, 0.7)
    111
    >>> minerva_sample_sizes(0.1, 0.6, 0.4, 0, 0, 0.9)
    179
    """

    # Combined share of the two candidates, and the winner's undiluted
    # "two-way" share within it.
    pair_share = p_w + p_r
    two_way_share = p_w / pair_share

    audit = make_election(risk_limit, p_w, p_r)

    # How far the winner's sampled count falls short of the first round's
    # recorded minimum kmin; stays at zero when nothing has been sampled.
    shortfall = 0
    if sample_w or sample_r:
        drawn = sample_w + sample_r
        audit.add_round_schedule([drawn])
        audit.set_observations(drawn, drawn, [sample_w, sample_r])
        contest_status = audit.status[audit.active_contest]
        shortfall = contest_status.min_kmins[0] - sample_w

    estimate = audit.find_next_round_size([p_completion])
    raw_size = estimate["future_round_sizes"][0] + 2 * shortfall

    # Scale the two-candidate size up to account for ballots cast for
    # neither candidate, rounding up to whole ballots.
    scaled_size = math.ceil(raw_size / pair_share)

    logging.info(
        f"shim sample sizes: margin {(two_way_share - 0.5) * 2} (pw {p_w} pr {p_r}) (sw {sample_w} sr {sample_r}) pstop {p_completion} below_kmin {shortfall} raw {raw_size} scaled {scaled_size}"
    )

    return scaled_size
| 190 | + |
| 191 | + |
if __name__ == "__main__":
    # Executed as a script: run the doctest examples embedded in this
    # module's docstrings.
    import doctest

    doctest.testmod()
0 commit comments