Skip to content

Commit 84f79c2

Browse files
eric-czechmergify[bot]
authored andcommitted
Adding HWE validation code and docs #118
1 parent 1a58857 commit 84f79c2

File tree

10 files changed

+2015
-0
lines changed

10 files changed

+2015
-0
lines changed

validation/gwas/method/hwe/Makefile

+12
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
all: clean chwe.so
2+
3+
clean:
4+
rm -f *.o *.so
5+
6+
chwe.so: chwe.o
7+
gcc -shared -o libchwe.so chwe.o
8+
9+
chwe.o: chwe.c
10+
gcc -c -Wall -Werror -fpic chwe.c
11+
12+

validation/gwas/method/hwe/README.md

+24
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
## HWE Exact Test Validation
2+
3+
This validation produces simulated genotype counts and corresponding HWE statistics from the (C) implementation described in [Wigginton et al. 2005](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC1199378).
4+
5+
The `invoke` [tasks](tasks.py) will compile the C code, simulate genotype counts (inputs for unit tests), and attach p values (outputs for unit tests) from the C code to the genotype counts, as a dataframe.
6+
7+
The [hwe_unit_test.ipynb](hwe_unit_test.ipynb) is only instructive and shows how to debug and possibly extend test cases, perhaps validating inputs/outputs on a scale that wouldn't be included in unit testing.
8+
9+
To export the unit test data, all steps can be run as follows:
10+
11+
```bash
12+
> invoke compile simulate export
13+
Building reference C library
14+
rm -f *.o *.so
15+
gcc -c -Wall -Werror -fpic chwe.c
16+
gcc -shared -o libchwe.so chwe.o
17+
Build complete
18+
Generating unit test data
19+
Unit test data written to data/sim_01.csv
20+
Exporting test data to /home/jovyan/work/repos/sgkit/sgkit/tests/test_hwe
21+
Clearing test datadir at /home/jovyan/work/repos/sgkit/sgkit/tests/test_hwe
22+
Copying data/sim_01.csv to /home/jovyan/work/repos/sgkit/sgkit/tests/test_hwe/sim_01.csv
23+
Export complete
24+
```

validation/gwas/method/hwe/__init__.py

Whitespace-only changes.

validation/gwas/method/hwe/chwe.c

+98
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
// Lift from http://csg.sph.umich.edu/abecasis/Exact/snp_hwe.c
2+
#include<stdio.h>
3+
#include<stdlib.h>
4+
5+
double hwep(int obs_hets, int obs_hom1, int obs_hom2){
6+
if (obs_hom1 < 0 || obs_hom2 < 0 || obs_hets < 0)
7+
{
8+
printf("FATAL ERROR - SNP-HWE: Current genotype configuration (%d %d %d ) includes a"
9+
" negative count", obs_hets, obs_hom1, obs_hom2);
10+
exit(EXIT_FAILURE);
11+
}
12+
13+
int obs_homc = obs_hom1 < obs_hom2 ? obs_hom2 : obs_hom1;
14+
int obs_homr = obs_hom1 < obs_hom2 ? obs_hom1 : obs_hom2;
15+
16+
int rare_copies = 2 * obs_homr + obs_hets;
17+
int genotypes = obs_hets + obs_homc + obs_homr;
18+
19+
double * het_probs = (double *) malloc((size_t) (rare_copies + 1) * sizeof(double));
20+
if (het_probs == NULL)
21+
{
22+
printf("FATAL ERROR - SNP-HWE: Unable to allocate array for heterozygote probabilities" );
23+
exit(EXIT_FAILURE);
24+
}
25+
26+
int i;
27+
for (i = 0; i <= rare_copies; i++)
28+
het_probs[i] = 0.0;
29+
30+
/* start at midpoint */
31+
int mid = rare_copies * (2 * genotypes - rare_copies) / (2 * genotypes);
32+
33+
/* check to ensure that midpoint and rare alleles have same parity */
34+
if ((rare_copies & 1) ^ (mid & 1))
35+
mid++;
36+
37+
int curr_hets = mid;
38+
int curr_homr = (rare_copies - mid) / 2;
39+
int curr_homc = genotypes - curr_hets - curr_homr;
40+
41+
het_probs[mid] = 1.0;
42+
double sum = het_probs[mid];
43+
for (curr_hets = mid; curr_hets > 1; curr_hets -= 2)
44+
{
45+
het_probs[curr_hets - 2] = het_probs[curr_hets] * curr_hets * (curr_hets - 1.0)
46+
/ (4.0 * (curr_homr + 1.0) * (curr_homc + 1.0));
47+
sum += het_probs[curr_hets - 2];
48+
49+
/* 2 fewer heterozygotes for next iteration -> add one rare, one common homozygote */
50+
curr_homr++;
51+
curr_homc++;
52+
}
53+
54+
curr_hets = mid;
55+
curr_homr = (rare_copies - mid) / 2;
56+
curr_homc = genotypes - curr_hets - curr_homr;
57+
for (curr_hets = mid; curr_hets <= rare_copies - 2; curr_hets += 2)
58+
{
59+
het_probs[curr_hets + 2] = het_probs[curr_hets] * 4.0 * curr_homr * curr_homc
60+
/((curr_hets + 2.0) * (curr_hets + 1.0));
61+
sum += het_probs[curr_hets + 2];
62+
63+
/* add 2 heterozygotes for next iteration -> subtract one rare, one common homozygote */
64+
curr_homr--;
65+
curr_homc--;
66+
}
67+
68+
for (i = 0; i <= rare_copies; i++)
69+
het_probs[i] /= (sum > 0 ? sum : 1e-128);
70+
71+
/* alternate p-value calculation for p_hi/p_lo
72+
double p_hi = het_probs[obs_hets];
73+
for (i = obs_hets + 1; i <= rare_copies; i++)
74+
p_hi += het_probs[i];
75+
76+
double p_lo = het_probs[obs_hets];
77+
for (i = obs_hets - 1; i >= 0; i--)
78+
p_lo += het_probs[i];
79+
80+
81+
double p_hi_lo = p_hi < p_lo ? 2.0 * p_hi : 2.0 * p_lo;
82+
*/
83+
84+
double p_hwe = 0.0;
85+
/* p-value calculation for p_hwe */
86+
for (i = 0; i <= rare_copies; i++)
87+
{
88+
if (het_probs[i] > het_probs[obs_hets])
89+
continue;
90+
p_hwe += het_probs[i];
91+
}
92+
93+
p_hwe = p_hwe > 1.0 ? 1.0 : p_hwe;
94+
95+
free(het_probs);
96+
97+
return p_hwe;
98+
}

validation/gwas/method/hwe/chwe.o

3.38 KB
Binary file not shown.
+201
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,201 @@
1+
n_het,n_hom_1,n_hom_2,p
2+
1,0,0,1.0
3+
51,27,26,0.845926828898329
4+
101,47,56,0.88930424473698
5+
151,71,99,0.3684874920023832
6+
201,137,91,0.28344213097688165
7+
251,154,128,0.1937805064539818
8+
301,158,201,0.03464156123462855
9+
351,115,117,9.778051416840305e-07
10+
401,123,253,0.09001690989721362
11+
451,275,292,0.00027739277561481717
12+
501,346,310,5.785950777451914e-06
13+
551,267,337,0.15629004579821143
14+
601,208,334,0.031198370505333042
15+
651,232,441,0.7782347792381512
16+
701,356,326,0.6282550560763566
17+
751,304,457,0.9172428161320041
18+
801,386,422,0.8810870885296063
19+
851,261,465,0.00012095475067049487
20+
901,490,492,0.06519606837210713
21+
951,644,544,4.879275092149919e-07
22+
1001,444,475,0.0614320395959403
23+
1051,608,340,0.0016721676932256468
24+
1101,623,625,0.002583239824235713
25+
1151,442,404,9.22666204121466e-12
26+
1201,511,535,0.0011559180266171758
27+
1251,660,594,0.999999999999999
28+
1301,904,443,0.5213032029206021
29+
1351,518,492,2.4059583160333022e-12
30+
1401,786,562,0.17901712606288267
31+
1451,705,577,0.0008509642369958977
32+
1501,545,516,4.0878258413432674e-18
33+
1551,872,551,0.0026562564901761523
34+
1601,606,716,1.7304716281165935e-07
35+
1651,1037,559,0.026125733725720014
36+
1701,1080,575,0.03402821665047714
37+
1751,1209,853,5.572653418323456e-06
38+
1801,1243,976,2.2818771131803835e-10
39+
1851,1102,584,3.710099688663844e-05
40+
1901,785,661,2.0279602738807187e-15
41+
1951,816,677,2.8569078183446102e-15
42+
2001,854,931,0.00044371590402349367
43+
2051,667,1183,1.2488023350848009e-05
44+
2101,1106,853,0.01396809036282682
45+
2151,1095,726,6.691337471001894e-09
46+
2201,1167,1478,9.432197243064005e-10
47+
2251,962,1276,0.610161450028642
48+
2301,811,1349,0.0019349995547350155
49+
2351,977,877,1.444515271924519e-14
50+
2401,1283,739,1.1565179920072941e-11
51+
2451,1547,739,6.164163620718031e-06
52+
2501,1428,1020,0.21851919021140662
53+
2551,1515,1747,2.6790289570097594e-20
54+
2601,1039,1379,0.003812650444672208
55+
2651,1423,1402,0.018717042880622592
56+
2701,1051,1839,0.2872761239911692
57+
2751,1317,1756,0.00014693980061811332
58+
2801,1623,1173,0.5905238038361554
59+
2851,1783,1307,0.009007126080770289
60+
2901,1892,1544,7.821435236313817e-11
61+
2951,1926,1702,1.5502927534087137e-16
62+
3001,1770,1502,0.0010079660986331209
63+
3051,2082,1701,6.517752718327402e-18
64+
3101,1456,1682,0.7226272913884884
65+
3151,969,1325,3.506219413030305e-33
66+
3201,1805,1331,0.21569403737115994
67+
3251,1778,1532,0.5365544398754494
68+
3301,1169,1384,4.982094499906052e-23
69+
3351,1769,1797,0.010072541488350021
70+
3401,1801,1908,0.00028325611912255233
71+
3451,1935,1630,0.23164761482387491
72+
3501,2305,1565,0.0005408313105776889
73+
3551,1684,2332,2.2869325074804336e-06
74+
3601,2241,2094,2.3394325551477485e-16
75+
3651,1241,2438,0.04636664490239822
76+
3701,2167,2588,1.9557166448260108e-29
77+
3751,1349,2427,0.13201350023523056
78+
3801,1387,2076,1.9301027903365754e-06
79+
3851,1346,2461,0.017224681698312864
80+
3901,2430,2058,4.440082083118905e-10
81+
3951,1828,1294,2.647713527040051e-25
82+
4001,2316,1926,0.014256792586706637
83+
4051,2385,2619,2.3465822237767657e-23
84+
4101,2830,2634,5.149856509652088e-44
85+
4151,1264,1843,4.48467880717666e-38
86+
4201,2486,1548,0.002389012903737995
87+
4251,2161,1367,3.000197368786019e-20
88+
4301,1634,1322,9.919986792554593e-58
89+
4351,2686,1695,0.38577132434045974
90+
4401,1928,2954,0.0001199016650457927
91+
4451,2589,1391,1.0958652901993421e-12
92+
4501,1646,2469,4.5269662611402833e-07
93+
4551,2416,1798,4.521174352826995e-05
94+
4601,3099,2510,3.742694664287275e-22
95+
4651,2391,2492,0.01847696637280268
96+
4701,2783,1996,0.9012359250831754
97+
4751,2182,1824,4.711674213296269e-16
98+
4801,1797,3253,0.7262132846779064
99+
4851,2890,2406,2.680676415905425e-05
100+
4901,1916,1968,1.9576351291926123e-27
101+
4951,1600,2345,3.4468103297849734e-30
102+
5001,2124,2893,0.6729732567000968
103+
5051,2278,1878,1.9099808504594497e-21
104+
5101,1580,1667,2.7112967783602924e-92
105+
5151,2945,2480,0.0133474541803742
106+
5201,2676,3425,9.423282617882861e-16
107+
5251,3655,2030,0.06045595125773339
108+
5301,2996,2148,0.027356813588423446
109+
5351,1649,3228,3.454974198424328e-13
110+
5401,2311,2448,1.8596430888102382e-10
111+
5451,2918,3447,2.2579116525279354e-16
112+
5501,3034,3570,8.880020019491782e-23
113+
5551,2272,3437,0.7316458577549785
114+
5601,2096,3814,0.6201032727285236
115+
5651,3249,2182,0.002020139572176394
116+
5701,3870,3376,3.265438252095903e-41
117+
5751,2309,2216,9.550965561735892e-34
118+
5801,2942,1799,1.584359459942145e-31
119+
5851,2240,2749,1.6772397283662627e-17
120+
5901,2653,2864,0.0003015718221877463
121+
5951,2446,3182,0.0005946478430517058
122+
6001,3873,2082,0.0035099937981355705
123+
6051,3067,2134,1.4269228740338587e-18
124+
6101,3579,2796,0.044070159340087074
125+
6151,3236,2296,1.0207282058200817e-10
126+
6201,2219,3070,6.128444533922714e-20
127+
6251,2764,4226,4.0114938634006145e-07
128+
6301,3819,3777,4.523296332293788e-28
129+
6351,4201,2117,0.0006366210109203425
130+
6401,3334,3416,0.0024069172689109197
131+
6451,4417,2689,0.0001510795635284454
132+
6501,2576,2211,1.464394314932261e-59
133+
6551,2008,4401,1.1960349880178966e-07
134+
6601,3749,4053,2.3049912620301826e-23
135+
6651,2744,3555,0.00039048361950001163
136+
6701,2181,3311,3.0049171175443936e-33
137+
6751,4664,4392,4.673341342704634e-75
138+
6801,2960,4656,2.1444652549020045e-07
139+
6851,2690,4656,0.05748086029092399
140+
6901,4668,4276,7.549019391941723e-59
141+
6951,3838,4516,1.0082038188372868e-28
142+
7001,2920,4477,0.05455036043254638
143+
7051,3857,2152,2.004089851444536e-29
144+
7101,3116,2551,2.600550198892281e-38
145+
7151,4953,3513,1.3795033294064868e-21
146+
7201,3592,4002,0.0016733670587886574
147+
7251,3244,2572,5.114970910833606e-38
148+
7301,4591,2744,0.09630014560631561
149+
7351,3708,2864,1.850482626161518e-12
150+
7401,2509,4772,7.605121039219745e-05
151+
7451,5134,5098,1.9026070284002625e-97
152+
7501,4970,4572,8.417077321620368e-55
153+
7551,3271,2510,5.027720092225525e-56
154+
7601,3518,2986,4.747697469921551e-21
155+
7651,2700,2458,1.5342703779996893e-108
156+
7701,4545,2345,3.6554706456314347e-22
157+
7751,4714,2780,3.593455829002233e-05
158+
7801,2588,2619,4.290089679393479e-115
159+
7851,4465,3125,0.002296908375565855
160+
7901,3699,4131,0.5129622712973023
161+
7951,5122,4697,2.8201026369032667e-44
162+
8001,3265,2821,1.2548891947495432e-59
163+
8051,2593,3386,4.265167768158848e-72
164+
8101,3279,3908,2.925977657425333e-14
165+
8151,4673,4713,1.1393802442050531e-20
166+
8201,3390,3706,2.6510169444939377e-19
167+
8251,3073,5077,0.006392608069466881
168+
8301,2679,4804,3.445753907043592e-19
169+
8351,5106,5102,2.4696466905259013e-42
170+
8401,3392,3776,2.188930822752493e-23
171+
8451,4521,3457,2.325359213936893e-05
172+
8501,3811,3220,3.675484972066898e-33
173+
8551,4138,2717,6.2817846465740635e-50
174+
8601,5331,2845,4.01855250200448e-10
175+
8651,4390,3656,8.142102766041389e-07
176+
8701,4620,5949,7.199420468330689e-38
177+
8751,4885,2749,1.232938976024364e-28
178+
8801,4155,4435,0.10455510731861506
179+
8851,4553,5067,2.8097763112411485e-08
180+
8901,3658,3129,3.388296324761072e-65
181+
8951,4091,6109,3.676828438505e-14
182+
9001,3374,5955,0.7977941668959887
183+
9051,4684,4369,0.9881371290166111
184+
9101,5941,4399,7.830542740608026e-16
185+
9151,5396,4205,0.006102055167403756
186+
9201,6087,5299,3.5473799808013304e-51
187+
9251,5364,3987,1.0
188+
9301,5605,5156,1.2301526743633959e-24
189+
9351,3703,3405,7.065360404513176e-69
190+
9401,5815,6427,2.27403687143121e-82
191+
9451,4567,5069,0.2128530705427019
192+
9501,6109,4587,2.098685170824773e-14
193+
9551,6501,5064,4.208356112420144e-40
194+
9601,6032,6370,1.7234955440937876e-79
195+
9651,6043,3510,0.001588282559753373
196+
9701,5350,4456,0.6461087776569374
197+
9751,3169,4579,1.8055731034783026e-58
198+
9801,3954,6268,0.270043231915957
199+
9851,3086,6734,1.8497805574159475e-07
200+
9901,4377,4383,6.979118182818968e-17
201+
9951,3050,3722,6.099767234370392e-137

0 commit comments

Comments
 (0)