File tree 2 files changed +23
-2
lines changed 2 files changed +23
-2
lines changed Original file line number Diff line number Diff line change @@ -33,3 +33,8 @@ def test_generate_code_lists(test_context, tmp_path):
33
33
34
34
# generate_code_lists() runs
35
35
iea_web .generate_code_lists (tmp_path )
36
+
37
+
38
def test_fuzz_data(test_context, tmp_path):
    """Smoke test: ``iea_web.fuzz_data()`` runs to completion without raising."""
    # Use the pytest-provided temporary directory as the target path.
    iea_web.fuzz_data(target_path=tmp_path)
Original file line number Diff line number Diff line change 2
2
import logging
3
3
from pathlib import Path
4
4
5
+ import numpy as np
5
6
import pandas as pd
6
7
import yaml
7
8
from iam_units import registry
24
25
25
26
#: File name containing data.
26
27
FILE = "WBAL_12052022124930839.csv"
27
- FILE = "cac5fa90-en.zip"
28
+ # FILE = "cac5fa90-en.zip"
28
29
29
- NROWS = 1e7
30
+ NROWS = 1e6
30
31
31
32
32
33
def _read (base_path = None , ** kwargs ) -> pd .DataFrame :
@@ -142,3 +143,18 @@ def _check1(value):
142
143
cl_path .write_text (yaml .dump (data ))
143
144
144
145
146
def fuzz_data(base_path=None, target_path=None):
    """Generate a fuzzed subset of the data for testing.

    The data are loaded with :func:`_read`, reduced to at most two rows (the first
    and the last, in file order) for each (FLOW, PRODUCT, COUNTRY) combination, and
    the "Value" column is replaced with random numbers.

    Parameters
    ----------
    base_path : optional
        Passed through unchanged to :func:`_read`.
    target_path : optional
        Currently unused; intended as the directory in which the fuzzed file will
        be written (see the TODO at the end of the function).

    Returns
    -------
    pandas.DataFrame
        The reduced data, ordered by the key columns, with a fresh 0-based index
        and random "Value" entries.
    """
    keys = ["FLOW", "PRODUCT", "COUNTRY"]
    data = _read(base_path)

    # - Reduce the data by only taking 2 periods for each (flow, product, country).
    #   Boolean masks are used instead of GroupBy.take([0, -1]): take() returns the
    #   same row twice for a group with a single row, and it moves the key columns
    #   into the index, where reset_index(drop=True) would then discard them.
    is_first = ~data.duplicated(subset=keys, keep="first")
    is_last = ~data.duplicated(subset=keys, keep="last")
    # groupby() skips rows with a missing key by default; keep that behaviour.
    has_keys = data[keys].notna().all(axis=1)
    subset = (
        data[(has_keys & (is_first | is_last)).to_numpy()]
        .sort_values(keys)
        .reset_index(drop=True)
    )

    # - Replace the actual values with random.
    subset = subset.assign(Value=np.random.rand(len(subset)))

    # TODO write to file
    # path = (target_path or package_data_path("iea")).joinpath(f"fuzzed-{FILE}")

    # Hand the result back so callers (and tests) can inspect it; previously the
    # computed frame was silently discarded.
    return subset
You can’t perform that action at this time.
0 commit comments