import json
import re
from typing import Dict, List, Optional, Tuple

# A path segment is treated as a class name when, after at most two leading
# underscores, it starts with an uppercase ASCII letter (e.g. "Foo", "_Foo",
# "__Foo"). Compiled once so the loop in format_name does not re-parse it.
_CLASS_NAME_PATTERN = re.compile(r"_{0,2}[A-Z]")


def generate_unused_annotations(
    in_file_path: str, package_name: str = "sklearn"
) -> Dict[str, Dict[str, str]]:
    """
    Build the "unused" annotations for the names listed in a JSON file.

    Every name is converted with format_name; the result maps each formatted
    name to a dict of the form {"target": <formatted name>}.

    :param in_file_path: JSON file that contains a list of unused functions or classes
    :param package_name: Package prefix passed on to format_name
    :return: Dict that maps each formatted name to its annotation
    :raises FileNotFoundError: If in_file_path does not exist
    """

    with open(in_file_path, "r", encoding="UTF-8") as in_file:
        data = json.load(in_file)

    unuseds: Dict[str, Dict[str, str]] = {}
    for name in data:
        formatted_name = format_name(name, package_name)
        if formatted_name is None:
            # A JSON null has no target; storing it would add a None key and
            # break the declared Dict[str, Dict[str, str]] return type.
            continue
        unuseds[formatted_name] = {"target": formatted_name}

    return unuseds


def format_name(name: Optional[str], package_name: str = "sklearn") -> Optional[str]:
    """
    Convert a dotted qualified name into the slash-separated target format.

    The result starts with package_name followed by "/" and the first dotted
    part. The following parts are joined with "." until the first part that
    looks like a class name (an uppercase letter after at most two leading
    underscores); from that part on they are joined with "/". The last part
    is always preceded by "/".

    Example: "sklearn.cluster._kmeans.MiniBatchKMeans.random_state_" becomes
    "sklearn/sklearn.cluster._kmeans/MiniBatchKMeans/random_state_".

    :param name: Dotted qualified name, or None
    :param package_name: Prefix that is put in front of the formatted name
    :return: The formatted name, or None if name is None
    """

    if name is None:
        return None

    parts = name.split(".")
    prefix = package_name + "/" + parts[0]

    if len(parts) == 1:
        return prefix

    *middle_parts, last_part = parts[1:]

    segments = [prefix]
    inside_class = False
    for part in middle_parts:
        # Once a class name has been seen, every later part is a member of
        # that class and therefore separated by "/" instead of ".".
        if not inside_class and _CLASS_NAME_PATTERN.match(part):
            inside_class = True
        segments.append(("/" if inside_class else ".") + part)

    segments.append("/" + last_part)
    return "".join(segments)
assert format_name("") == "sklearn/" + + +def test_generate(): + assert ( + generate_unused_annotations( + "tests/commands/generate_annotations/unused_functions_list.json" + ) + == EXPECTED_VALUE + ) + + +def test_generate_bad_path(): + with pytest.raises(FileNotFoundError): + generate_unused_annotations("aaaaaaaaaaaAAAAAAAAAAAA") diff --git a/package-parser/tests/commands/generate_annotations/unused_functions_list.json b/package-parser/tests/commands/generate_annotations/unused_functions_list.json new file mode 100644 index 000000000..e4e850adc --- /dev/null +++ b/package-parser/tests/commands/generate_annotations/unused_functions_list.json @@ -0,0 +1,6 @@ +[ + "sklearn.base._BaseEstimator.__setstate__", + "sklearn.base.is_regressor", + "sklearn.cluster._agglomerative.linkage_tree", + "sklearn.cluster._kmeans.MiniBatchKMeans.init_size_" +]