Skip to content

Commit 0ec0f63

Browse files
move old packages script to repo and add download stats (#36390)
* move old packages script to repo and add download stats * add verify_status_by * mention env var
1 parent dd4f6ad commit 0ec0f63

File tree

2 files changed

+211
-0
lines changed

2 files changed

+211
-0
lines changed

scripts/old_packages/README.md

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
# Output old packages script
2+
3+
This script outputs old packages in the repo, along with their download stats, to a CSV file (`old_packages.csv` in the current working directory).
4+
5+
It defaults to reporting packages that have not seen a release in the last 2 years.
6+
7+
To omit an older package from the script output that we don't want to deprecate, add the `verify_status_by` key to the package's `pyproject.toml`. The value should be a date in the format `YYYY-MM-DD` and indicate the future date by which the package should be re-evaluated for deprecation. If a package should be kept and not re-evaluated, then set the date to 3000-01-01.
8+
9+
```toml
10+
[tool.azure-sdk-build]
11+
verify_status_by = 2025-07-09
12+
```
13+
14+
## Requirements
15+
16+
- requests
17+
- PePy API key ([PePy API](https://www.pepy.tech/pepy-api))
18+
19+
Set the `PEPY_API_KEY` environment variable to your PePy API key.
20+
21+
## Usage
22+
23+
1. Defaults to packages that have not been released in the past 2 years. Omits packages that are already marked Inactive.
24+
25+
```bash
26+
python output_old_packages.py
27+
```
28+
29+
2. Specify the number of years since last release (e.g. 1 year).
30+
31+
```bash
32+
python output_old_packages.py -y 1
33+
```
34+
35+
3. Run unfiltered (don't omit packages that are already Inactive or that have a `verify_status_by` date in the future).
36+
37+
```bash
38+
python output_old_packages.py -f
39+
```
Lines changed: 172 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,172 @@
1+
# --------------------------------------------------------------------------------------------
2+
# Copyright (c) Microsoft Corporation. All rights reserved.
3+
# Licensed under the MIT License. See License.txt in the project root for license information.
4+
# --------------------------------------------------------------------------------------------
5+
6+
import os
7+
import typing
8+
import csv
9+
import argparse
10+
import pathlib
11+
import glob
12+
import datetime
13+
14+
import requests
15+
16+
from ci_tools.parsing import get_config_setting
17+
from pypi_tools.pypi import PyPIClient
18+
19+
# Classifier string that apply_filters compares against a release's "status"
# to decide that a package is already marked inactive.
INACTIVE_CLASSIFIER = "Development Status :: 7 - Inactive"
20+
21+
22+
def get_newer(current: datetime.date, contender: datetime.date) -> datetime.date:
    """Return the later of the two dates.

    When both dates are equal, ``contender`` is the one returned.
    """
    return current if current > contender else contender
26+
27+
28+
def write_csv(packages: typing.Mapping[str, typing.Mapping[str, typing.Any]]) -> None:
    """Write the collected package report to ``./old_packages.csv``.

    :param packages: Mapping of package name to a record that provides the
        ``version``, ``date``, ``status`` and ``downloads_90d`` keys.
    :return: None. When ``packages`` is empty, a message is printed and no
        file is written.
    """
    if not packages:
        print("No packages found.")
        return

    column_names = [
        "Package",
        "Last released version",
        "Last released date",
        "Status",
        "Downloads (last 90 days)",
    ]

    # newline="" lets the csv module control line endings itself.
    with open("./old_packages.csv", mode="w", newline="", encoding="utf-8") as file:
        writer = csv.writer(file)
        writer.writerow(column_names)
        writer.writerows(
            [name, info["version"], info["date"], info["status"], info["downloads_90d"]]
            for name, info in packages.items()
        )
47+
48+
49+
def get_latest_release(
    project: typing.Mapping[str, typing.Any]
) -> typing.Mapping[str, typing.Any]:
    """Return details of the most recently uploaded release of a project.

    :param project: Project metadata providing a ``releases`` mapping of
        version to a list of uploaded files (each with an ``upload_time`` in
        ``%Y-%m-%dT%H:%M:%S`` format) and ``info.classifiers``.
    :return: A dict with ``version`` (str), ``date`` (``datetime.date``) and
        ``status`` (the project's "Development Status" classifier, or an
        empty string when the project declares none).
    :raises ValueError: If no release has any uploaded files.
    """
    classifiers = (project.get("info") or {}).get("classifiers") or []
    # Look the status up by prefix rather than by position so that an empty or
    # differently ordered classifier list cannot crash or mislabel the status.
    status = next(
        (c for c in classifiers if c.startswith("Development Status ::")),
        "",
    )

    latest = None
    for version, files in project["releases"].items():
        if not files:
            # A version can exist without any uploaded whl/sdist; skip it.
            continue

        release_date = datetime.datetime.strptime(
            files[0]["upload_time"], "%Y-%m-%dT%H:%M:%S"
        ).date()
        # ">=" keeps the later-iterated version when two share a date.
        if latest is None or release_date >= latest["date"]:
            latest = {
                "version": version,
                "date": release_date,
                "status": status,
            }

    if latest is None:
        raise ValueError("Project has no releases with uploaded files.")
    return latest
70+
71+
72+
def apply_filters(pkg_path: str, release: typing.Mapping[str, str]) -> bool:
    """Decide whether an old package should stay in the report.

    A package is filtered out (``False``) when its release status equals
    ``INACTIVE_CLASSIFIER``, or when its ``verify_status_by`` setting is today
    or later. A package without a ``verify_status_by`` setting is kept.

    :param pkg_path: Path to the package directory whose configuration is read.
    :param release: Release record providing the ``status`` key.
    :return: ``True`` to keep the package in the report, ``False`` to omit it.
    :raises ValueError: If ``verify_status_by`` is a string that is not a
        ``YYYY-MM-DD`` date.
    """
    if release["status"] == INACTIVE_CLASSIFIER:
        return False

    verify_status_by = get_config_setting(pkg_path, "verify_status_by", default=None)
    if verify_status_by is None:
        return True

    # The setting may come back as a date, a datetime, or a quoted string
    # depending on how it was written; normalise so the comparison below
    # cannot fail with a TypeError.
    if isinstance(verify_status_by, datetime.datetime):
        verify_status_by = verify_status_by.date()
    elif isinstance(verify_status_by, str):
        try:
            verify_status_by = datetime.date.fromisoformat(verify_status_by.strip())
        except ValueError as err:
            raise ValueError(
                f"Invalid verify_status_by value {verify_status_by!r} for {pkg_path}; expected YYYY-MM-DD."
            ) from err

    # Keep the package only once its re-evaluation date has passed.
    return verify_status_by < datetime.date.today()
88+
89+
90+
class PepyClient:
    """Client to interact with the Pepy API to fetch package download data."""

    def __init__(self, api_key: str, timeout: float = 30.0):
        """Initialize the client with your API key - https://www.pepy.tech/pepy-api (register first)

        :param api_key: Key sent in the ``x-api-key`` request header.
        :param timeout: Seconds to wait for the API before giving up, so one
            stalled request cannot hang the whole run.
        """
        self.api_key = api_key
        self.timeout = timeout

    def get_downloads_90d(self, package: str) -> int:
        """Get the total downloads reported by Pepy for a given package.

        :param package: Name of the package to look up.
        :return: Sum of every download count in the response's ``downloads``
            field, or ``-1`` if the request or response decoding fails.
        """
        url = f"https://api.pepy.tech/api/v2/projects/{package}"
        headers = {"x-api-key": self.api_key}
        try:
            response = requests.get(url, headers=headers, timeout=self.timeout)
            response.raise_for_status()
            payload = response.json()
        except (requests.RequestException, ValueError) as e:
            # ValueError covers JSON decode failures on requests versions
            # where that error is not a RequestException.
            print(f"Request failed: {e}")
            return -1

        # "downloads" may be missing or null; both count as no downloads.
        return self._total_downloads(payload.get("downloads") or {})

    @staticmethod
    def _total_downloads(
        downloads: typing.Mapping[str, typing.Mapping[str, int]]
    ) -> int:
        """Sum every count in a two-level mapping of download counts."""
        return sum(
            count
            for versions in downloads.values()
            for count in versions.values()
        )
116+
117+
118+
def main() -> None:
    """Find packages with no recent release and write them to a CSV report.

    Scans ``sdk/<service>/<package>`` directories three levels above this
    file, looks each ``azure*`` package up on PyPI, and reports those whose
    latest release is at least ``--years`` old, together with their download
    count. Requires the ``PEPY_API_KEY`` environment variable.
    """
    parser = argparse.ArgumentParser(description="Output old packages in the repo.")

    parser.add_argument(
        "-y",
        "--years",
        dest="years",
        help="How many years since last release. Defaults to 2.",
        type=int,
        default=2,
    )

    parser.add_argument(
        "-f",
        "--disable-filter",
        dest="filter",
        help="Disable the filter which removes Inactive packages and ones with verify_status_by dates in the future.",
        action="store_false",
    )

    args = parser.parse_args()

    # Fail early with a usage error instead of a bare KeyError traceback.
    api_key = os.environ.get("PEPY_API_KEY")
    if not api_key:
        parser.error("The PEPY_API_KEY environment variable must be set to your PePy API key.")

    sdk_path = pathlib.Path(__file__).parent.parent.parent / "sdk"
    pypi_client = PyPIClient()
    pepy_client = PepyClient(api_key)
    old_packages = {}

    timepoint = datetime.datetime.today().date() - datetime.timedelta(days=365 * args.years)

    # Sorted so the report order is stable from run to run.
    for service in sorted(glob.glob(f"{sdk_path}/*/")):
        for package_path in sorted(glob.glob(f"{service}*/")):
            package_name = pathlib.Path(package_path).name
            if not package_name.startswith("azure"):
                continue

            pypi_project = pypi_client.project(package_name)
            releases = pypi_project.get("releases")
            if releases is None:
                # not yet released
                continue
            if not any(releases.values()):
                # Versions exist but none has uploaded files, so there is no
                # release date to evaluate.
                continue

            latest_release = get_latest_release(pypi_project)
            if latest_release["date"] > timepoint:
                # Released recently enough; not an old package.
                continue

            # args.filter is True unless -f/--disable-filter was passed.
            if args.filter and not apply_filters(package_path, latest_release):
                continue

            old_packages[package_name] = {
                **latest_release,
                "downloads_90d": pepy_client.get_downloads_90d(package_name),
            }

    write_csv(old_packages)


if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)