Skip to content

Commit 79a6148

Browse files
committed
Add CI-based group id (#48)
* Add CI-based group id closes #12 * lint
1 parent a5768bc commit 79a6148

File tree

1 file changed

+99
-6
lines changed

1 file changed

+99
-6
lines changed

src/iterative_telemetry/__init__.py

Lines changed: 99 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
"""Iterative Telemetry."""
22
import contextlib
33
import dataclasses
4+
import hashlib
45
import json
56
import logging
67
import os
@@ -11,7 +12,7 @@
1112
from functools import lru_cache, wraps
1213
from pathlib import Path
1314
from threading import Thread
14-
from typing import Any, Callable, Dict, Iterator, Optional, Union
15+
from typing import Any, Callable, Dict, Iterator, List, Optional, Tuple, Union
1516

1617
import distro
1718
import requests
@@ -206,15 +207,18 @@ def _runtime_info(self):
206207
"""
207208
Gather information from the environment where DVC runs to fill a report
208209
"""
209-
210+
ci_id = _generate_ci_id()
211+
if ci_id:
212+
group_id, user_id = ci_id
213+
else:
214+
group_id, user_id = None, find_or_create_user_id()
210215
return {
211216
"tool_name": self.tool_name,
212217
"tool_version": self.tool_version,
213-
# "tool_source": self.tool_source, # TODO
218+
"user_id": user_id,
219+
"group_id": group_id,
214220
# "scm_class": _scm_in_use(),
215221
**_system_info(),
216-
"user_id": find_or_create_user_id(),
217-
"group_id": "", # TODO
218222
}
219223

220224

@@ -249,7 +253,82 @@ def _system_info():
249253

250254
def _generate_id():
    """Return a freshly generated random (version 4) UUID as a string."""
    random_uuid = uuid.uuid4()
    return str(random_uuid)
257+
258+
259+
# Registry of CI ID generators, tried in registration order by
# `_generate_ci_id`. Each returns `(group_id, user_id)` or `None`.
_ci_id_generators: List[Callable[[], Optional[Tuple[str, str]]]] = []


def ci_id_generator(func):
    """Register *func* as a CI ID generator and return its memoized form.

    The memoized wrapper (not the raw function) is what gets stored in
    `_ci_id_generators`, so lookups that go through the registry reuse the
    cached result instead of re-running the generator (which may spawn a
    subprocess) on every call.  Call ``.cache_clear()`` on the returned
    wrapper to force re-evaluation, e.g. after changing the environment.
    """
    cached = lru_cache()(func)
    _ci_id_generators.append(cached)
    return cached
265+
266+
267+
@ci_id_generator
def _generate_github_id():
    """group_id = "$GITHUB_SERVER_URL/$(dirname "$GITHUB_REPOSITORY")"
    user_id = "$(gh api users/$GITHUB_ACTOR --jq '.name, .login, .id' |
               xargs echo)"

    Returns a ``(group_id, user_id)`` tuple of strings, or ``None`` when
    ``GITHUB_ACTIONS`` is unset/empty, when any of the required GitHub
    environment variables is missing, or when the ``gh`` CLI cannot be run
    or exits with an error.
    """
    if not os.environ.get("GITHUB_ACTIONS"):
        return None

    server_url = os.environ.get("GITHUB_SERVER_URL")
    repository = os.environ.get("GITHUB_REPOSITORY")
    actor = os.environ.get("GITHUB_ACTOR")
    # os.path.dirname(None) raises TypeError, and a "None" placeholder in
    # the ID or the API query would be meaningless, so bail out early.
    if not (server_url and repository and actor):
        return None

    group_id = f"{server_url}/{os.path.dirname(repository)}"
    try:
        output = subprocess.check_output(
            ["gh", "api", f"users/{actor}", "--jq", ".name, .login, .id"]
        )
    except (OSError, subprocess.SubprocessError):
        # OSError covers a missing/non-executable `gh` binary, which is
        # not a SubprocessError.
        return None

    # check_output returns bytes; decode to text and collapse the
    # one-value-per-line output into a single space-separated string,
    # which is what the `| xargs echo` in the reference command does.
    user_id = " ".join(output.decode("utf-8", errors="replace").split())
    return group_id, user_id
286+
287+
288+
@ci_id_generator
def _generate_gitlab_id():
    """group_id = "$CI_SERVER_URL/$CI_PROJECT_ROOT_NAMESPACE"
    user_id = "$GITLAB_USER_NAME $GITLAB_USER_LOGIN $GITLAB_USER_ID"

    Returns ``None`` when ``GITLAB_USER_NAME`` is unset or empty; otherwise
    a ``(group_id, user_id)`` tuple built from the environment.  Any other
    variable that is unset is rendered as the text ``None``.
    """
    env = os.environ
    name = env.get("GITLAB_USER_NAME")
    if not name:
        return None

    login = env.get("GITLAB_USER_LOGIN")
    numeric_id = env.get("GITLAB_USER_ID")
    server = env.get("CI_SERVER_URL")
    namespace = env.get("CI_PROJECT_ROOT_NAMESPACE")
    return f"{server}/{namespace}", f"{name} {login} {numeric_id}"
303+
304+
305+
@ci_id_generator
def _generate_bitbucket_id():
    """group_id = "$BITBUCKET_WORKSPACE"
    user_id = "$(git log -1 --pretty=format:'%ae')"

    Returns a ``(group_id, user_id)`` tuple of strings, or ``None`` when
    ``BITBUCKET_WORKSPACE`` is unset/empty or when ``git`` cannot be run
    or exits with an error.
    """
    group_id = os.environ.get("BITBUCKET_WORKSPACE")
    if not group_id:
        return None
    try:
        # No shell is involved here, so the format must be passed without
        # the shell quotes; otherwise git echoes them back literally and
        # the result is "'user@example.com'" instead of the bare address.
        output = subprocess.check_output(
            ["git", "log", "-1", "--pretty=format:%ae"]
        )
    except (OSError, subprocess.SubprocessError):
        # OSError covers a missing/non-executable `git` binary, which is
        # not a SubprocessError.
        return None
    # check_output returns bytes; callers expect text.
    user_id = output.decode("utf-8", errors="replace").strip()
    return group_id, user_id
319+
320+
321+
@ci_id_generator
def _generate_generic_ci_id():
    """Placeholder generator: always returns ``None`` (no CI identity)."""
    return None
324+
325+
326+
def _generate_ci_id():
    """Return the first CI identity found, with each part hashed.

    Registered generators are invoked in order until one returns a
    non-``None`` result; every element of that result is then passed
    through `deterministic`.  Returns ``None`` if no generator matches.
    """
    # Lazy generator: later generators are not called once one succeeds.
    candidates = (generate() for generate in _ci_id_generators)
    found = next((ids for ids in candidates if ids is not None), None)
    if found is None:
        return None
    return tuple(deterministic(part) for part in found)
253332

254333

255334
def _read_user_id(config_file: Path):
@@ -308,3 +387,17 @@ def find_or_create_user_id():
308387
except Timeout:
309388
logger.debug("Failed to acquire %s", lockfile)
310389
return user_id if user_id.lower() != DO_NOT_TRACK_VALUE.lower() else None
390+
391+
392+
def deterministic(data: str) -> uuid.UUID:
    """Map *data* to a UUID that is the same for the same input.

    The input is run through scrypt (salted with a fixed namespace UUID
    derived from ``iterative.ai``) to an 8-byte key, and the hex form of
    that key is turned into a version-5 UUID under the same namespace.
    """
    root = uuid.uuid5(uuid.NAMESPACE_DNS, "iterative.ai")
    scrypt_options = {
        "n": 1 << 16,
        "r": 8,
        "p": 1,
        "maxmem": 128 * 1024**2,
        "dklen": 8,
    }
    derived = hashlib.scrypt(data.encode(), salt=root.bytes, **scrypt_options)
    return uuid.uuid5(root, derived.hex())

0 commit comments

Comments
 (0)