Skip to content

Commit fc022d1

Browse files
authored
Add CI-based group id (#48)
* Add CI-based group id closes #12 * lint
1 parent c5ac5ee commit fc022d1

File tree

1 file changed

+99
-7
lines changed

1 file changed

+99
-7
lines changed

src/iterative_telemetry/__init__.py

Lines changed: 99 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
"""Iterative Telemetry."""
2-
2+
import hashlib
33
import json
44
import logging
55
import os
@@ -10,7 +10,7 @@
1010
from functools import lru_cache
1111
from pathlib import Path
1212
from threading import Thread
13-
from typing import Any, Callable, Dict, Union
13+
from typing import Any, Callable, Dict, List, Optional, Tuple, Union
1414

1515
import distro
1616
import requests
@@ -146,15 +146,18 @@ def _runtime_info(self):
146146
"""
147147
Gather information from the environment where DVC runs to fill a report
148148
"""
149-
149+
ci_id = _generate_ci_id()
150+
if ci_id:
151+
group_id, user_id = ci_id
152+
else:
153+
group_id, user_id = None, find_or_create_user_id()
150154
return {
151155
"tool_name": self.tool_name,
152156
"tool_version": self.tool_version,
153-
# "tool_source": self.tool_source, # TODO
157+
"user_id": user_id,
158+
"group_id": group_id,
154159
# "scm_class": _scm_in_use(),
155160
**_system_info(),
156-
"user_id": find_or_create_user_id(),
157-
"group_id": "", # TODO
158161
}
159162

160163

@@ -189,7 +192,82 @@ def _system_info():
189192

190193
def _generate_id():
    """Return a freshly generated random (version 4) UUID as a string."""
    random_uuid = uuid.uuid4()
    return str(random_uuid)
196+
197+
198+
# Registry of CI ID generators, in registration order. Each entry takes no
# arguments and returns a ``(group_id, user_id)`` pair, or ``None`` when it
# cannot produce one.
_ci_id_generators: List[Callable[[], Optional[Tuple[str, str]]]] = []


def ci_id_generator(func):
    """Register *func* as a CI ID generator and return its memoized form.

    The memoized wrapper itself is what gets stored in ``_ci_id_generators``,
    so lookups that go through the registry share the cache with direct calls
    and the wrapped function runs at most once.

    Args:
        func: Zero-argument callable returning ``(group_id, user_id)`` or
            ``None``.

    Returns:
        The ``lru_cache``-wrapped version of *func*.
    """
    cached = lru_cache()(func)
    # Register the cached wrapper, not the raw function; otherwise every
    # registry lookup would re-run the generator and bypass the cache.
    _ci_id_generators.append(cached)
    return cached
204+
205+
206+
@ci_id_generator
def _generate_github_id():
    """Build ``(group_id, user_id)`` for a GitHub Actions run.

    Equivalent shell recipe::

        group_id = "$GITHUB_SERVER_URL/$(dirname "$GITHUB_REPOSITORY")"
        user_id = "$(gh api users/$GITHUB_ACTOR --jq '.name, .login, .id' |
                   xargs echo)"

    Returns:
        A ``(group_id, user_id)`` tuple of strings, or ``None`` when
        ``GITHUB_ACTIONS`` is unset, when ``GITHUB_REPOSITORY`` or
        ``GITHUB_ACTOR`` is missing, or when the ``gh`` lookup fails.
    """
    if not os.environ.get("GITHUB_ACTIONS"):
        return None

    server_url = os.environ.get("GITHUB_SERVER_URL")
    repository = os.environ.get("GITHUB_REPOSITORY")
    actor = os.environ.get("GITHUB_ACTOR")
    if not repository or not actor:
        # os.path.dirname(None) would raise TypeError, and a missing actor
        # would query a bogus user; treat both as "no CI identity".
        return None

    group_id = f"{server_url}/{os.path.dirname(repository)}"
    try:
        output = subprocess.check_output(
            ["gh", "api", f"users/{actor}", "--jq", ".name, .login, .id"]
        )
    except (OSError, subprocess.SubprocessError):
        # OSError covers a missing or non-executable ``gh`` binary, which is
        # raised before any SubprocessError can occur.
        return None

    # check_output returns bytes, while the value is later hashed as text.
    # Joining on single spaces mirrors the ``xargs echo`` step of the recipe.
    user_id = " ".join(output.decode().split())
    return group_id, user_id
225+
226+
227+
@ci_id_generator
def _generate_gitlab_id():
    """Build ``(group_id, user_id)`` from GitLab CI environment variables.

    Equivalent shell recipe::

        group_id = "$CI_SERVER_URL/$CI_PROJECT_ROOT_NAMESPACE"
        user_id = "$GITLAB_USER_NAME $GITLAB_USER_LOGIN $GITLAB_USER_ID"

    Returns:
        A ``(group_id, user_id)`` tuple of strings, or ``None`` when
        ``GITLAB_USER_NAME`` is unset or empty.
    """
    env = os.environ.get

    name = env("GITLAB_USER_NAME")
    if not name:
        return None

    login = env("GITLAB_USER_LOGIN")
    numeric_id = env("GITLAB_USER_ID")
    server = env("CI_SERVER_URL")
    namespace = env("CI_PROJECT_ROOT_NAMESPACE")

    return f"{server}/{namespace}", f"{name} {login} {numeric_id}"
242+
243+
244+
@ci_id_generator
def _generate_bitbucket_id():
    """Build ``(group_id, user_id)`` for a Bitbucket Pipelines run.

    Equivalent shell recipe::

        group_id = "$BITBUCKET_WORKSPACE"
        user_id = "$(git log -1 --pretty=format:'%ae')"

    Returns:
        A ``(group_id, user_id)`` tuple of strings, or ``None`` when
        ``BITBUCKET_WORKSPACE`` is unset or empty, or when the ``git``
        lookup fails.
    """
    group_id = os.environ.get("BITBUCKET_WORKSPACE")
    if not group_id:
        return None
    try:
        # No shell is involved here, so the quotes from the shell recipe must
        # be dropped; otherwise they end up verbatim in the reported e-mail.
        output = subprocess.check_output(
            ["git", "log", "-1", "--pretty=format:%ae"]
        )
    except (OSError, subprocess.SubprocessError):
        # OSError covers a missing or non-executable ``git`` binary.
        return None
    # check_output returns bytes, while the value is later hashed as text.
    return group_id, output.decode().strip()
258+
259+
260+
@ci_id_generator
def _generate_generic_ci_id():
    """Placeholder generator: always returns ``None``, so it yields no ID."""
    return None
263+
264+
265+
def _generate_ci_id():
    """Return hashed IDs from the first registered generator that yields any.

    Generators in ``_ci_id_generators`` are tried in order, and evaluation
    stops at the first non-``None`` result. Every element of that result is
    passed through ``deterministic``.

    Returns:
        A tuple of the hashed elements, or ``None`` when every generator
        returned ``None``.
    """
    # The generator expression is lazy, so later generators are not invoked
    # once an earlier one has produced a result.
    outcomes = (produce() for produce in _ci_id_generators)
    first_hit = next((ids for ids in outcomes if ids is not None), None)
    if first_hit is None:
        return None
    return tuple(deterministic(part) for part in first_hit)
193271

194272

195273
def _read_user_id(config_file: Path):
@@ -248,3 +326,17 @@ def find_or_create_user_id():
248326
except Timeout:
249327
logger.debug("Failed to acquire %s", lockfile)
250328
return user_id if user_id.lower() != DO_NOT_TRACK_VALUE.lower() else None
329+
330+
331+
def deterministic(data: str) -> uuid.UUID:
    """Map *data* to a stable UUID.

    The UTF-8 encoding of *data* is run through scrypt, salted with the bytes
    of the ``iterative.ai`` namespace UUID, to derive an 8-byte key. The hex
    form of that key is then turned into a version 5 UUID within the same
    namespace.

    Args:
        data: Text to derive the identifier from.

    Returns:
        The same ``uuid.UUID`` for the same *data* on every call.
    """
    ns = uuid.uuid5(uuid.NAMESPACE_DNS, "iterative.ai")
    scrypt_options = {
        "n": 1 << 16,
        "r": 8,
        "p": 1,
        "maxmem": 128 * 1024**2,
        "dklen": 8,
    }
    derived = hashlib.scrypt(data.encode(), salt=ns.bytes, **scrypt_options)
    return uuid.uuid5(ns, derived.hex())

0 commit comments

Comments
 (0)