Skip to content

Commit 0990094

Browse files
committed
Add CI-based group id (#48)
* Add CI-based group id closes #12 * lint
1 parent 6969945 commit 0990094

File tree

1 file changed

+99
-6
lines changed

1 file changed

+99
-6
lines changed

src/iterative_telemetry/__init__.py

Lines changed: 99 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
"""Iterative Telemetry."""
2-
2+
import hashlib
33
import json
44
import logging
55
import os
@@ -10,7 +10,7 @@
1010
from functools import lru_cache
1111
from pathlib import Path
1212
from threading import Thread
13-
from typing import Any, Callable, Dict, Union
13+
from typing import Any, Callable, Dict, List, Optional, Tuple, Union
1414

1515
import distro
1616
import requests
@@ -146,17 +146,21 @@ def _runtime_info(self):
146146
"""
147147
Gather information from the environment where DVC runs to fill a report
148148
"""
149+
ci_id = _generate_ci_id()
150+
if ci_id:
151+
group_id, user_id = ci_id
152+
else:
153+
group_id, user_id = None, find_or_create_user_id()
149154
major, minor, patch, *_ = sys.version_info
150155

151156
return {
152157
"python_version": {"major": major, "minor": minor, "patch": patch},
153158
"tool_name": self.tool_name,
154159
"tool_version": self.tool_version,
155-
# "tool_source": self.tool_source, # TODO
160+
"user_id": user_id,
161+
"group_id": group_id,
156162
# "scm_class": _scm_in_use(),
157163
**_system_info(),
158-
"user_id": find_or_create_user_id(),
159-
"group_id": "", # TODO
160164
}
161165

162166

@@ -191,7 +195,82 @@ def _system_info():
191195

192196
def _generate_id():
    """Return a freshly generated random ID as a string (UUID version 4)."""
    return str(uuid.uuid4())
199+
200+
201+
# Registry of CI-specific ID generators, in registration order. Each entry is
# a zero-argument callable returning ``(group_id, user_id)`` or ``None``.
_ci_id_generators: List[Callable[[], Optional[Tuple[str, str]]]] = []


def ci_id_generator(func):
    """Register *func* as a CI ID generator and return its cached wrapper.

    The ``lru_cache`` wrapper itself is stored in ``_ci_id_generators`` so
    that code iterating the registry benefits from the cache; registering
    the bare function would leave the cache unused by registry consumers.

    Args:
        func: zero-argument callable returning ``(group_id, user_id)`` or
            ``None``.

    Returns:
        The ``lru_cache``-wrapped version of *func*.
    """
    cached = lru_cache()(func)
    _ci_id_generators.append(cached)
    return cached
207+
208+
209+
@ci_id_generator
210+
def _generate_github_id():
    """Build ``(group_id, user_id)`` from the GitHub Actions environment.

    Shell equivalent::

        group_id="$GITHUB_SERVER_URL/$(dirname "$GITHUB_REPOSITORY")"
        user_id="$(gh api users/$GITHUB_ACTOR --jq '.name, .login, .id' |
                   xargs echo)"

    Returns:
        A ``(group_id, user_id)`` tuple of strings, or ``None`` when
        ``GITHUB_ACTIONS`` is not set, when ``GITHUB_REPOSITORY`` or
        ``GITHUB_ACTOR`` is missing, or when the ``gh`` call fails or the
        ``gh`` executable cannot be launched.
    """
    if not os.environ.get("GITHUB_ACTIONS"):
        return None

    server_url = os.environ.get("GITHUB_SERVER_URL")
    repository = os.environ.get("GITHUB_REPOSITORY")
    actor = os.environ.get("GITHUB_ACTOR")
    if not repository or not actor:
        # os.path.dirname(None) would raise TypeError, and querying
        # "users/None" cannot identify anyone.
        return None

    group_id = f"{server_url}/{os.path.dirname(repository)}"
    try:
        output = subprocess.check_output(
            ["gh", "api", f"users/{actor}", "--jq", ".name, .login, .id"]
        )
    except (subprocess.SubprocessError, OSError):
        # OSError covers a missing or non-executable ``gh`` binary.
        return None

    # check_output returns bytes, one value per line; decode and collapse
    # whitespace to mirror the ``| xargs echo`` step of the shell recipe.
    user_id = " ".join(output.decode("utf-8", errors="replace").split())
    return group_id, user_id
228+
229+
230+
@ci_id_generator
231+
def _generate_gitlab_id():
    """Build ``(group_id, user_id)`` from GitLab CI environment variables.

    Shell equivalent::

        group_id="$CI_SERVER_URL/$CI_PROJECT_ROOT_NAMESPACE"
        user_id="$GITLAB_USER_NAME $GITLAB_USER_LOGIN $GITLAB_USER_ID"

    Returns:
        A ``(group_id, user_id)`` tuple of strings, or ``None`` when
        ``GITLAB_USER_NAME`` is unset or empty.
    """
    user_name = os.environ.get("GITLAB_USER_NAME")
    if not user_name:
        return None

    server_url = os.environ.get("CI_SERVER_URL")
    root_namespace = os.environ.get("CI_PROJECT_ROOT_NAMESPACE")
    user_login = os.environ.get("GITLAB_USER_LOGIN")
    numeric_id = os.environ.get("GITLAB_USER_ID")

    group_id = f"{server_url}/{root_namespace}"
    user_id = f"{user_name} {user_login} {numeric_id}"
    return group_id, user_id
245+
246+
247+
@ci_id_generator
248+
def _generate_bitbucket_id():
    """Build ``(group_id, user_id)`` for Bitbucket Pipelines.

    Shell equivalent::

        group_id="$BITBUCKET_WORKSPACE"
        user_id="$(git log -1 --pretty=format:'%ae')"

    Returns:
        A ``(group_id, user_id)`` tuple of strings, or ``None`` when
        ``BITBUCKET_WORKSPACE`` is unset or empty, or when the ``git`` call
        fails or the ``git`` executable cannot be launched.
    """
    group_id = os.environ.get("BITBUCKET_WORKSPACE")
    if not group_id:
        return None
    try:
        # No shell is involved, so the format must not carry the shell
        # quotes from the recipe above; they would end up in the output.
        output = subprocess.check_output(
            ["git", "log", "-1", "--pretty=format:%ae"]
        )
    except (subprocess.SubprocessError, OSError):
        # OSError covers a missing or non-executable ``git`` binary.
        return None
    # check_output returns bytes; hand a clean str to the caller.
    user_id = output.decode("utf-8", errors="replace").strip()
    return group_id, user_id
261+
262+
263+
@ci_id_generator
264+
def _generate_generic_ci_id():
    """Placeholder generator for unrecognised CI systems; always ``None``."""
    return None
266+
267+
268+
def _generate_ci_id():
    """Return the IDs from the first registered CI generator that matches.

    Generators in ``_ci_id_generators`` are tried in order; the first
    non-``None`` result wins and each of its elements is passed through
    ``deterministic``.

    Returns:
        A tuple of ``deterministic(...)`` results, one per element of the
        winning generator's result, or ``None`` when every generator
        returns ``None``.
    """
    for generator in _ci_id_generators:
        res = generator()
        if res is not None:
            return tuple(map(deterministic, res))
    return None
195274

196275

197276
def _read_user_id(config_file: Path):
@@ -250,3 +329,17 @@ def find_or_create_user_id():
250329
except Timeout:
251330
logger.debug("Failed to acquire %s", lockfile)
252331
return user_id if user_id.lower() != DO_NOT_TRACK_VALUE.lower() else None
332+
333+
334+
def deterministic(data: Union[str, bytes]) -> uuid.UUID:
    """Map *data* to a stable UUID (version 5).

    The input is run through scrypt, salted with a fixed namespace UUID
    derived from ``"iterative.ai"``, and the hex digest is turned into a
    UUID under that same namespace. Equal inputs always give equal UUIDs.

    Args:
        data: value to derive the UUID from. ``str`` is encoded with the
            default codec; ``bytes`` (e.g. raw subprocess output) is used
            as-is, so ``deterministic("x") == deterministic(b"x")``.

    Returns:
        The derived ``uuid.UUID``.
    """
    raw = data.encode() if isinstance(data, str) else bytes(data)
    namespace = uuid.uuid5(uuid.NAMESPACE_DNS, "iterative.ai")
    name = hashlib.scrypt(
        password=raw,
        salt=namespace.bytes,
        n=1 << 16,
        r=8,
        p=1,
        # 128 * n * r bytes = 64 MiB are needed for these parameters;
        # the limit is raised above that requirement.
        maxmem=128 * 1024**2,
        dklen=8,
    )
    return uuid.uuid5(namespace, name.hex())

0 commit comments

Comments
 (0)