|
1 | 1 | """Iterative Telemetry."""
|
2 | 2 | import contextlib
|
3 | 3 | import dataclasses
|
| 4 | +import hashlib |
4 | 5 | import json
|
5 | 6 | import logging
|
6 | 7 | import os
|
|
11 | 12 | from functools import lru_cache, wraps
|
12 | 13 | from pathlib import Path
|
13 | 14 | from threading import Thread
|
14 |
| -from typing import Any, Callable, Dict, Iterator, Optional, Union |
| 15 | +from typing import Any, Callable, Dict, Iterator, List, Optional, Tuple, Union |
15 | 16 |
|
16 | 17 | import distro
|
17 | 18 | import requests
|
@@ -206,15 +207,18 @@ def _runtime_info(self):
|
206 | 207 | """
|
207 | 208 | Gather information from the environment where DVC runs to fill a report
|
208 | 209 | """
|
209 |
| - |
| 210 | + ci_id = _generate_ci_id() |
| 211 | + if ci_id: |
| 212 | + group_id, user_id = ci_id |
| 213 | + else: |
| 214 | + group_id, user_id = None, find_or_create_user_id() |
210 | 215 | return {
|
211 | 216 | "tool_name": self.tool_name,
|
212 | 217 | "tool_version": self.tool_version,
|
213 |
| - # "tool_source": self.tool_source, # TODO |
| 218 | + "user_id": user_id, |
| 219 | + "group_id": group_id, |
214 | 220 | # "scm_class": _scm_in_use(),
|
215 | 221 | **_system_info(),
|
216 |
| - "user_id": find_or_create_user_id(), |
217 |
| - "group_id": "", # TODO |
218 | 222 | }
|
219 | 223 |
|
220 | 224 |
|
@@ -249,7 +253,82 @@ def _system_info():
|
249 | 253 |
|
def _generate_id():
    """Return a freshly generated random (version 4) UUID as a string."""
    random_uuid = uuid.uuid4()
    return str(random_uuid)
| 257 | + |
| 258 | + |
# Registry of CI ID generators, tried in registration order by
# `_generate_ci_id`. Each entry takes no arguments and returns a
# `(group_id, user_id)` pair, or `None` when it has nothing to report.
_ci_id_generators: List[Callable[[], Optional[Tuple[str, str]]]] = []


def ci_id_generator(func):
    """Register `func` as a CI ID generator and return its memoized form.

    The memoized wrapper itself is what gets stored in `_ci_id_generators`.
    Registering the raw function instead would make every lookup through the
    registry bypass the cache and redo the generator's work (environment
    reads, subprocess calls) on each call.

    Args:
        func: Zero-argument callable returning `(group_id, user_id)` or
            `None`.

    Returns:
        The `functools.lru_cache`-wrapped version of `func`.
    """
    cached = lru_cache()(func)
    _ci_id_generators.append(cached)
    return cached
| 265 | + |
| 266 | + |
@ci_id_generator
def _generate_github_id():
    """Build `(group_id, user_id)` for a GitHub Actions run.

    Shell equivalent::

        group_id = "$GITHUB_SERVER_URL/$(dirname "$GITHUB_REPOSITORY")"
        user_id = "$(gh api users/$GITHUB_ACTOR --jq '.name, .login, .id' |
                     xargs echo)"

    Returns:
        A `(group_id, user_id)` tuple of strings, or `None` when
        `GITHUB_ACTIONS` is not set, when one of the required variables is
        missing, or when the `gh` lookup fails or yields nothing.
    """
    if not os.environ.get("GITHUB_ACTIONS"):
        return None

    server_url = os.environ.get("GITHUB_SERVER_URL")
    repository = os.environ.get("GITHUB_REPOSITORY")
    actor = os.environ.get("GITHUB_ACTOR")
    if not (server_url and repository and actor):
        # os.path.dirname(None) raises TypeError, and a "None" placeholder in
        # the ID would be meaningless; report "nothing detected" instead.
        return None

    group_id = f"{server_url}/{os.path.dirname(repository)}"
    try:
        output = subprocess.check_output(
            ["gh", "api", f"users/{actor}", "--jq", ".name, .login, .id"]
        )
    except (OSError, subprocess.SubprocessError):
        # OSError covers a missing or non-executable `gh` binary, which is
        # raised before any SubprocessError can occur.
        return None

    # check_output returns bytes with one value per line; decode and collapse
    # to a single space-separated string, as `xargs echo` does.
    user_id = " ".join(output.decode(errors="replace").split())
    if not user_id:
        return None
    return group_id, user_id
| 286 | + |
| 287 | + |
@ci_id_generator
def _generate_gitlab_id():
    """Build `(group_id, user_id)` from GitLab CI environment variables.

    Shell equivalent::

        group_id = "$CI_SERVER_URL/$CI_PROJECT_ROOT_NAMESPACE"
        user_id = "$GITLAB_USER_NAME $GITLAB_USER_LOGIN $GITLAB_USER_ID"

    Returns:
        A `(group_id, user_id)` tuple of strings, or `None` when
        `GITLAB_USER_NAME` is unset or empty. Any other variable that is
        unset is rendered as the text "None".
    """
    env = os.environ.get
    name = env("GITLAB_USER_NAME")
    if not name:
        return None

    group = "{}/{}".format(
        env("CI_SERVER_URL"), env("CI_PROJECT_ROOT_NAMESPACE")
    )
    user = "{} {} {}".format(
        name, env("GITLAB_USER_LOGIN"), env("GITLAB_USER_ID")
    )
    return group, user
| 303 | + |
| 304 | + |
@ci_id_generator
def _generate_bitbucket_id():
    """Build `(group_id, user_id)` for a Bitbucket Pipelines run.

    Shell equivalent::

        group_id = "$BITBUCKET_WORKSPACE"
        user_id = "$(git log -1 --pretty=format:'%ae')"

    Returns:
        A `(group_id, user_id)` tuple of strings, or `None` when
        `BITBUCKET_WORKSPACE` is unset or empty, or when the `git` lookup
        fails or yields nothing.
    """
    group_id = os.environ.get("BITBUCKET_WORKSPACE")
    if not group_id:
        return None
    try:
        # The format is passed as a plain argv entry: no shell is involved,
        # so shell-style quotes around %ae would end up in the output.
        output = subprocess.check_output(
            ["git", "log", "-1", "--pretty=format:%ae"]
        )
    except (OSError, subprocess.SubprocessError):
        # OSError covers a missing or non-executable `git` binary.
        return None

    # check_output returns bytes; downstream hashing expects text.
    user_id = output.decode(errors="replace").strip()
    if not user_id:
        return None
    return group_id, user_id
| 319 | + |
| 320 | + |
@ci_id_generator
def _generate_generic_ci_id():
    """Fallback CI ID generator; currently reports no identity.

    Always returns `None`. Presumably a placeholder for CI systems that have
    no dedicated generator yet -- confirm before extending.
    """
    return None
| 324 | + |
| 325 | + |
def _generate_ci_id():
    """Return the hashed `(group_id, user_id)` from the first matching CI.

    Registered generators are invoked lazily, in registration order, until
    one yields a non-`None` result. Each element of that result is then
    passed through `deterministic`.

    Returns:
        A tuple of `deterministic(...)` values, or `None` when no generator
        produced a result.
    """
    outcomes = (produce() for produce in _ci_id_generators)
    found = next((pair for pair in outcomes if pair is not None), None)
    if found is None:
        return None
    return tuple(deterministic(part) for part in found)
253 | 332 |
|
254 | 333 |
|
255 | 334 | def _read_user_id(config_file: Path):
|
@@ -308,3 +387,17 @@ def find_or_create_user_id():
|
308 | 387 | except Timeout:
|
309 | 388 | logger.debug("Failed to acquire %s", lockfile)
|
310 | 389 | return user_id if user_id.lower() != DO_NOT_TRACK_VALUE.lower() else None
|
| 390 | + |
| 391 | + |
def deterministic(data: Union[str, bytes]) -> uuid.UUID:
    """Map `data` to a stable version-5 UUID via an scrypt-derived name.

    The same input always produces the same UUID. Text is encoded with the
    default codec (UTF-8) and bytes are used as-is, so `deterministic("x")`
    and `deterministic(b"x")` agree. Accepting bytes matters because
    `subprocess.check_output` returns bytes by default.

    Args:
        data: The value to hash, as text or raw bytes.

    Returns:
        A `uuid.UUID` in the namespace derived from "iterative.ai".

    Raises:
        TypeError: If `data` is neither `str` nor bytes-like.
    """
    if isinstance(data, str):
        data = data.encode()
    namespace = uuid.uuid5(uuid.NAMESPACE_DNS, "iterative.ai")
    name = hashlib.scrypt(
        password=data,
        salt=namespace.bytes,
        n=1 << 16,
        r=8,
        p=1,
        # The hashlib default memory limit (32 MiB per the docs) is too low
        # for these n/r values; raise it explicitly.
        maxmem=128 * 1024**2,
        dklen=8,
    )
    return uuid.uuid5(namespace, name.hex())
0 commit comments