|
1 | 1 | """Iterative Telemetry."""
|
2 |
| - |
| 2 | +import hashlib |
3 | 3 | import json
|
4 | 4 | import logging
|
5 | 5 | import os
|
|
10 | 10 | from functools import lru_cache
|
11 | 11 | from pathlib import Path
|
12 | 12 | from threading import Thread
|
13 |
| -from typing import Any, Callable, Dict, Union |
| 13 | +from typing import Any, Callable, Dict, List, Optional, Tuple, Union |
14 | 14 |
|
15 | 15 | import distro
|
16 | 16 | import requests
|
@@ -146,17 +146,21 @@ def _runtime_info(self):
|
146 | 146 | """
|
147 | 147 | Gather information from the environment where DVC runs to fill a report
|
148 | 148 | """
|
| 149 | + ci_id = _generate_ci_id() |
| 150 | + if ci_id: |
| 151 | + group_id, user_id = ci_id |
| 152 | + else: |
| 153 | + group_id, user_id = None, find_or_create_user_id() |
149 | 154 | major, minor, patch, *_ = sys.version_info
|
150 | 155 |
|
151 | 156 | return {
|
152 | 157 | "python_version": {"major": major, "minor": minor, "patch": patch},
|
153 | 158 | "tool_name": self.tool_name,
|
154 | 159 | "tool_version": self.tool_version,
|
155 |
| - # "tool_source": self.tool_source, # TODO |
| 160 | + "user_id": user_id, |
| 161 | + "group_id": group_id, |
156 | 162 | # "scm_class": _scm_in_use(),
|
157 | 163 | **_system_info(),
|
158 |
| - "user_id": find_or_create_user_id(), |
159 |
| - "group_id": "", # TODO |
160 | 164 | }
|
161 | 165 |
|
162 | 166 |
|
@@ -191,7 +195,82 @@ def _system_info():
|
191 | 195 |
|
def _generate_id():
    """Return a freshly generated random (version 4) UUID as a string."""
    fresh = uuid.uuid4()
    return str(fresh)
| 199 | + |
| 200 | + |
# Registry of CI-specific ID generators. Each entry takes no arguments and
# returns a raw ``(group_id, user_id)`` pair, or None when it does not apply.
_ci_id_generators: List[Callable[[], Optional[Tuple[str, str]]]] = []


def ci_id_generator(func):
    """Register ``func`` as a CI ID generator and memoize its result.

    The memoized wrapper (not the bare function) is what gets stored in
    ``_ci_id_generators``, so that code iterating the registry benefits from
    the cache instead of re-running the generator on every call.

    Args:
        func: zero-argument callable returning a ``(group_id, user_id)``
            pair or None.

    Returns:
        The ``lru_cache``-wrapped callable, which is also the object appended
        to the registry.
    """
    cached = lru_cache()(func)
    _ci_id_generators.append(cached)
    return cached
| 207 | + |
| 208 | + |
@ci_id_generator
def _generate_github_id():
    """Build the raw (group_id, user_id) pair for a GitHub Actions run.

    Shell equivalent:
        group_id = "$GITHUB_SERVER_URL/$(dirname "$GITHUB_REPOSITORY")"
        user_id = "$(gh api users/$GITHUB_ACTOR --jq '.name, .login, .id' |
                     xargs echo)"

    Returns:
        A ``(group_id, user_id)`` tuple of strings, or None when
        GITHUB_ACTIONS is not set, when any of the required GITHUB_*
        variables is missing, or when the ``gh`` CLI cannot be run.
    """
    if not os.environ.get("GITHUB_ACTIONS"):
        return None

    server_url = os.environ.get("GITHUB_SERVER_URL")
    repository = os.environ.get("GITHUB_REPOSITORY")
    actor = os.environ.get("GITHUB_ACTOR")
    # os.path.dirname(None) raises TypeError, and a literal "None" in the ID
    # would be meaningless, so bail out when anything is missing.
    if not (server_url and repository and actor):
        return None

    group_id = f"{server_url}/{os.path.dirname(repository)}"
    try:
        output = subprocess.check_output(
            ["gh", "api", f"users/{actor}", "--jq", ".name, .login, .id"]
        )
    except (OSError, subprocess.SubprocessError):
        # OSError covers a missing/non-executable `gh` binary, which is not a
        # SubprocessError.
        return None

    # check_output returns bytes; decode and collapse the one-value-per-line
    # output into a single space-separated string (the `xargs echo` step).
    user_id = " ".join(output.decode("utf-8", errors="replace").split())
    return group_id, user_id
| 228 | + |
| 229 | + |
@ci_id_generator
def _generate_gitlab_id():
    """Build the raw (group_id, user_id) pair from GitLab CI variables.

    Shell equivalent:
        group_id = "$CI_SERVER_URL/$CI_PROJECT_ROOT_NAMESPACE"
        user_id = "$GITLAB_USER_NAME $GITLAB_USER_LOGIN $GITLAB_USER_ID"

    Returns:
        A ``(group_id, user_id)`` tuple of strings, or None when
        GITLAB_USER_NAME is unset or empty. Any other variable that is unset
        is rendered as the literal text "None".
    """
    env = os.environ.get
    name = env("GITLAB_USER_NAME")
    if not name:
        return None

    group = "{}/{}".format(
        env("CI_SERVER_URL"), env("CI_PROJECT_ROOT_NAMESPACE")
    )
    user = "{} {} {}".format(
        name, env("GITLAB_USER_LOGIN"), env("GITLAB_USER_ID")
    )
    return group, user
| 245 | + |
| 246 | + |
@ci_id_generator
def _generate_bitbucket_id():
    """Build the raw (group_id, user_id) pair for a Bitbucket Pipelines run.

    Shell equivalent:
        group_id = "$BITBUCKET_WORKSPACE"
        user_id = "$(git log -1 --pretty=format:'%ae')"

    Returns:
        A ``(group_id, user_id)`` tuple of strings, where user_id is the
        author e-mail of the most recent commit, or None when
        BITBUCKET_WORKSPACE is unset/empty or ``git`` cannot be run.
    """
    group_id = os.environ.get("BITBUCKET_WORKSPACE")
    if not group_id:
        return None
    try:
        # No shell is involved, so the format must not be wrapped in quotes:
        # they would be passed to git verbatim and end up in the output.
        output = subprocess.check_output(
            ["git", "log", "-1", "--pretty=format:%ae"]
        )
    except (OSError, subprocess.SubprocessError):
        # OSError covers a missing/non-executable `git` binary, which is not
        # a SubprocessError.
        return None
    # check_output returns bytes; callers expect text.
    user_id = output.decode("utf-8", errors="replace").strip()
    return group_id, user_id
| 261 | + |
| 262 | + |
@ci_id_generator
def _generate_generic_ci_id():
    """Placeholder generator for other CI systems; currently always None."""
    return None
| 266 | + |
| 267 | + |
def _generate_ci_id():
    """Return hashed (group_id, user_id) from the first applicable CI generator.

    Generators in ``_ci_id_generators`` are called in order until one returns
    a non-None result; each element of that result is then passed through
    ``deterministic``. Returns None when no generator produces a result.
    """
    # Generator expression keeps evaluation lazy: later generators are not
    # invoked once an earlier one has produced a result.
    attempts = (make_ids() for make_ids in _ci_id_generators)
    raw_ids = next((found for found in attempts if found is not None), None)
    if raw_ids is None:
        return None
    return tuple(deterministic(part) for part in raw_ids)
195 | 274 |
|
196 | 275 |
|
197 | 276 | def _read_user_id(config_file: Path):
|
@@ -250,3 +329,17 @@ def find_or_create_user_id():
|
250 | 329 | except Timeout:
|
251 | 330 | logger.debug("Failed to acquire %s", lockfile)
|
252 | 331 | return user_id if user_id.lower() != DO_NOT_TRACK_VALUE.lower() else None
|
| 332 | + |
| 333 | + |
def deterministic(data: Union[str, bytes]) -> uuid.UUID:
    """Map ``data`` to a UUID that is always the same for the same input.

    The input is run through scrypt, salted with a fixed namespace UUID
    derived from "iterative.ai", and the hex of the resulting 8-byte key is
    turned into a version-5 UUID under that same namespace.

    Args:
        data: text to hash. Bytes are accepted as well (e.g. raw
            ``subprocess.check_output`` results) and are hashed as-is; a
            ``str`` is UTF-8 encoded first.

    Returns:
        A ``uuid.UUID`` (version 5) derived from ``data``.
    """
    namespace = uuid.uuid5(uuid.NAMESPACE_DNS, "iterative.ai")
    # bytes has no .encode(); only encode when we were actually given text.
    raw = data if isinstance(data, (bytes, bytearray)) else data.encode()
    name = hashlib.scrypt(
        password=bytes(raw),
        salt=namespace.bytes,
        n=1 << 16,
        r=8,
        p=1,
        # n=2**16, r=8 needs about 64 MiB, above OpenSSL's default limit.
        maxmem=128 * 1024**2,
        dklen=8,
    )
    return uuid.uuid5(namespace, name.hex())
0 commit comments