Skip to content

Add CI-based group id #48

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Nov 3, 2022
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
106 changes: 99 additions & 7 deletions src/iterative_telemetry/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
"""Iterative Telemetry."""

import hashlib
import json
import logging
import os
Expand All @@ -10,7 +10,7 @@
from functools import lru_cache
from pathlib import Path
from threading import Thread
from typing import Any, Callable, Dict, Union
from typing import Any, Callable, Dict, List, Optional, Tuple, Union

import distro
import requests
Expand Down Expand Up @@ -146,15 +146,18 @@ def _runtime_info(self):
"""
Gather information from the environment where DVC runs to fill a report
"""

ci_id = _generate_ci_id()
if ci_id:
group_id, user_id = ci_id
else:
group_id, user_id = None, find_or_create_user_id()
return {
"tool_name": self.tool_name,
"tool_version": self.tool_version,
# "tool_source": self.tool_source, # TODO
"user_id": user_id,
"group_id": group_id,
# "scm_class": _scm_in_use(),
**_system_info(),
"user_id": find_or_create_user_id(),
"group_id": "", # TODO
}


Expand Down Expand Up @@ -189,7 +192,82 @@ def _system_info():

def _generate_id():
    """Return a freshly generated random (version 4) UUID as a string."""
    return str(uuid.uuid4())


# Registry of CI identity generators, in registration order. Each entry is a
# zero-argument callable returning ``(group_id, user_id)`` or ``None``.
_ci_id_generators: List[Callable[[], Optional[Tuple[str, str]]]] = []


def ci_id_generator(func):
    """Register *func* as a CI id generator and return its memoized form.

    The memoized wrapper (not the raw function) is what gets stored in
    ``_ci_id_generators``, so callers that iterate the registry benefit from
    the cache exactly like callers that invoke the decorated name directly.

    Args:
        func: Zero-argument callable returning ``(group_id, user_id)`` or
            ``None``.

    Returns:
        The ``lru_cache``-wrapped version of *func*.
    """
    cached = lru_cache()(func)
    _ci_id_generators.append(cached)
    return cached


@ci_id_generator
def _generate_github_id():
    """Derive ``(group_id, user_id)`` from a GitHub Actions environment.

    Shell equivalent::

        group_id="$GITHUB_SERVER_URL/$(dirname "$GITHUB_REPOSITORY")"
        user_id="$(gh api users/$GITHUB_ACTOR --jq '.name, .login, .id' |
                   xargs echo)"

    Returns:
        A ``(group_id, user_id)`` tuple of strings, or ``None`` when
        ``GITHUB_ACTIONS`` is not set, when any of ``GITHUB_SERVER_URL``,
        ``GITHUB_REPOSITORY`` or ``GITHUB_ACTOR`` is missing, or when the
        ``gh`` lookup fails or yields nothing.
    """
    if not os.environ.get("GITHUB_ACTIONS"):
        return None

    server_url = os.environ.get("GITHUB_SERVER_URL")
    repository = os.environ.get("GITHUB_REPOSITORY")
    actor = os.environ.get("GITHUB_ACTOR")
    # os.path.dirname(None) raises TypeError, and an id built from a missing
    # variable would be meaningless, so bail out early instead.
    if not (server_url and repository and actor):
        return None

    group_id = f"{server_url}/{os.path.dirname(repository)}"
    try:
        raw = subprocess.check_output(
            ["gh", "api", f"users/{actor}", "--jq", ".name, .login, .id"]
        )
    except (OSError, subprocess.SubprocessError):
        # OSError covers a missing/non-executable ``gh`` binary, which is not
        # a SubprocessError.
        return None

    # check_output returns bytes; collapse the whitespace-separated values
    # into one space-separated string, mirroring ``xargs echo``.
    user_id = " ".join(raw.decode(errors="replace").split())
    if not user_id:
        return None
    return group_id, user_id


@ci_id_generator
def _generate_gitlab_id():
    """Build ``(group_id, user_id)`` from GitLab CI environment variables.

    Shell equivalent::

        group_id="$CI_SERVER_URL/$CI_PROJECT_ROOT_NAMESPACE"
        user_id="$GITLAB_USER_NAME $GITLAB_USER_LOGIN $GITLAB_USER_ID"

    Returns:
        A ``(group_id, user_id)`` tuple of strings, or ``None`` when
        ``GITLAB_USER_NAME`` is unset or empty.
    """
    env = os.environ.get

    name = env("GITLAB_USER_NAME")
    if not name:
        return None

    host = env("CI_SERVER_URL")
    namespace = env("CI_PROJECT_ROOT_NAMESPACE")
    login = env("GITLAB_USER_LOGIN")
    numeric_id = env("GITLAB_USER_ID")

    return f"{host}/{namespace}", f"{name} {login} {numeric_id}"


@ci_id_generator
def _generate_bitbucket_id():
    """Derive ``(group_id, user_id)`` from a Bitbucket environment.

    Shell equivalent::

        group_id="$BITBUCKET_WORKSPACE"
        user_id="$(git log -1 --pretty=format:'%ae')"

    Returns:
        A ``(group_id, user_id)`` tuple of strings, or ``None`` when
        ``BITBUCKET_WORKSPACE`` is unset or empty, or when the author email
        of the latest commit cannot be obtained from ``git``.
    """
    group_id = os.environ.get("BITBUCKET_WORKSPACE")
    if not group_id:
        return None
    try:
        # No shell is involved, so the format must not be wrapped in quotes:
        # they would be passed to git verbatim and end up in the output.
        raw = subprocess.check_output(
            ["git", "log", "-1", "--pretty=format:%ae"]
        )
    except (OSError, subprocess.SubprocessError):
        # OSError covers a missing/non-executable ``git`` binary, which is
        # not a SubprocessError.
        return None

    # check_output returns bytes; downstream hashing expects str.
    user_id = raw.decode(errors="replace").strip()
    if not user_id:
        return None
    return group_id, user_id


@ci_id_generator
def _generate_generic_ci_id():
    """Placeholder generator that never produces a CI-derived identity.

    Returns:
        Always ``None``.
    """
    return None


def _generate_ci_id():
    """Return the hashed identity from the first generator that yields one.

    Generators in ``_ci_id_generators`` are tried in order and evaluation
    stops at the first result that is not ``None``. Every element of that
    result is passed through ``deterministic``.

    Returns:
        A tuple of the ``deterministic`` values for the first non-``None``
        generator result, or ``None`` when every generator returns ``None``.
    """
    # Generator expression keeps evaluation lazy: later generators are not
    # invoked once an earlier one has produced a result.
    outcomes = (make_id() for make_id in _ci_id_generators)
    found = next((item for item in outcomes if item is not None), None)
    if found is None:
        return None
    return tuple(deterministic(part) for part in found)


def _read_user_id(config_file: Path):
Expand Down Expand Up @@ -248,3 +326,17 @@ def find_or_create_user_id():
except Timeout:
logger.debug("Failed to acquire %s", lockfile)
return user_id if user_id.lower() != DO_NOT_TRACK_VALUE.lower() else None


def deterministic(data: str) -> uuid.UUID:
    """Map *data* to a reproducible UUID.

    The input is run through scrypt, salted with the bytes of a fixed
    UUIDv5 namespace derived from ``"iterative.ai"``, and the hex form of the
    8-byte digest is then turned into a UUIDv5 under that same namespace.

    Args:
        data: Text to derive the identifier from.

    Returns:
        The UUID derived from *data*; equal inputs give equal UUIDs.
    """
    root = uuid.uuid5(uuid.NAMESPACE_DNS, "iterative.ai")
    scrypt_params = {
        "n": 1 << 16,
        "r": 8,
        "p": 1,
        "maxmem": 128 * 1024**2,
        "dklen": 8,
    }
    digest = hashlib.scrypt(data.encode(), salt=root.bytes, **scrypt_params)
    return uuid.uuid5(root, digest.hex())