Commit 80dee28

Requirements to OLAP (#14759)
1 parent d35b323 commit 80dee28

20 files changed: +1724 -0 lines

ydb/requirements/collect_reqs.py

Lines changed: 305 additions & 0 deletions
@@ -0,0 +1,305 @@
# Collects requirements from req*.md files, resolves linked GitHub issues,
# and generates traceability matrices and summaries.
import os
import re

import requests

GITHUB_API_URL = "https://api.github.com"
GITHUB_GRAPHQL_URL = "https://api.github.com/graphql"


def parse_requirements(file_path, github_token):
    requirements = []
    current_req = None
    current_case = None  # most recently parsed case, so case-level issues can attach to it
    current_section = ""
    current_subsection = ""

    with open(file_path, 'r', encoding='utf-8') as file:
        lines = file.readlines()

    for line in lines:
        # Detect section headings ("## ...")
        section_match = re.match(r"^##\s(.+)", line)
        if section_match:
            current_section = section_match.group(1)
            continue

        # Detect subsection headings ("### ...")
        subsection_match = re.match(r"^###\s(.+)", line)
        if subsection_match:
            current_subsection = subsection_match.group(1)
            continue

        # Identify a bare GitHub issue reference ("- #1234") and treat it as a requirement
        issue_match = re.match(r"- #(\d+)", line)
        if issue_match:
            issue_number = issue_match.group(1)
            issue_data = fetch_github_issue(issue_number, github_token)
            if issue_data:
                if current_req:
                    requirements.append(current_req)
                issue_id = issue_data.get('node_id')
                sub_issues = fetch_sub_issues_by_id(issue_id, github_token) if issue_id else []
                if issue_data.get('sub_issues_summary'):
                    percent_completed = issue_data['sub_issues_summary']['percent_completed']
                    total = issue_data['sub_issues_summary']['total']
                    completed = issue_data['sub_issues_summary']['completed']
                    if percent_completed == 100:
                        status = 'DONE'
                        color = 'rgb(249%2C%20239%2C%20254%2C1)'
                    elif 0 < percent_completed < 100:
                        status = 'PROGRESS'
                        color = 'rgb(254%2C%20248%2C%20202%2C1)'
                    else:
                        status = 'TO%20DO'
                        color = 'rgb(224%2C%20250%2C%20227%2C1)'
                    issue_data['badge'] = (
                        f"![{status}](https://img.shields.io/badge/{status}-{completed}%2F{total}:"
                        f"{percent_completed}%25-{color}?style=for-the-badge&logo=database&labelColor=grey)"
                    )
                current_req = {
                    'id': f"ISSUE-{issue_number}",
                    'title': issue_data['title'],
                    'description': issue_data['body'],
                    'url': issue_data['html_url'],
                    'body': issue_data['body'],
                    'cases': sub_issues,  # sub-issues serve as this requirement's cases
                    'section': current_section,
                    'subsection': current_subsection,
                    'sub_issues_summary': issue_data.get('sub_issues_summary'),
                    'badge': issue_data.get('badge')
                }
            continue

        # Identify a new requirement ("- **REQ-XXX-001**: title")
        req_match = re.match(r"- \*\*(REQ-[A-Z]+-\d+)\*\*: (.+)", line)
        if req_match:
            if current_req:
                requirements.append(current_req)
            current_req = {
                'id': req_match.group(1),
                'title': req_match.group(2),
                'cases': [],
                'issues': [],
                'section': current_section,
                'subsection': current_subsection
            }

        # Identify a requirement description ("- **Description**: ...")
        req_description_match = re.match(r"\s+- \*\*Description\*\*: (.+)", line)
        if req_description_match and current_req:
            current_req['description'] = req_description_match.group(1)

        # Identify cases with paths ("- Case 1.1: [name](path) - description")
        case_match = re.match(r"\s+- Case (\d+\.\d+): \[(.+)\]\((.+)\) - (.+)", line)
        if case_match and current_req:
            current_case = {
                'case_id': f"{current_req['id']}-{case_match.group(1)}",
                'name': case_match.group(2),
                'description': case_match.group(4),
                'path': case_match.group(3),
                'issues': [],
                'status': "Pending"
            }
            current_req['cases'].append(current_case)
            continue

        # Identify case-level issues (indented at least six spaces, under a case line).
        # Matched before requirement-level issues so they attach to the case, not the requirement.
        case_issue_match = re.match(r"\s{6}- ISSUE:(.+):(.+)", line)
        if case_issue_match and current_case:
            issue_id = case_issue_match.group(2).split('/')[-1]
            issue_desc = case_issue_match.group(1)
            current_case['issues'].append({
                'id': issue_id,
                'description': issue_desc,
                'badge': f"[![GitHub issue/pull request detail](https://img.shields.io/github/issues/detail/state/ydb-platform/ydb/{issue_id})](https://github.com/ydb-platform/ydb/issues/{issue_id})"
            })
            continue

        # Identify requirement-level issues ("- ISSUE:<description>:<url>")
        issue_match = re.match(r"\s+- ISSUE:(.+):(.+)", line)
        if issue_match and current_req:
            issue_id = issue_match.group(2).split('/')[-1]
            issue_desc = issue_match.group(1)
            current_req['issues'].append({
                'id': issue_id,
                'description': issue_desc,
                'badge': f"[![GitHub issue/pull request detail](https://img.shields.io/github/issues/detail/state/ydb-platform/ydb/{issue_id})](https://github.com/ydb-platform/ydb/issues/{issue_id})"
            })

    if current_req:
        requirements.append(current_req)

    return requirements

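# For illustration (an assumed example, not part of the commit): an issue whose
# sub_issues_summary reports completed=5, total=5 (percent_completed == 100) would
# get the badge markdown
#   ![DONE](https://img.shields.io/badge/DONE-5%2F5:100%25-rgb(249%2C%20239%2C%20254%2C1)?style=for-the-badge&logo=database&labelColor=grey)
# i.e. a shields.io "for-the-badge" image whose message shows completed/total and
# the completion percentage, colour-coded by status.
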
def fetch_github_issue(issue_number, github_token):
    # REST call for a single issue in ydb-platform/ydb; returns parsed JSON or None.
    headers = {"Authorization": f"token {github_token}"}
    response = requests.get(f"{GITHUB_API_URL}/repos/ydb-platform/ydb/issues/{issue_number}", headers=headers)

    if response.status_code == 200:
        return response.json()
    else:
        print(f"Failed to fetch issue #{issue_number}: {response.status_code} {response.text}")
        return None


def fetch_sub_issues_by_id(issue_id, github_token):
    # Page through an issue's sub-issues via the GraphQL API, 100 per page.
    query = """
    query($issueId: ID!, $after: String) {
      node(id: $issueId) {
        ... on Issue {
          subIssues(first: 100, after: $after) {
            nodes {
              title
              number
              url
              id
              body
            }
            pageInfo {
              hasNextPage
              endCursor
            }
          }
        }
      }
    }
    """

    variables = {
        "issueId": issue_id,
        "after": None
    }

    headers = {
        "Authorization": f"Bearer {github_token}",
        "GraphQL-Features": "sub_issues"  # sub-issues are gated behind this preview header
    }

    sub_issues = []

    while True:
        response = requests.post(GITHUB_GRAPHQL_URL, json={"query": query, "variables": variables}, headers=headers)

        if response.status_code != 200:
            print(f"GraphQL query failed: {response.status_code} {response.text}")
            break

        data = response.json()
        sub_issues_data = data['data']['node']['subIssues']
        for node in sub_issues_data['nodes']:
            sub_issues.append({
                'case_id': f"#{node['number']}",
                'name': node['title'],
                'description': node['body'].split('\n')[0],  # first line of the issue body
                'path': node['url'],
                'issue': node['number'],
                'status': "Pending",
                'badge': f"[![GitHub issue/pull request detail](https://img.shields.io/github/issues/detail/state/ydb-platform/ydb/{node['number']})](https://github.com/ydb-platform/ydb/issues/{node['number']})"
            })

        if not sub_issues_data['pageInfo']['hasNextPage']:
            break
        variables['after'] = sub_issues_data['pageInfo']['endCursor']

    return sub_issues


def to_anchor(s):
    # Build a GitHub-style heading anchor, e.g.
    # to_anchor("REQ-PERF-001: Some title") -> "#req-perf-001-some-title"
    return '#' + re.sub(r'[\s/:()]+', '-', s.lower().replace('/', '').replace('+', '')).strip('-')


def generate_traceability_matrix(requirements, output_path):
    with open(output_path, 'w', encoding='utf-8') as file:
        file.write("# Traceability Matrix\n\n")
        section = ''
        subsection = ''
        for req in requirements:
            if section != req['section']:
                file.write(f"## {req['section']}\n\n")
                section = req['section']
            if subsection != req['subsection']:
                file.write(f"### {req['subsection']}\n")
                subsection = req['subsection']

            if req.get('url'):
                file.write(f"#### [{req['id']}]({req['url']}): {req['title']}\n")
            else:
                file.write(f"#### {req['id']}: {req['title']}\n")
            if req.get('badge'):
                anchor = to_anchor(f"{req['id']}: {req['title']}")
                file.write(f"[{req['badge']}](./summary.md{anchor})\n\n")
            if req.get('description'):
                file.write(f"**Description**: {req['description']}\n\n")
            if req.get('issues'):
                file.write("Issues:\n")
                for issue in req['issues']:
                    file.write(f"- {issue['id']}: {issue['description']}\n")
                file.write("\n")

            file.write("| Case ID | Name | Description | Issues | Status |\n")
            file.write("|---------|------|-------------|--------|:--------|\n")

            for case in req['cases']:
                # Combine badges from the case itself, its own issues, and the requirement's issues.
                issues_list = ""
                if case.get('badge'):
                    issues_list = case['badge']
                if case.get('issues'):
                    issues_list += ','.join(issue['badge'] for issue in case['issues'])
                if req.get('issues'):
                    issues_list += ','.join(issue['badge'] for issue in req['issues'])
                file.write(f"| {case['case_id']} | {case['name']} | {case['description']} | {issues_list} | {case['status']} |\n")
            file.write("\n")


def generate_summary(requirements, output_path):
    with open(output_path, 'w', encoding='utf-8') as file:
        file.write("# Summary\n\n")
        section = ''
        subsection = ''
        total = 0
        completed = 0
        for req in requirements:
            if req.get('sub_issues_summary'):
                total += req['sub_issues_summary']['total']
                completed += req['sub_issues_summary']['completed']
        percent = round(completed * 100 / total, 2) if total > 0 else 0
        file.write(f"**Completed tests: {completed}/{total}: {percent}%**\n\n")
        if requirements:
            # Opening section header, taken from the last parsed requirement
            # (matches the layout of the generated summary.md).
            file.write(f"## {requirements[-1]['section']}\n\n")
        for req in requirements:
            if req.get('sub_issues_summary'):
                if section != req['section']:
                    file.write(f"## {req['section']}\n\n")
                    section = req['section']
                if subsection != req['subsection']:
                    file.write(f"### {req['subsection']}\n")
                    subsection = req['subsection']

                if req.get('url'):
                    file.write(f"#### [{req['id']}]({req['url']}): {req['title']}\n")
                else:
                    file.write(f"#### {req['id']}: {req['title']}\n")
                if req.get('description'):
                    file.write(f"**Description**: {req['description']}\n\n")
                if req.get('badge'):
                    anchor = to_anchor(f"{req['id']}: {req['title']}")
                    file.write(f"[{req['badge']}](./traceability_matrix.md{anchor})\n\n")
                if req.get('issues'):
                    file.write("Issues:\n")
                    for issue in req['issues']:
                        file.write(f"- {issue['id']}: {issue['description']}\n")
                    file.write("\n")


def collect_requirements_from_directory(directory, github_token):
    # Gather requirements from every req*.md file under `directory`, recursively.
    requirements = []
    for root, _, files in os.walk(directory):
        for file in files:
            if file.startswith('req') and file.endswith('.md'):
                file_path = os.path.join(root, file)
                requirements.extend(parse_requirements(file_path, github_token))
    return requirements


def process_and_generate_matrices(base_directory, github_token):
    for root, subdirs, files in os.walk(base_directory):
        # Collect requirements from the current directory and everything below it
        requirements = collect_requirements_from_directory(root, github_token)

        if requirements:
            matrix_output_file = os.path.join(root, 'traceability_matrix.md')
            summary_output_file = os.path.join(root, 'summary.md')
            generate_traceability_matrix(requirements, matrix_output_file)
            print(f"Generated traceability matrix in {matrix_output_file}")
            generate_summary(requirements, summary_output_file)
            print(f"Generated summary in {summary_output_file}")


if __name__ == "__main__":
    GITHUB_TOKEN = os.getenv("GITHUB_TOKEN")  # set this environment variable to your GitHub token
    current_directory = os.path.dirname(os.path.abspath(__file__))
    process_and_generate_matrices(current_directory, GITHUB_TOKEN)
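
Taken together, the script walks the directory it lives in, parses every `req*.md` file, and writes a `traceability_matrix.md` and `summary.md` beside each group of requirements it finds. A minimal usage sketch follows; the import path and the requirements file name are illustrative assumptions, not part of the commit:

```python
# Sketch: run the collector against one hand-picked requirements file.
# Assumes collect_reqs.py is on the import path and GITHUB_TOKEN is exported.
import os

from collect_reqs import generate_summary, generate_traceability_matrix, parse_requirements

token = os.environ["GITHUB_TOKEN"]  # a token able to read ydb-platform/ydb issues
reqs = parse_requirements("req_olap_analytics.md", token)  # hypothetical file name
generate_traceability_matrix(reqs, "traceability_matrix.md")
generate_summary(reqs, "summary.md")
```

Running the module directly (`python3 collect_reqs.py`) does the same thing recursively, starting from the script's own directory.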
Lines changed: 51 additions & 0 deletions
@@ -0,0 +1,51 @@
# Requirements for YDB Analytics System

## Introduction

This document outlines the detailed functional and non-functional requirements for the YDB analytics system, including the test cases used to verify them, with a focus on aggregate functions and complex analytical queries.

## Non-functional Requirements

### Performance

- **REQ-PERF-001**: Ensure the system handles aggregate functions efficiently across various data sizes.
  - **Description**: Verify that aggregate functions maintain performance standards at increasing data volumes, ensuring response times stay within acceptable limits.
  - **Cases**:
    - Case 1.1: [COUNT Function Performance - 1GB](path/to/test/count_1gb) - Validate performance with a dataset of 1GB.
    - Case 1.2: [COUNT Function Performance - 10GB](path/to/test/count_10gb) - Validate performance with a dataset of 10GB.
    - Case 1.3: [COUNT Function Performance - 100GB](path/to/test/count_100gb) - Validate performance with a dataset of 100GB.
    - Case 1.4: [COUNT Function Performance - 1TB](path/to/test/count_1tb) - Validate performance with a dataset of 1TB.
    - Case 1.5: [COUNT Function Performance - 10TB](path/to/test/count_10tb) - Validate performance with a dataset of 10TB.

- **REQ-PERF-002**: Ensure the system can efficiently compute distinct counts at scale.
  - **Description**: Evaluate the ability to perform COUNT(DISTINCT) operations with acceptable overhead across increasing data volumes.
  - **Cases**:
    - Case 2.1: [COUNT DISTINCT Performance - 1GB](path/to/test/count_distinct_1gb) - Measure distinct count efficiency at 1GB.
    - Case 2.2: [COUNT DISTINCT Performance - 10GB](path/to/test/count_distinct_10gb) - Measure distinct count efficiency at 10GB.

- **REQ-PERF-003**: Validate the efficiency of SUM operations over large datasets.
  - **Description**: Ensure SUM functions execute with optimal performance at different data scales.
  - **Cases**:
    - Case 3.1: [SUM Function Performance - 1GB](path/to/test/sum_1gb) - Validate SUM operation efficiency with 1GB of data.
    - Case 3.2: [SUM Function Performance - 10GB](path/to/test/sum_10gb) - Validate SUM operation efficiency with 10GB of data.

- **REQ-PERF-004**: Ensure the system maintains average-calculation efficiency.
  - **Description**: Verify that AVG functions sustain performance as data sizes increase.
  - **Cases**:
    - Case 4.1: [AVG Function Performance - 1GB](path/to/test/avg_1gb) - Performance metrics for the AVG operation on 1GB of data.

- **REQ-PERF-005**: Efficient computation of MIN/MAX operations.
  - **Description**: Confirm that minimum and maximum functions perform within the expected time frames across various datasets.
  - **Cases**:
    - Case 5.1: [MIN/MAX Performance - 1GB](path/to/test/min_max_1gb) - Validate performance of MIN/MAX operations with 1GB.

- **REQ-PERF-006**: TPC-H benchmark testing for scalability.
  - **Description**: Evaluate system performance using TPC-H benchmark tests at different dataset volumes.
  - **Cases**:
    - Case 6.1: [TPC-H Performance - 10GB](path/to/test/tpch_10gb) - Validate TPC-H benchmark performance with 10GB.

- **REQ-PERF-007**: ClickBench benchmark testing of efficiency under different conditions.
  - **Description**: Assess system capabilities using ClickBench, targeting different data sizes.
  - **Cases**:
    - Case 7.1: [ClickBench Performance - 1GB](path/to/test/clickbench_1gb) - Evaluate with ClickBench on 1GB of data.

These requirements provide a framework for measuring and ensuring performance across the key analytic functionalities of the YDB analytics system, with a specific focus on scalability and efficiency.
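
For illustration, the in-memory record that `collect_reqs.py` above builds for REQ-PERF-001 looks roughly like this (abridged by hand, not actual script output; issue badges would be attached via `ISSUE:` lines, which this file does not use):

```python
# Approximate parsed form of REQ-PERF-001 (abridged); every case starts as "Pending".
{
    'id': 'REQ-PERF-001',
    'title': 'Ensure the system handles aggregate functions efficiently across various data sizes.',
    'description': 'Verify that aggregate functions maintain performance standards at increasing data volumes, ...',
    'section': 'Non-functional Requirements',
    'subsection': 'Performance',
    'issues': [],
    'cases': [
        {'case_id': 'REQ-PERF-001-1.1', 'name': 'COUNT Function Performance - 1GB',
         'path': 'path/to/test/count_1gb',
         'description': 'Validate performance with a dataset of 1GB.',
         'issues': [], 'status': 'Pending'},
        # ... Cases 1.2-1.5 have the same shape
    ],
}
```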
Lines changed: 6 additions & 0 deletions
@@ -0,0 +1,6 @@
# Summary

**Completed tests: 0/0: 0%**

## Non-functional Requirements