From 8b22b289063d3dcc4fcf521bb2b309080812644e Mon Sep 17 00:00:00 2001 From: Orlando Barrera II Date: Tue, 18 Feb 2025 17:49:49 -0600 Subject: [PATCH 01/22] Fixed a bug with alert in multiple files --- socketsecurity/__init__.py | 2 +- socketsecurity/core/messages.py | 64 +++++++++++++++++++-------------- 2 files changed, 39 insertions(+), 27 deletions(-) diff --git a/socketsecurity/__init__.py b/socketsecurity/__init__.py index 59b063c..872fb53 100644 --- a/socketsecurity/__init__.py +++ b/socketsecurity/__init__.py @@ -1,2 +1,2 @@ __author__ = 'socket.dev' -__version__ = '2.0.4' +__version__ = '2.0.5' diff --git a/socketsecurity/core/messages.py b/socketsecurity/core/messages.py index ca90c27..d7b2acc 100644 --- a/socketsecurity/core/messages.py +++ b/socketsecurity/core/messages.py @@ -195,10 +195,10 @@ def create_security_comment_sarif(diff) -> dict: scan_failed = False if len(diff.new_alerts) == 0: for alert in diff.new_alerts: - alert: Issue if alert.error: scan_failed = True break + sarif_data = { "$schema": "https://json.schemastore.org/sarif-2.1.0.json", "version": "2.1.0", @@ -225,21 +225,27 @@ def create_security_comment_sarif(diff) -> dict: rule_id = f"{pkg_name}=={pkg_version}" severity = alert.severity - # Generate the correct URL for the alert based on manifest type - introduced_list = alert.introduced_by - manifest_file = introduced_list[0][1] if introduced_list and isinstance(introduced_list[0], list) else alert.manifests or "requirements.txt" - socket_url = Messages.get_manifest_type_url(manifest_file, pkg_name, pkg_version) + # --- NEW LOGIC: Determine the list of manifest files --- + if alert.introduced_by and isinstance(alert.introduced_by[0], list): + # Extract file names from each introduced_by entry + manifest_files = [entry[1] for entry in alert.introduced_by] + elif alert.manifests: + # Split semicolon-delimited manifest string if necessary + manifest_files = [mf.strip() for mf in alert.manifests.split(";")] + else: + manifest_files = ["requirements.txt"] - # Prepare descriptions with
replacements - short_desc = f"{alert.props.get('note', '')}

Suggested Action:
{alert.suggestion}
{socket_url}" - full_desc = "{} - {}".format(alert.title, alert.description.replace('\r\n', '
')) + # Use the first file for generating the help URL. + socket_url = Messages.get_manifest_type_url(manifest_files[0], pkg_name, pkg_version) - # Identify the line and snippet in the manifest file - line_number, line_content = Messages.find_line_in_file(pkg_name, pkg_version, manifest_file) - if line_number < 1: - line_number = 1 # Ensure SARIF compliance + # Prepare the description messages. + short_desc = ( + f"{alert.props.get('note', '')}

Suggested Action:
" + f"{alert.suggestion}
{socket_url}" + ) + full_desc = "{} - {}".format(alert.title, alert.description.replace('\r\n', '
')) - # Create the rule if not already defined + # Create the rule if not already defined. if rule_id not in rules_map: rules_map[rule_id] = { "id": rule_id, @@ -252,25 +258,31 @@ def create_security_comment_sarif(diff) -> dict: }, } - # Add the SARIF result + # --- NEW LOGIC: Create separate locations for each manifest file --- + locations = [] + for mf in manifest_files: + line_number, line_content = Messages.find_line_in_file(pkg_name, pkg_version, mf) + if line_number < 1: + line_number = 1 # Ensure SARIF compliance. + locations.append({ + "physicalLocation": { + "artifactLocation": {"uri": mf}, + "region": { + "startLine": line_number, + "snippet": {"text": line_content}, + }, + } + }) + + # Add the SARIF result. result_obj = { "ruleId": rule_id, "message": {"text": short_desc}, - "locations": [ - { - "physicalLocation": { - "artifactLocation": {"uri": manifest_file}, - "region": { - "startLine": line_number, - "snippet": {"text": line_content}, - }, - } - } - ], + "locations": locations, } results_list.append(result_obj) - # Attach rules and results + # Attach rules and results. sarif_data["runs"][0]["tool"]["driver"]["rules"] = list(rules_map.values()) sarif_data["runs"][0]["results"] = results_list From 7cfc9340c373b73a93c21947c5c7a99444035f37 Mon Sep 17 00:00:00 2001 From: Orlando Barrera II Date: Wed, 19 Feb 2025 08:48:09 -0600 Subject: [PATCH 02/22] Improved teh sarif file parsing --- socketsecurity/core/messages.py | 176 ++++++++++++++------------------ 1 file changed, 76 insertions(+), 100 deletions(-) diff --git a/socketsecurity/core/messages.py b/socketsecurity/core/messages.py index d7b2acc..46e76af 100644 --- a/socketsecurity/core/messages.py +++ b/socketsecurity/core/messages.py @@ -15,16 +15,12 @@ class Messages: @staticmethod def map_severity_to_sarif(severity: str) -> str: """ - Map Socket severity levels to SARIF levels (GitHub code scanning). - - 'low' -> 'note' - 'medium' or 'middle' -> 'warning' - 'high' or 'critical' -> 'error' + Map Socket Security severity levels to SARIF levels. """ severity_mapping = { "low": "note", "medium": "warning", - "middle": "warning", # older data might say "middle" + "middle": "warning", "high": "error", "critical": "error", } @@ -33,82 +29,67 @@ def map_severity_to_sarif(severity: str) -> str: @staticmethod def find_line_in_file(packagename: str, packageversion: str, manifest_file: str) -> tuple: """ - Finds the line number and snippet of code for the given package/version in a manifest file. - Returns a 2-tuple: (line_number, snippet_or_message). - - Supports: - 1) JSON-based manifest files (package-lock.json, Pipfile.lock, composer.lock) - - Locates a dictionary entry with the matching package & version - - Does a rough line-based search to find the actual line in the raw text - 2) Text-based (requirements.txt, package.json, yarn.lock, etc.) - - Uses compiled regex patterns to detect a match line by line + Given a manifest file, find the line number and snippet where the package is declared. + For JSON-based manifests (package-lock.json, Pipfile.lock, composer.lock, package.json), + we attempt to parse the JSON to verify the package is present, then search for the key. + For text-based manifests, we use a regex search. """ - # Extract just the file name to detect manifest type file_type = Path(manifest_file).name - # ---------------------------------------------------- - # 1) JSON-based manifest files - # ---------------------------------------------------- - if file_type in ["package-lock.json", "Pipfile.lock", "composer.lock"]: + # Handle JSON-based files. + if file_type in ["package-lock.json", "Pipfile.lock", "composer.lock", "package.json"]: try: - # Read entire file so we can parse JSON and also do raw line checks with open(manifest_file, "r", encoding="utf-8") as f: raw_text = f.read() - - # Attempt JSON parse - data = json.loads(raw_text) - - # In practice, you may need to check data["dependencies"], data["default"], etc. - # This is an example approach. - packages_dict = ( - data.get("packages") - or data.get("default") - or data.get("dependencies") - or {} - ) - - found_key = None - found_info = None - # Locate a dictionary entry whose 'version' matches - for key, value in packages_dict.items(): - # For NPM package-lock, keys might look like "node_modules/axios" - if key.endswith(packagename) and "version" in value: - if value["version"] == packageversion: - found_key = key - found_info = value - break - - if found_key and found_info: - # Search lines to approximate the correct line number - needle_key = f'"{found_key}":' # e.g. "node_modules/axios": - needle_version = f'"version": "{packageversion}"' - lines = raw_text.splitlines() - best_line = 1 - snippet = None - - for i, line in enumerate(lines, start=1): - if (needle_key in line) or (needle_version in line): - best_line = i - snippet = line.strip() - break # On first match, stop - - # If we found an approximate line, return it; else fallback to line 1 - if best_line > 0 and snippet: - return best_line, snippet - else: - return 1, f'"{found_key}": {found_info}' + try: + data = json.loads(raw_text) + except json.JSONDecodeError: + data = {} + + found = False + # For package.json, check dependencies and devDependencies. + if file_type == "package.json": + deps = data.get("dependencies", {}) + deps_dev = data.get("devDependencies", {}) + all_deps = {**deps, **deps_dev} + if packagename in all_deps: + actual_version = all_deps[packagename] + # Allow for versions with caret/tilde prefixes. + if actual_version == packageversion or actual_version.lstrip("^~") == packageversion: + found = True else: - return 1, f"{packagename} {packageversion} (not found in {manifest_file})" - - except (FileNotFoundError, json.JSONDecodeError): - return 1, f"Error reading {manifest_file}" + # For other JSON-based manifests, look into common keys. + for key in ["packages", "default", "dependencies"]: + if key in data: + packages_dict = data[key] + # In package-lock.json, keys can be paths (e.g. "node_modules/axios") + for key_item, info in packages_dict.items(): + if key_item.endswith(packagename): + # info may be a dict (with "version") or a simple version string. + ver = info if isinstance(info, str) else info.get("version", "") + if ver == packageversion: + found = True + break + if found: + break - # ---------------------------------------------------- - # 2) Text-based / line-based manifests - # ---------------------------------------------------- - # Define a dictionary of patterns for common manifest types + if not found: + return 1, f'"{packagename}": not found in {manifest_file}' + + # Now search the raw text to locate the declaration line. + needle = f'"{packagename}":' + lines = raw_text.splitlines() + for i, line in enumerate(lines, start=1): + if needle in line: + return i, line.strip() + return 1, f'"{packagename}": declaration not found' + except FileNotFoundError: + return 1, f"{manifest_file} not found" + except Exception as e: + return 1, f"Error reading {manifest_file}: {e}" + + # For text-based files, define regex search patterns for common manifest types. search_patterns = { - "package.json": rf'"{packagename}":\s*"{packageversion}"', "yarn.lock": rf'{packagename}@{packageversion}', "pnpm-lock.yaml": rf'"{re.escape(packagename)}"\s*:\s*\{{[^}}]*"version":\s*"{re.escape(packageversion)}"', "requirements.txt": rf'^{re.escape(packagename)}\s*(?:==|===|!=|>=|<=|~=|\s+)?\s*{re.escape(packageversion)}(?:\s*;.*)?$', @@ -132,33 +113,25 @@ def find_line_in_file(packagename: str, packageversion: str, manifest_file: str) "conanfile.txt": rf'{re.escape(packagename)}/{re.escape(packageversion)}', "vcpkg.json": rf'"{re.escape(packagename)}":\s*"{re.escape(packageversion)}"', } - - # If no specific pattern is found for this file name, fallback to a naive approach searchstring = search_patterns.get(file_type, rf'{re.escape(packagename)}.*{re.escape(packageversion)}') - try: - # Read file lines and search for a match with open(manifest_file, 'r', encoding="utf-8") as file: lines = [line.rstrip("\n") for line in file] for line_number, line_content in enumerate(lines, start=1): - # For Python conditional dependencies, ignore everything after first ';' + # For cases where dependencies have conditionals (e.g. Python), only consider the main part. line_main = line_content.split(";", 1)[0].strip() - - # Use a case-insensitive regex search if re.search(searchstring, line_main, re.IGNORECASE): return line_number, line_content.strip() - except FileNotFoundError: return 1, f"{manifest_file} not found" except Exception as e: return 1, f"Error reading {manifest_file}: {e}" - return 1, f"{packagename} {packageversion} (not found)" @staticmethod def get_manifest_type_url(manifest_file: str, pkg_name: str, pkg_version: str) -> str: """ - Determine the correct URL path based on the manifest file type. + Determine the URL prefix based on the manifest file. """ manifest_to_url_prefix = { "package.json": "npm", @@ -181,7 +154,6 @@ def get_manifest_type_url(manifest_file: str, pkg_name: str, pkg_version: str) - "composer.json": "composer", "vcpkg.json": "vcpkg", } - file_type = Path(manifest_file).name url_prefix = manifest_to_url_prefix.get(file_type, "unknown") return f"https://socket.dev/{url_prefix}/package/{pkg_name}/alerts/{pkg_version}" @@ -189,10 +161,13 @@ def get_manifest_type_url(manifest_file: str, pkg_name: str, pkg_version: str) - @staticmethod def create_security_comment_sarif(diff) -> dict: """ - Create SARIF-compliant output from the diff report, including dynamic URL generation - based on manifest type and improved
formatting for GitHub SARIF display. + Create a SARIF-compliant JSON object for alerts. This function now: + - Accepts multiple manifest files (from alert.introduced_by or alert.manifests) + - Generates one SARIF location per manifest file. + - Supports various language-specific manifest types. """ scan_failed = False + # (Optional: handle scan failure based on alert.error flags) if len(diff.new_alerts) == 0: for alert in diff.new_alerts: if alert.error: @@ -225,27 +200,30 @@ def create_security_comment_sarif(diff) -> dict: rule_id = f"{pkg_name}=={pkg_version}" severity = alert.severity - # --- NEW LOGIC: Determine the list of manifest files --- - if alert.introduced_by and isinstance(alert.introduced_by[0], list): - # Extract file names from each introduced_by entry - manifest_files = [entry[1] for entry in alert.introduced_by] - elif alert.manifests: - # Split semicolon-delimited manifest string if necessary - manifest_files = [mf.strip() for mf in alert.manifests.split(";")] + # --- Determine manifest files from alert data --- + manifest_files = [] + if alert.introduced_by and isinstance(alert.introduced_by, list): + for entry in alert.introduced_by: + if isinstance(entry, list) and len(entry) >= 2: + manifest_files.append(entry[1]) + elif isinstance(entry, str): + manifest_files.extend([m.strip() for m in entry.split(";") if m.strip()]) + elif hasattr(alert, 'manifests') and alert.manifests: + manifest_files = [mf.strip() for mf in alert.manifests.split(";") if mf.strip()] else: manifest_files = ["requirements.txt"] - # Use the first file for generating the help URL. + # Use the first manifest for URL generation. socket_url = Messages.get_manifest_type_url(manifest_files[0], pkg_name, pkg_version) - # Prepare the description messages. + # Prepare descriptions with HTML
for GitHub display. short_desc = ( f"{alert.props.get('note', '')}

Suggested Action:
" f"{alert.suggestion}
{socket_url}" ) full_desc = "{} - {}".format(alert.title, alert.description.replace('\r\n', '
')) - # Create the rule if not already defined. + # Create or reuse the rule definition. if rule_id not in rules_map: rules_map[rule_id] = { "id": rule_id, @@ -258,12 +236,12 @@ def create_security_comment_sarif(diff) -> dict: }, } - # --- NEW LOGIC: Create separate locations for each manifest file --- + # --- Build SARIF locations for each manifest file --- locations = [] for mf in manifest_files: line_number, line_content = Messages.find_line_in_file(pkg_name, pkg_version, mf) if line_number < 1: - line_number = 1 # Ensure SARIF compliance. + line_number = 1 locations.append({ "physicalLocation": { "artifactLocation": {"uri": mf}, @@ -274,7 +252,6 @@ def create_security_comment_sarif(diff) -> dict: } }) - # Add the SARIF result. result_obj = { "ruleId": rule_id, "message": {"text": short_desc}, @@ -282,7 +259,6 @@ def create_security_comment_sarif(diff) -> dict: } results_list.append(result_obj) - # Attach rules and results. sarif_data["runs"][0]["tool"]["driver"]["rules"] = list(rules_map.values()) sarif_data["runs"][0]["results"] = results_list From bdb6de1118230806537a14a747d8fdf3fcb70c49 Mon Sep 17 00:00:00 2001 From: Orlando Barrera II Date: Wed, 19 Feb 2025 09:09:40 -0600 Subject: [PATCH 03/22] Improved teh sarif file parsing --- socketsecurity/core/messages.py | 52 +++++++++++++++++++-------------- 1 file changed, 30 insertions(+), 22 deletions(-) diff --git a/socketsecurity/core/messages.py b/socketsecurity/core/messages.py index 46e76af..90961b7 100644 --- a/socketsecurity/core/messages.py +++ b/socketsecurity/core/messages.py @@ -30,13 +30,16 @@ def map_severity_to_sarif(severity: str) -> str: def find_line_in_file(packagename: str, packageversion: str, manifest_file: str) -> tuple: """ Given a manifest file, find the line number and snippet where the package is declared. - For JSON-based manifests (package-lock.json, Pipfile.lock, composer.lock, package.json), - we attempt to parse the JSON to verify the package is present, then search for the key. - For text-based manifests, we use a regex search. + For JSON-based manifests (e.g. package-lock.json, package.json, Pipfile.lock, composer.lock), + we first verify the package exists (via JSON parsing) and then scan the raw text using one + or more needle patterns. + For text-based manifests, we use regex search. """ file_type = Path(manifest_file).name - # Handle JSON-based files. + # -------------------- + # 1) JSON-based manifests + # -------------------- if file_type in ["package-lock.json", "Pipfile.lock", "composer.lock", "package.json"]: try: with open(manifest_file, "r", encoding="utf-8") as f: @@ -47,27 +50,26 @@ def find_line_in_file(packagename: str, packageversion: str, manifest_file: str) data = {} found = False - # For package.json, check dependencies and devDependencies. + # For package.json, check both dependencies and devDependencies. if file_type == "package.json": deps = data.get("dependencies", {}) deps_dev = data.get("devDependencies", {}) all_deps = {**deps, **deps_dev} if packagename in all_deps: - actual_version = all_deps[packagename] # Allow for versions with caret/tilde prefixes. + actual_version = all_deps[packagename] if actual_version == packageversion or actual_version.lstrip("^~") == packageversion: found = True else: - # For other JSON-based manifests, look into common keys. + # For package-lock.json and similar, look into common keys. for key in ["packages", "default", "dependencies"]: if key in data: packages_dict = data[key] - # In package-lock.json, keys can be paths (e.g. "node_modules/axios") + # Keys in package-lock.json can be "node_modules/" for key_item, info in packages_dict.items(): if key_item.endswith(packagename): - # info may be a dict (with "version") or a simple version string. ver = info if isinstance(info, str) else info.get("version", "") - if ver == packageversion: + if ver == packageversion or ver.lstrip("^~") == packageversion: found = True break if found: @@ -76,19 +78,31 @@ def find_line_in_file(packagename: str, packageversion: str, manifest_file: str) if not found: return 1, f'"{packagename}": not found in {manifest_file}' - # Now search the raw text to locate the declaration line. - needle = f'"{packagename}":' + # Build one or more needle patterns. For package-lock.json, try both patterns. + needles = [] + if file_type == "package-lock.json": + # Try with "node_modules/..." first, then without. + needles.append(f'"node_modules/{packagename}"') + needles.append(f'"{packagename}"') + else: + needles.append(f'"{packagename}"') + + # Scan through the file's lines to locate a matching needle. lines = raw_text.splitlines() for i, line in enumerate(lines, start=1): - if needle in line: - return i, line.strip() + for needle in needles: + if needle in line: + return i, line.strip() return 1, f'"{packagename}": declaration not found' except FileNotFoundError: return 1, f"{manifest_file} not found" except Exception as e: return 1, f"Error reading {manifest_file}: {e}" - # For text-based files, define regex search patterns for common manifest types. + # -------------------- + # 2) Text-based / line-based manifests + # -------------------- + # Define regex patterns for common text-based manifest types. search_patterns = { "yarn.lock": rf'{packagename}@{packageversion}', "pnpm-lock.yaml": rf'"{re.escape(packagename)}"\s*:\s*\{{[^}}]*"version":\s*"{re.escape(packageversion)}"', @@ -118,7 +132,6 @@ def find_line_in_file(packagename: str, packageversion: str, manifest_file: str) with open(manifest_file, 'r', encoding="utf-8") as file: lines = [line.rstrip("\n") for line in file] for line_number, line_content in enumerate(lines, start=1): - # For cases where dependencies have conditionals (e.g. Python), only consider the main part. line_main = line_content.split(";", 1)[0].strip() if re.search(searchstring, line_main, re.IGNORECASE): return line_number, line_content.strip() @@ -166,12 +179,10 @@ def create_security_comment_sarif(diff) -> dict: - Generates one SARIF location per manifest file. - Supports various language-specific manifest types. """ - scan_failed = False # (Optional: handle scan failure based on alert.error flags) if len(diff.new_alerts) == 0: for alert in diff.new_alerts: if alert.error: - scan_failed = True break sarif_data = { @@ -216,14 +227,12 @@ def create_security_comment_sarif(diff) -> dict: # Use the first manifest for URL generation. socket_url = Messages.get_manifest_type_url(manifest_files[0], pkg_name, pkg_version) - # Prepare descriptions with HTML
for GitHub display. short_desc = ( f"{alert.props.get('note', '')}

Suggested Action:
" f"{alert.suggestion}
{socket_url}" ) full_desc = "{} - {}".format(alert.title, alert.description.replace('\r\n', '
')) - # Create or reuse the rule definition. if rule_id not in rules_map: rules_map[rule_id] = { "id": rule_id, @@ -236,7 +245,6 @@ def create_security_comment_sarif(diff) -> dict: }, } - # --- Build SARIF locations for each manifest file --- locations = [] for mf in manifest_files: line_number, line_content = Messages.find_line_in_file(pkg_name, pkg_version, mf) @@ -263,7 +271,7 @@ def create_security_comment_sarif(diff) -> dict: sarif_data["runs"][0]["results"] = results_list return sarif_data - + @staticmethod def create_security_comment_json(diff: Diff) -> dict: scan_failed = False From 1683f7ef0dd0813dc555b0e8ca33968ed093014b Mon Sep 17 00:00:00 2001 From: Orlando Barrera II Date: Wed, 19 Feb 2025 10:05:20 -0600 Subject: [PATCH 04/22] Improved the sarif file parsing --- socketsecurity/core/messages.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/socketsecurity/core/messages.py b/socketsecurity/core/messages.py index 90961b7..043270d 100644 --- a/socketsecurity/core/messages.py +++ b/socketsecurity/core/messages.py @@ -175,9 +175,9 @@ def get_manifest_type_url(manifest_file: str, pkg_name: str, pkg_version: str) - def create_security_comment_sarif(diff) -> dict: """ Create a SARIF-compliant JSON object for alerts. This function now: - - Accepts multiple manifest files (from alert.introduced_by or alert.manifests) - - Generates one SARIF location per manifest file. - - Supports various language-specific manifest types. + - Accepts multiple manifest files (from alert.introduced_by or alert.manifests) + - Generates one SARIF location per manifest file. + - Supports various language-specific manifest types. """ # (Optional: handle scan failure based on alert.error flags) if len(diff.new_alerts) == 0: @@ -221,7 +221,9 @@ def create_security_comment_sarif(diff) -> dict: manifest_files.extend([m.strip() for m in entry.split(";") if m.strip()]) elif hasattr(alert, 'manifests') and alert.manifests: manifest_files = [mf.strip() for mf in alert.manifests.split(";") if mf.strip()] - else: + + # Fallback if no manifest file was determined. + if not manifest_files: manifest_files = ["requirements.txt"] # Use the first manifest for URL generation. From 09149711c4b7439377ac18c844c4356f0dc3d330 Mon Sep 17 00:00:00 2001 From: Orlando Barrera II Date: Wed, 19 Feb 2025 10:16:32 -0600 Subject: [PATCH 05/22] Improved the sarif file parsing --- socketsecurity/core/messages.py | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/socketsecurity/core/messages.py b/socketsecurity/core/messages.py index 043270d..50b83f1 100644 --- a/socketsecurity/core/messages.py +++ b/socketsecurity/core/messages.py @@ -174,10 +174,13 @@ def get_manifest_type_url(manifest_file: str, pkg_name: str, pkg_version: str) - @staticmethod def create_security_comment_sarif(diff) -> dict: """ - Create a SARIF-compliant JSON object for alerts. This function now: - - Accepts multiple manifest files (from alert.introduced_by or alert.manifests) + Create SARIF-compliant output from the diff report, including dynamic URL generation + based on manifest type and improved
formatting for GitHub SARIF display. + + This function now: + - Accepts multiple manifest files from alert.introduced_by or alert.manifests. - Generates one SARIF location per manifest file. - - Supports various language-specific manifest types. + - Falls back to a default ("requirements.txt") if none is found. """ # (Optional: handle scan failure based on alert.error flags) if len(diff.new_alerts) == 0: @@ -216,7 +219,7 @@ def create_security_comment_sarif(diff) -> dict: if alert.introduced_by and isinstance(alert.introduced_by, list): for entry in alert.introduced_by: if isinstance(entry, list) and len(entry) >= 2: - manifest_files.append(entry[1]) + manifest_files.append(entry[1].strip()) elif isinstance(entry, str): manifest_files.extend([m.strip() for m in entry.split(";") if m.strip()]) elif hasattr(alert, 'manifests') and alert.manifests: @@ -229,12 +232,14 @@ def create_security_comment_sarif(diff) -> dict: # Use the first manifest for URL generation. socket_url = Messages.get_manifest_type_url(manifest_files[0], pkg_name, pkg_version) + # Prepare the short and full descriptions. short_desc = ( f"{alert.props.get('note', '')}

Suggested Action:
" f"{alert.suggestion}
{socket_url}" ) full_desc = "{} - {}".format(alert.title, alert.description.replace('\r\n', '
')) + # Create the rule definition if it hasn't been defined yet. if rule_id not in rules_map: rules_map[rule_id] = { "id": rule_id, @@ -247,11 +252,12 @@ def create_security_comment_sarif(diff) -> dict: }, } + # Create a SARIF location for each manifest file. locations = [] for mf in manifest_files: line_number, line_content = Messages.find_line_in_file(pkg_name, pkg_version, mf) if line_number < 1: - line_number = 1 + line_number = 1 # Ensure SARIF compliance. locations.append({ "physicalLocation": { "artifactLocation": {"uri": mf}, @@ -262,6 +268,7 @@ def create_security_comment_sarif(diff) -> dict: } }) + # Create the SARIF result for this alert. result_obj = { "ruleId": rule_id, "message": {"text": short_desc}, @@ -269,6 +276,7 @@ def create_security_comment_sarif(diff) -> dict: } results_list.append(result_obj) + # Attach the collected rules and results. sarif_data["runs"][0]["tool"]["driver"]["rules"] = list(rules_map.values()) sarif_data["runs"][0]["results"] = results_list From f664b4730875fcf77bf2381a9d2623fbb3705202 Mon Sep 17 00:00:00 2001 From: Orlando Barrera II Date: Wed, 19 Feb 2025 10:58:39 -0600 Subject: [PATCH 06/22] Improved the sarif file parsing --- socketsecurity/core/messages.py | 161 +++++++++++++++++--------------- 1 file changed, 87 insertions(+), 74 deletions(-) diff --git a/socketsecurity/core/messages.py b/socketsecurity/core/messages.py index 50b83f1..f035556 100644 --- a/socketsecurity/core/messages.py +++ b/socketsecurity/core/messages.py @@ -11,16 +11,20 @@ class Messages: - + @staticmethod def map_severity_to_sarif(severity: str) -> str: """ - Map Socket Security severity levels to SARIF levels. + Map Socket severity levels to SARIF levels (GitHub code scanning). + + 'low' -> 'note' + 'medium' or 'middle' -> 'warning' + 'high' or 'critical' -> 'error' """ severity_mapping = { "low": "note", "medium": "warning", - "middle": "warning", + "middle": "warning", # older data might say "middle" "high": "error", "critical": "error", } @@ -29,81 +33,82 @@ def map_severity_to_sarif(severity: str) -> str: @staticmethod def find_line_in_file(packagename: str, packageversion: str, manifest_file: str) -> tuple: """ - Given a manifest file, find the line number and snippet where the package is declared. - For JSON-based manifests (e.g. package-lock.json, package.json, Pipfile.lock, composer.lock), - we first verify the package exists (via JSON parsing) and then scan the raw text using one - or more needle patterns. - For text-based manifests, we use regex search. + Finds the line number and snippet of code for the given package/version in a manifest file. + Returns a 2-tuple: (line_number, snippet_or_message). + + Supports: + 1) JSON-based manifest files (package-lock.json, Pipfile.lock, composer.lock) + - Locates a dictionary entry with the matching package & version + - Does a rough line-based search to find the actual line in the raw text + 2) Text-based (requirements.txt, package.json, yarn.lock, etc.) + - Uses compiled regex patterns to detect a match line by line """ + # Extract just the file name to detect manifest type file_type = Path(manifest_file).name - # -------------------- - # 1) JSON-based manifests - # -------------------- - if file_type in ["package-lock.json", "Pipfile.lock", "composer.lock", "package.json"]: + # ---------------------------------------------------- + # 1) JSON-based manifest files + # ---------------------------------------------------- + if file_type in ["package-lock.json", "Pipfile.lock", "composer.lock"]: try: + # Read entire file so we can parse JSON and also do raw line checks with open(manifest_file, "r", encoding="utf-8") as f: raw_text = f.read() - try: - data = json.loads(raw_text) - except json.JSONDecodeError: - data = {} - - found = False - # For package.json, check both dependencies and devDependencies. - if file_type == "package.json": - deps = data.get("dependencies", {}) - deps_dev = data.get("devDependencies", {}) - all_deps = {**deps, **deps_dev} - if packagename in all_deps: - # Allow for versions with caret/tilde prefixes. - actual_version = all_deps[packagename] - if actual_version == packageversion or actual_version.lstrip("^~") == packageversion: - found = True - else: - # For package-lock.json and similar, look into common keys. - for key in ["packages", "default", "dependencies"]: - if key in data: - packages_dict = data[key] - # Keys in package-lock.json can be "node_modules/" - for key_item, info in packages_dict.items(): - if key_item.endswith(packagename): - ver = info if isinstance(info, str) else info.get("version", "") - if ver == packageversion or ver.lstrip("^~") == packageversion: - found = True - break - if found: - break - if not found: - return 1, f'"{packagename}": not found in {manifest_file}' + # Attempt JSON parse + data = json.loads(raw_text) + + # In practice, you may need to check data["dependencies"], data["default"], etc. + # This is an example approach. + packages_dict = ( + data.get("packages") + or data.get("default") + or data.get("dependencies") + or {} + ) + + found_key = None + found_info = None + # Locate a dictionary entry whose 'version' matches + for key, value in packages_dict.items(): + # For NPM package-lock, keys might look like "node_modules/axios" + if key.endswith(packagename) and "version" in value: + if value["version"] == packageversion: + found_key = key + found_info = value + break - # Build one or more needle patterns. For package-lock.json, try both patterns. - needles = [] - if file_type == "package-lock.json": - # Try with "node_modules/..." first, then without. - needles.append(f'"node_modules/{packagename}"') - needles.append(f'"{packagename}"') + if found_key and found_info: + # Search lines to approximate the correct line number + needle_key = f'"{found_key}":' # e.g. "node_modules/axios": + needle_version = f'"version": "{packageversion}"' + lines = raw_text.splitlines() + best_line = 1 + snippet = None + + for i, line in enumerate(lines, start=1): + if (needle_key in line) or (needle_version in line): + best_line = i + snippet = line.strip() + break # On first match, stop + + # If we found an approximate line, return it; else fallback to line 1 + if best_line > 0 and snippet: + return best_line, snippet + else: + return 1, f'"{found_key}": {found_info}' else: - needles.append(f'"{packagename}"') - - # Scan through the file's lines to locate a matching needle. - lines = raw_text.splitlines() - for i, line in enumerate(lines, start=1): - for needle in needles: - if needle in line: - return i, line.strip() - return 1, f'"{packagename}": declaration not found' - except FileNotFoundError: - return 1, f"{manifest_file} not found" - except Exception as e: - return 1, f"Error reading {manifest_file}: {e}" - - # -------------------- + return 1, f"{packagename} {packageversion} (not found in {manifest_file})" + + except (FileNotFoundError, json.JSONDecodeError): + return 1, f"Error reading {manifest_file}" + + # ---------------------------------------------------- # 2) Text-based / line-based manifests - # -------------------- - # Define regex patterns for common text-based manifest types. + # ---------------------------------------------------- + # Define a dictionary of patterns for common manifest types search_patterns = { + "package.json": rf'"{packagename}":\s*"{packageversion}"', "yarn.lock": rf'{packagename}@{packageversion}', "pnpm-lock.yaml": rf'"{re.escape(packagename)}"\s*:\s*\{{[^}}]*"version":\s*"{re.escape(packageversion)}"', "requirements.txt": rf'^{re.escape(packagename)}\s*(?:==|===|!=|>=|<=|~=|\s+)?\s*{re.escape(packageversion)}(?:\s*;.*)?$', @@ -127,24 +132,30 @@ def find_line_in_file(packagename: str, packageversion: str, manifest_file: str) "conanfile.txt": rf'{re.escape(packagename)}/{re.escape(packageversion)}', "vcpkg.json": rf'"{re.escape(packagename)}":\s*"{re.escape(packageversion)}"', } + + # If no specific pattern is found for this file name, fallback to a naive approach searchstring = search_patterns.get(file_type, rf'{re.escape(packagename)}.*{re.escape(packageversion)}') try: + # Read file lines and search for a match with open(manifest_file, 'r', encoding="utf-8") as file: lines = [line.rstrip("\n") for line in file] for line_number, line_content in enumerate(lines, start=1): + # For Python conditional dependencies, ignore everything after first ';' line_main = line_content.split(";", 1)[0].strip() + # Use a case-insensitive regex search if re.search(searchstring, line_main, re.IGNORECASE): return line_number, line_content.strip() except FileNotFoundError: return 1, f"{manifest_file} not found" except Exception as e: return 1, f"Error reading {manifest_file}: {e}" + return 1, f"{packagename} {packageversion} (not found)" @staticmethod def get_manifest_type_url(manifest_file: str, pkg_name: str, pkg_version: str) -> str: """ - Determine the URL prefix based on the manifest file. + Determine the correct URL path based on the manifest file type. """ manifest_to_url_prefix = { "package.json": "npm", @@ -167,6 +178,7 @@ def get_manifest_type_url(manifest_file: str, pkg_name: str, pkg_version: str) - "composer.json": "composer", "vcpkg.json": "vcpkg", } + file_type = Path(manifest_file).name url_prefix = manifest_to_url_prefix.get(file_type, "unknown") return f"https://socket.dev/{url_prefix}/package/{pkg_name}/alerts/{pkg_version}" @@ -176,7 +188,7 @@ def create_security_comment_sarif(diff) -> dict: """ Create SARIF-compliant output from the diff report, including dynamic URL generation based on manifest type and improved
formatting for GitHub SARIF display. - + This function now: - Accepts multiple manifest files from alert.introduced_by or alert.manifests. - Generates one SARIF location per manifest file. @@ -215,6 +227,7 @@ def create_security_comment_sarif(diff) -> dict: severity = alert.severity # --- Determine manifest files from alert data --- + # Instead of using a single manifest file, split the values. manifest_files = [] if alert.introduced_by and isinstance(alert.introduced_by, list): for entry in alert.introduced_by: @@ -232,10 +245,10 @@ def create_security_comment_sarif(diff) -> dict: # Use the first manifest for URL generation. socket_url = Messages.get_manifest_type_url(manifest_files[0], pkg_name, pkg_version) - # Prepare the short and full descriptions. + # Prepare descriptions with
replacements. short_desc = ( - f"{alert.props.get('note', '')}

Suggested Action:
" - f"{alert.suggestion}
{socket_url}" + f"{alert.props.get('note', '')}

Suggested Action:
{alert.suggestion}" + f"
{socket_url}" ) full_desc = "{} - {}".format(alert.title, alert.description.replace('\r\n', '
')) @@ -268,7 +281,7 @@ def create_security_comment_sarif(diff) -> dict: } }) - # Create the SARIF result for this alert. + # Create the SARIF result for this alert with multiple locations. result_obj = { "ruleId": rule_id, "message": {"text": short_desc}, @@ -276,7 +289,7 @@ def create_security_comment_sarif(diff) -> dict: } results_list.append(result_obj) - # Attach the collected rules and results. + # Attach rules and results. sarif_data["runs"][0]["tool"]["driver"]["rules"] = list(rules_map.values()) sarif_data["runs"][0]["results"] = results_list From 3f9ad6f5eca6d3faf9bfd6697b8b700c708313ea Mon Sep 17 00:00:00 2001 From: Orlando Barrera II Date: Wed, 19 Feb 2025 11:20:36 -0600 Subject: [PATCH 07/22] Testing the sarif file parsing --- socketsecurity/core/messages.py | 63 ++++++++++++++------------------- 1 file changed, 27 insertions(+), 36 deletions(-) diff --git a/socketsecurity/core/messages.py b/socketsecurity/core/messages.py index f035556..1097a49 100644 --- a/socketsecurity/core/messages.py +++ b/socketsecurity/core/messages.py @@ -2,6 +2,8 @@ import os import re import json +import logging +logging.basicConfig(level=logging.DEBUG) from pathlib import Path from mdutils import MdUtils @@ -11,7 +13,7 @@ class Messages: - + @staticmethod def map_severity_to_sarif(severity: str) -> str: """ @@ -186,15 +188,8 @@ def get_manifest_type_url(manifest_file: str, pkg_name: str, pkg_version: str) - @staticmethod def create_security_comment_sarif(diff) -> dict: """ - Create SARIF-compliant output from the diff report, including dynamic URL generation - based on manifest type and improved
formatting for GitHub SARIF display. - - This function now: - - Accepts multiple manifest files from alert.introduced_by or alert.manifests. - - Generates one SARIF location per manifest file. - - Falls back to a default ("requirements.txt") if none is found. + Create SARIF-compliant output from the diff report. """ - # (Optional: handle scan failure based on alert.error flags) if len(diff.new_alerts) == 0: for alert in diff.new_alerts: if alert.error: @@ -203,18 +198,16 @@ def create_security_comment_sarif(diff) -> dict: sarif_data = { "$schema": "https://json.schemastore.org/sarif-2.1.0.json", "version": "2.1.0", - "runs": [ - { - "tool": { - "driver": { - "name": "Socket Security", - "informationUri": "https://socket.dev", - "rules": [] - } - }, - "results": [] - } - ] + "runs": [{ + "tool": { + "driver": { + "name": "Socket Security", + "informationUri": "https://socket.dev", + "rules": [] + } + }, + "results": [] + }] } rules_map = {} @@ -226,8 +219,7 @@ def create_security_comment_sarif(diff) -> dict: rule_id = f"{pkg_name}=={pkg_version}" severity = alert.severity - # --- Determine manifest files from alert data --- - # Instead of using a single manifest file, split the values. + # --- Extract manifest files --- manifest_files = [] if alert.introduced_by and isinstance(alert.introduced_by, list): for entry in alert.introduced_by: @@ -238,21 +230,21 @@ def create_security_comment_sarif(diff) -> dict: elif hasattr(alert, 'manifests') and alert.manifests: manifest_files = [mf.strip() for mf in alert.manifests.split(";") if mf.strip()] - # Fallback if no manifest file was determined. + # Log the extracted manifest files + logging.debug("Alert %s manifest_files before fallback: %s", rule_id, manifest_files) + if not manifest_files: manifest_files = ["requirements.txt"] + logging.debug("Alert %s: Falling back to manifest_files: %s", rule_id, manifest_files) - # Use the first manifest for URL generation. - socket_url = Messages.get_manifest_type_url(manifest_files[0], pkg_name, pkg_version) + # Log the manifest file used for URL generation + logging.debug("Alert %s: Using manifest_file for URL: %s", rule_id, manifest_files[0]) - # Prepare descriptions with
replacements. - short_desc = ( - f"{alert.props.get('note', '')}

Suggested Action:
{alert.suggestion}" - f"
{socket_url}" - ) + socket_url = Messages.get_manifest_type_url(manifest_files[0], pkg_name, pkg_version) + short_desc = (f"{alert.props.get('note', '')}

Suggested Action:
{alert.suggestion}" + f"
{socket_url}") full_desc = "{} - {}".format(alert.title, alert.description.replace('\r\n', '
')) - # Create the rule definition if it hasn't been defined yet. if rule_id not in rules_map: rules_map[rule_id] = { "id": rule_id, @@ -265,12 +257,13 @@ def create_security_comment_sarif(diff) -> dict: }, } - # Create a SARIF location for each manifest file. + # Create a SARIF location for each manifest file and log each result. locations = [] for mf in manifest_files: line_number, line_content = Messages.find_line_in_file(pkg_name, pkg_version, mf) if line_number < 1: - line_number = 1 # Ensure SARIF compliance. + line_number = 1 + logging.debug("Alert %s: Manifest %s, line %s: %s", rule_id, mf, line_number, line_content) locations.append({ "physicalLocation": { "artifactLocation": {"uri": mf}, @@ -281,7 +274,6 @@ def create_security_comment_sarif(diff) -> dict: } }) - # Create the SARIF result for this alert with multiple locations. result_obj = { "ruleId": rule_id, "message": {"text": short_desc}, @@ -289,7 +281,6 @@ def create_security_comment_sarif(diff) -> dict: } results_list.append(result_obj) - # Attach rules and results. sarif_data["runs"][0]["tool"]["driver"]["rules"] = list(rules_map.values()) sarif_data["runs"][0]["results"] = results_list From 7d8130665c81dcf9030844ef4e7c6ce518ee6404 Mon Sep 17 00:00:00 2001 From: Orlando Barrera II Date: Wed, 19 Feb 2025 11:35:38 -0600 Subject: [PATCH 08/22] Testing the sarif file parsing --- socketsecurity/core/messages.py | 79 ++++++++++++--------------------- 1 file changed, 29 insertions(+), 50 deletions(-) diff --git a/socketsecurity/core/messages.py b/socketsecurity/core/messages.py index 1097a49..16121c2 100644 --- a/socketsecurity/core/messages.py +++ b/socketsecurity/core/messages.py @@ -37,43 +37,34 @@ def find_line_in_file(packagename: str, packageversion: str, manifest_file: str) """ Finds the line number and snippet of code for the given package/version in a manifest file. Returns a 2-tuple: (line_number, snippet_or_message). - - Supports: - 1) JSON-based manifest files (package-lock.json, Pipfile.lock, composer.lock) - - Locates a dictionary entry with the matching package & version - - Does a rough line-based search to find the actual line in the raw text - 2) Text-based (requirements.txt, package.json, yarn.lock, etc.) - - Uses compiled regex patterns to detect a match line by line """ - # Extract just the file name to detect manifest type file_type = Path(manifest_file).name - + logging.debug("Processing manifest file: %s", manifest_file) + # ---------------------------------------------------- # 1) JSON-based manifest files # ---------------------------------------------------- if file_type in ["package-lock.json", "Pipfile.lock", "composer.lock"]: try: - # Read entire file so we can parse JSON and also do raw line checks with open(manifest_file, "r", encoding="utf-8") as f: raw_text = f.read() + logging.debug("Raw text length: %d", len(raw_text)) + try: + data = json.loads(raw_text) + except json.JSONDecodeError: + data = {} + logging.debug("JSON decode failed for %s", manifest_file) - # Attempt JSON parse - data = json.loads(raw_text) - - # In practice, you may need to check data["dependencies"], data["default"], etc. - # This is an example approach. packages_dict = ( data.get("packages") or data.get("default") or data.get("dependencies") or {} ) - + logging.debug("Packages dict keys: %s", list(packages_dict.keys())) found_key = None found_info = None - # Locate a dictionary entry whose 'version' matches for key, value in packages_dict.items(): - # For NPM package-lock, keys might look like "node_modules/axios" if key.endswith(packagename) and "version" in value: if value["version"] == packageversion: found_key = key @@ -81,34 +72,23 @@ def find_line_in_file(packagename: str, packageversion: str, manifest_file: str) break if found_key and found_info: - # Search lines to approximate the correct line number - needle_key = f'"{found_key}":' # e.g. "node_modules/axios": + needle_key = f'"{found_key}":' needle_version = f'"version": "{packageversion}"' lines = raw_text.splitlines() - best_line = 1 - snippet = None - + logging.debug("Total lines: %d", len(lines)) for i, line in enumerate(lines, start=1): if (needle_key in line) or (needle_version in line): - best_line = i - snippet = line.strip() - break # On first match, stop - - # If we found an approximate line, return it; else fallback to line 1 - if best_line > 0 and snippet: - return best_line, snippet - else: - return 1, f'"{found_key}": {found_info}' + logging.debug("Found match at line %d: %s", i, line.strip()) + return i, line.strip() + return 1, f'"{found_key}": {found_info}' else: return 1, f"{packagename} {packageversion} (not found in {manifest_file})" - except (FileNotFoundError, json.JSONDecodeError): return 1, f"Error reading {manifest_file}" - + # ---------------------------------------------------- # 2) Text-based / line-based manifests # ---------------------------------------------------- - # Define a dictionary of patterns for common manifest types search_patterns = { "package.json": rf'"{packagename}":\s*"{packageversion}"', "yarn.lock": rf'{packagename}@{packageversion}', @@ -134,18 +114,16 @@ def find_line_in_file(packagename: str, packageversion: str, manifest_file: str) "conanfile.txt": rf'{re.escape(packagename)}/{re.escape(packageversion)}', "vcpkg.json": rf'"{re.escape(packagename)}":\s*"{re.escape(packageversion)}"', } - - # If no specific pattern is found for this file name, fallback to a naive approach searchstring = search_patterns.get(file_type, rf'{re.escape(packagename)}.*{re.escape(packageversion)}') + logging.debug("Using search pattern for %s: %s", file_type, searchstring) try: - # Read file lines and search for a match with open(manifest_file, 'r', encoding="utf-8") as file: lines = [line.rstrip("\n") for line in file] + logging.debug("Total lines in %s: %d", manifest_file, len(lines)) for line_number, line_content in enumerate(lines, start=1): - # For Python conditional dependencies, ignore everything after first ';' line_main = line_content.split(";", 1)[0].strip() - # Use a case-insensitive regex search if re.search(searchstring, line_main, re.IGNORECASE): + logging.debug("Match found in %s at line %d: %s", manifest_file, line_number, line_content.strip()) return line_number, line_content.strip() except FileNotFoundError: return 1, f"{manifest_file} not found" @@ -180,7 +158,6 @@ def get_manifest_type_url(manifest_file: str, pkg_name: str, pkg_version: str) - "composer.json": "composer", "vcpkg.json": "vcpkg", } - file_type = Path(manifest_file).name url_prefix = manifest_to_url_prefix.get(file_type, "unknown") return f"https://socket.dev/{url_prefix}/package/{pkg_name}/alerts/{pkg_version}" @@ -188,7 +165,13 @@ def get_manifest_type_url(manifest_file: str, pkg_name: str, pkg_version: str) - @staticmethod def create_security_comment_sarif(diff) -> dict: """ - Create SARIF-compliant output from the diff report. + Create SARIF-compliant output from the diff report, including dynamic URL generation + based on manifest type and improved
formatting for GitHub SARIF display. + + This function now: + - Accepts multiple manifest files from alert.introduced_by or alert.manifests. + - Generates one SARIF location per manifest file. + - Falls back to a default ("requirements.txt") if none is found. """ if len(diff.new_alerts) == 0: for alert in diff.new_alerts: @@ -219,7 +202,7 @@ def create_security_comment_sarif(diff) -> dict: rule_id = f"{pkg_name}=={pkg_version}" severity = alert.severity - # --- Extract manifest files --- + # --- Extract manifest files from alert data --- manifest_files = [] if alert.introduced_by and isinstance(alert.introduced_by, list): for entry in alert.introduced_by: @@ -230,16 +213,12 @@ def create_security_comment_sarif(diff) -> dict: elif hasattr(alert, 'manifests') and alert.manifests: manifest_files = [mf.strip() for mf in alert.manifests.split(";") if mf.strip()] - # Log the extracted manifest files logging.debug("Alert %s manifest_files before fallback: %s", rule_id, manifest_files) - if not manifest_files: manifest_files = ["requirements.txt"] logging.debug("Alert %s: Falling back to manifest_files: %s", rule_id, manifest_files) - # Log the manifest file used for URL generation logging.debug("Alert %s: Using manifest_file for URL: %s", rule_id, manifest_files[0]) - socket_url = Messages.get_manifest_type_url(manifest_files[0], pkg_name, pkg_version) short_desc = (f"{alert.props.get('note', '')}

Suggested Action:
{alert.suggestion}" f"
{socket_url}") @@ -257,13 +236,13 @@ def create_security_comment_sarif(diff) -> dict: }, } - # Create a SARIF location for each manifest file and log each result. + # Create a SARIF location for each manifest file. locations = [] for mf in manifest_files: line_number, line_content = Messages.find_line_in_file(pkg_name, pkg_version, mf) if line_number < 1: line_number = 1 - logging.debug("Alert %s: Manifest %s, line %s: %s", rule_id, mf, line_number, line_content) + logging.debug("Alert %s: Manifest %s, line %d: %s", rule_id, mf, line_number, line_content) locations.append({ "physicalLocation": { "artifactLocation": {"uri": mf}, @@ -285,7 +264,7 @@ def create_security_comment_sarif(diff) -> dict: sarif_data["runs"][0]["results"] = results_list return sarif_data - + @staticmethod def create_security_comment_json(diff: Diff) -> dict: scan_failed = False From 0df07529e6100963d410c2de0477f7f11794f668 Mon Sep 17 00:00:00 2001 From: Orlando Barrera II Date: Wed, 19 Feb 2025 11:49:47 -0600 Subject: [PATCH 09/22] Testing the sarif file parsing --- socketsecurity/core/messages.py | 49 ++++++++++++++++++--------------- 1 file changed, 27 insertions(+), 22 deletions(-) diff --git a/socketsecurity/core/messages.py b/socketsecurity/core/messages.py index 16121c2..6580fa7 100644 --- a/socketsecurity/core/messages.py +++ b/socketsecurity/core/messages.py @@ -18,7 +18,7 @@ class Messages: def map_severity_to_sarif(severity: str) -> str: """ Map Socket severity levels to SARIF levels (GitHub code scanning). - + 'low' -> 'note' 'medium' or 'middle' -> 'warning' 'high' or 'critical' -> 'error' @@ -37,10 +37,18 @@ def find_line_in_file(packagename: str, packageversion: str, manifest_file: str) """ Finds the line number and snippet of code for the given package/version in a manifest file. Returns a 2-tuple: (line_number, snippet_or_message). + + Supports: + 1) JSON-based manifest files (package-lock.json, Pipfile.lock, composer.lock) + - Locates a dictionary entry with the matching package & version + - Does a rough line-based search to find the actual line in the raw text + 2) Text-based (requirements.txt, package.json, yarn.lock, etc.) + - Uses compiled regex patterns to detect a match line by line """ + # Extract just the file name to detect manifest type file_type = Path(manifest_file).name - logging.debug("Processing manifest file: %s", manifest_file) - + logging.debug("Processing file: %s", manifest_file) + # ---------------------------------------------------- # 1) JSON-based manifest files # ---------------------------------------------------- @@ -48,20 +56,16 @@ def find_line_in_file(packagename: str, packageversion: str, manifest_file: str) try: with open(manifest_file, "r", encoding="utf-8") as f: raw_text = f.read() - logging.debug("Raw text length: %d", len(raw_text)) - try: - data = json.loads(raw_text) - except json.JSONDecodeError: - data = {} - logging.debug("JSON decode failed for %s", manifest_file) - + logging.debug("Read %d characters from %s", len(raw_text), manifest_file) + data = json.loads(raw_text) packages_dict = ( data.get("packages") or data.get("default") or data.get("dependencies") or {} ) - logging.debug("Packages dict keys: %s", list(packages_dict.keys())) + logging.debug("Found package keys: %s", list(packages_dict.keys())) + found_key = None found_info = None for key, value in packages_dict.items(): @@ -75,17 +79,18 @@ def find_line_in_file(packagename: str, packageversion: str, manifest_file: str) needle_key = f'"{found_key}":' needle_version = f'"version": "{packageversion}"' lines = raw_text.splitlines() - logging.debug("Total lines: %d", len(lines)) + logging.debug("Total lines in %s: %d", manifest_file, len(lines)) for i, line in enumerate(lines, start=1): if (needle_key in line) or (needle_version in line): - logging.debug("Found match at line %d: %s", i, line.strip()) + logging.debug("Match found at line %d in %s: %s", i, manifest_file, line.strip()) return i, line.strip() return 1, f'"{found_key}": {found_info}' else: return 1, f"{packagename} {packageversion} (not found in {manifest_file})" - except (FileNotFoundError, json.JSONDecodeError): + except (FileNotFoundError, json.JSONDecodeError) as e: + logging.error("Error reading JSON from %s: %s", manifest_file, e) return 1, f"Error reading {manifest_file}" - + # ---------------------------------------------------- # 2) Text-based / line-based manifests # ---------------------------------------------------- @@ -123,7 +128,7 @@ def find_line_in_file(packagename: str, packageversion: str, manifest_file: str) for line_number, line_content in enumerate(lines, start=1): line_main = line_content.split(";", 1)[0].strip() if re.search(searchstring, line_main, re.IGNORECASE): - logging.debug("Match found in %s at line %d: %s", manifest_file, line_number, line_content.strip()) + logging.debug("Match found at line %d in %s: %s", line_number, manifest_file, line_content.strip()) return line_number, line_content.strip() except FileNotFoundError: return 1, f"{manifest_file} not found" @@ -167,7 +172,7 @@ def create_security_comment_sarif(diff) -> dict: """ Create SARIF-compliant output from the diff report, including dynamic URL generation based on manifest type and improved
formatting for GitHub SARIF display. - + This function now: - Accepts multiple manifest files from alert.introduced_by or alert.manifests. - Generates one SARIF location per manifest file. @@ -216,12 +221,12 @@ def create_security_comment_sarif(diff) -> dict: logging.debug("Alert %s manifest_files before fallback: %s", rule_id, manifest_files) if not manifest_files: manifest_files = ["requirements.txt"] - logging.debug("Alert %s: Falling back to manifest_files: %s", rule_id, manifest_files) + logging.debug("Alert %s falling back to: %s", rule_id, manifest_files) - logging.debug("Alert %s: Using manifest_file for URL: %s", rule_id, manifest_files[0]) + logging.debug("Alert %s using manifest_file for URL: %s", rule_id, manifest_files[0]) socket_url = Messages.get_manifest_type_url(manifest_files[0], pkg_name, pkg_version) short_desc = (f"{alert.props.get('note', '')}

Suggested Action:
{alert.suggestion}" - f"
{socket_url}") + f"
{socket_url}") full_desc = "{} - {}".format(alert.title, alert.description.replace('\r\n', '
')) if rule_id not in rules_map: @@ -241,7 +246,7 @@ def create_security_comment_sarif(diff) -> dict: for mf in manifest_files: line_number, line_content = Messages.find_line_in_file(pkg_name, pkg_version, mf) if line_number < 1: - line_number = 1 + line_number = 1 # Ensure SARIF compliance. logging.debug("Alert %s: Manifest %s, line %d: %s", rule_id, mf, line_number, line_content) locations.append({ "physicalLocation": { @@ -264,7 +269,7 @@ def create_security_comment_sarif(diff) -> dict: sarif_data["runs"][0]["results"] = results_list return sarif_data - + @staticmethod def create_security_comment_json(diff: Diff) -> dict: scan_failed = False From 12e8e6e05182ec60eb2c6e6e39e7531b9e96d8be Mon Sep 17 00:00:00 2001 From: Orlando Barrera II Date: Wed, 19 Feb 2025 11:56:43 -0600 Subject: [PATCH 10/22] Testing the sarif file parsing --- socketsecurity/core/messages.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/socketsecurity/core/messages.py b/socketsecurity/core/messages.py index 6580fa7..2929a34 100644 --- a/socketsecurity/core/messages.py +++ b/socketsecurity/core/messages.py @@ -45,7 +45,6 @@ def find_line_in_file(packagename: str, packageversion: str, manifest_file: str) 2) Text-based (requirements.txt, package.json, yarn.lock, etc.) - Uses compiled regex patterns to detect a match line by line """ - # Extract just the file name to detect manifest type file_type = Path(manifest_file).name logging.debug("Processing file: %s", manifest_file) @@ -65,7 +64,6 @@ def find_line_in_file(packagename: str, packageversion: str, manifest_file: str) or {} ) logging.debug("Found package keys: %s", list(packages_dict.keys())) - found_key = None found_info = None for key, value in packages_dict.items(): @@ -88,7 +86,7 @@ def find_line_in_file(packagename: str, packageversion: str, manifest_file: str) else: return 1, f"{packagename} {packageversion} (not found in {manifest_file})" except (FileNotFoundError, json.JSONDecodeError) as e: - logging.error("Error reading JSON from %s: %s", manifest_file, e) + logging.error("Error reading %s: %s", manifest_file, e) return 1, f"Error reading {manifest_file}" # ---------------------------------------------------- @@ -172,11 +170,11 @@ def create_security_comment_sarif(diff) -> dict: """ Create SARIF-compliant output from the diff report, including dynamic URL generation based on manifest type and improved
formatting for GitHub SARIF display. - + This function now: - Accepts multiple manifest files from alert.introduced_by or alert.manifests. - Generates one SARIF location per manifest file. - - Falls back to a default ("requirements.txt") if none is found. + - Does NOT fall back to 'requirements.txt' if no manifest file is provided. """ if len(diff.new_alerts) == 0: for alert in diff.new_alerts: @@ -209,6 +207,7 @@ def create_security_comment_sarif(diff) -> dict: # --- Extract manifest files from alert data --- manifest_files = [] + logging.debug("Alert %s - introduced_by: %s, manifests: %s", rule_id, alert.introduced_by, getattr(alert, 'manifests', None)) if alert.introduced_by and isinstance(alert.introduced_by, list): for entry in alert.introduced_by: if isinstance(entry, list) and len(entry) >= 2: @@ -218,12 +217,13 @@ def create_security_comment_sarif(diff) -> dict: elif hasattr(alert, 'manifests') and alert.manifests: manifest_files = [mf.strip() for mf in alert.manifests.split(";") if mf.strip()] - logging.debug("Alert %s manifest_files before fallback: %s", rule_id, manifest_files) if not manifest_files: - manifest_files = ["requirements.txt"] - logging.debug("Alert %s falling back to: %s", rule_id, manifest_files) + # Do not fall back to requirements.txt; log an error instead. + logging.error("Alert %s: No manifest file found; cannot determine file location.", rule_id) + continue # Skip this alert - logging.debug("Alert %s using manifest_file for URL: %s", rule_id, manifest_files[0]) + logging.debug("Alert %s using manifest_files: %s", rule_id, manifest_files) + # Use the first manifest for URL generation. socket_url = Messages.get_manifest_type_url(manifest_files[0], pkg_name, pkg_version) short_desc = (f"{alert.props.get('note', '')}

Suggested Action:
{alert.suggestion}" f"
{socket_url}") From 77eee7d4ea084cf436ded8506ae3868bfc5027d1 Mon Sep 17 00:00:00 2001 From: Orlando Barrera II Date: Wed, 19 Feb 2025 12:06:24 -0600 Subject: [PATCH 11/22] Testing the sarif file parsing --- socketsecurity/core/messages.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/socketsecurity/core/messages.py b/socketsecurity/core/messages.py index 2929a34..6d06df1 100644 --- a/socketsecurity/core/messages.py +++ b/socketsecurity/core/messages.py @@ -18,7 +18,7 @@ class Messages: def map_severity_to_sarif(severity: str) -> str: """ Map Socket severity levels to SARIF levels (GitHub code scanning). - + 'low' -> 'note' 'medium' or 'middle' -> 'warning' 'high' or 'critical' -> 'error' @@ -45,6 +45,7 @@ def find_line_in_file(packagename: str, packageversion: str, manifest_file: str) 2) Text-based (requirements.txt, package.json, yarn.lock, etc.) - Uses compiled regex patterns to detect a match line by line """ + # Extract just the file name to detect manifest type file_type = Path(manifest_file).name logging.debug("Processing file: %s", manifest_file) @@ -206,8 +207,8 @@ def create_security_comment_sarif(diff) -> dict: severity = alert.severity # --- Extract manifest files from alert data --- - manifest_files = [] logging.debug("Alert %s - introduced_by: %s, manifests: %s", rule_id, alert.introduced_by, getattr(alert, 'manifests', None)) + manifest_files = [] if alert.introduced_by and isinstance(alert.introduced_by, list): for entry in alert.introduced_by: if isinstance(entry, list) and len(entry) >= 2: @@ -218,9 +219,8 @@ def create_security_comment_sarif(diff) -> dict: manifest_files = [mf.strip() for mf in alert.manifests.split(";") if mf.strip()] if not manifest_files: - # Do not fall back to requirements.txt; log an error instead. logging.error("Alert %s: No manifest file found; cannot determine file location.", rule_id) - continue # Skip this alert + continue # Skip this alert if no manifest is provided logging.debug("Alert %s using manifest_files: %s", rule_id, manifest_files) # Use the first manifest for URL generation. @@ -246,7 +246,7 @@ def create_security_comment_sarif(diff) -> dict: for mf in manifest_files: line_number, line_content = Messages.find_line_in_file(pkg_name, pkg_version, mf) if line_number < 1: - line_number = 1 # Ensure SARIF compliance. + line_number = 1 logging.debug("Alert %s: Manifest %s, line %d: %s", rule_id, mf, line_number, line_content) locations.append({ "physicalLocation": { From f9d3f1c2ccdd15ccd4a31d38978f1b85600e9b4a Mon Sep 17 00:00:00 2001 From: Orlando Barrera II Date: Wed, 19 Feb 2025 12:30:17 -0600 Subject: [PATCH 12/22] Testing the sarif file parsing --- socketsecurity/core/messages.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/socketsecurity/core/messages.py b/socketsecurity/core/messages.py index 6d06df1..759bd0e 100644 --- a/socketsecurity/core/messages.py +++ b/socketsecurity/core/messages.py @@ -18,7 +18,7 @@ class Messages: def map_severity_to_sarif(severity: str) -> str: """ Map Socket severity levels to SARIF levels (GitHub code scanning). - + 'low' -> 'note' 'medium' or 'middle' -> 'warning' 'high' or 'critical' -> 'error' @@ -211,8 +211,11 @@ def create_security_comment_sarif(diff) -> dict: manifest_files = [] if alert.introduced_by and isinstance(alert.introduced_by, list): for entry in alert.introduced_by: - if isinstance(entry, list) and len(entry) >= 2: - manifest_files.append(entry[1].strip()) + # Accept lists or tuples + if isinstance(entry, (list, tuple)) and len(entry) >= 2: + # Split the second element if it contains semicolons + files = [f.strip() for f in entry[1].split(";") if f.strip()] + manifest_files.extend(files) elif isinstance(entry, str): manifest_files.extend([m.strip() for m in entry.split(";") if m.strip()]) elif hasattr(alert, 'manifests') and alert.manifests: @@ -246,7 +249,7 @@ def create_security_comment_sarif(diff) -> dict: for mf in manifest_files: line_number, line_content = Messages.find_line_in_file(pkg_name, pkg_version, mf) if line_number < 1: - line_number = 1 + line_number = 1 # Ensure SARIF compliance. logging.debug("Alert %s: Manifest %s, line %d: %s", rule_id, mf, line_number, line_content) locations.append({ "physicalLocation": { From c5883efa8cc977926deded0cbcd6cb200fd90d2e Mon Sep 17 00:00:00 2001 From: Orlando Barrera II Date: Wed, 19 Feb 2025 13:06:48 -0600 Subject: [PATCH 13/22] Testing the sarif file parsing --- socketsecurity/core/messages.py | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/socketsecurity/core/messages.py b/socketsecurity/core/messages.py index 759bd0e..c2ece4d 100644 --- a/socketsecurity/core/messages.py +++ b/socketsecurity/core/messages.py @@ -45,13 +45,10 @@ def find_line_in_file(packagename: str, packageversion: str, manifest_file: str) 2) Text-based (requirements.txt, package.json, yarn.lock, etc.) - Uses compiled regex patterns to detect a match line by line """ - # Extract just the file name to detect manifest type file_type = Path(manifest_file).name - logging.debug("Processing file: %s", manifest_file) + logging.debug("Processing file for line lookup: %s", manifest_file) - # ---------------------------------------------------- - # 1) JSON-based manifest files - # ---------------------------------------------------- + # (Existing logic remains unchanged, with logs added where necessary) if file_type in ["package-lock.json", "Pipfile.lock", "composer.lock"]: try: with open(manifest_file, "r", encoding="utf-8") as f: @@ -64,7 +61,7 @@ def find_line_in_file(packagename: str, packageversion: str, manifest_file: str) or data.get("dependencies") or {} ) - logging.debug("Found package keys: %s", list(packages_dict.keys())) + logging.debug("Found package keys in %s: %s", manifest_file, list(packages_dict.keys())) found_key = None found_info = None for key, value in packages_dict.items(): @@ -81,7 +78,7 @@ def find_line_in_file(packagename: str, packageversion: str, manifest_file: str) logging.debug("Total lines in %s: %d", manifest_file, len(lines)) for i, line in enumerate(lines, start=1): if (needle_key in line) or (needle_version in line): - logging.debug("Match found at line %d in %s: %s", i, manifest_file, line.strip()) + logging.debug("Found match at line %d in %s: %s", i, manifest_file, line.strip()) return i, line.strip() return 1, f'"{found_key}": {found_info}' else: @@ -90,9 +87,7 @@ def find_line_in_file(packagename: str, packageversion: str, manifest_file: str) logging.error("Error reading %s: %s", manifest_file, e) return 1, f"Error reading {manifest_file}" - # ---------------------------------------------------- - # 2) Text-based / line-based manifests - # ---------------------------------------------------- + # Text-based manifests search_patterns = { "package.json": rf'"{packagename}":\s*"{packageversion}"', "yarn.lock": rf'{packagename}@{packageversion}', @@ -176,6 +171,7 @@ def create_security_comment_sarif(diff) -> dict: - Accepts multiple manifest files from alert.introduced_by or alert.manifests. - Generates one SARIF location per manifest file. - Does NOT fall back to 'requirements.txt' if no manifest file is provided. + - Adds detailed logging to validate assumptions. """ if len(diff.new_alerts) == 0: for alert in diff.new_alerts: @@ -206,14 +202,14 @@ def create_security_comment_sarif(diff) -> dict: rule_id = f"{pkg_name}=={pkg_version}" severity = alert.severity - # --- Extract manifest files from alert data --- + # Log raw alert data for manifest extraction. logging.debug("Alert %s - introduced_by: %s, manifests: %s", rule_id, alert.introduced_by, getattr(alert, 'manifests', None)) + manifest_files = [] if alert.introduced_by and isinstance(alert.introduced_by, list): for entry in alert.introduced_by: - # Accept lists or tuples if isinstance(entry, (list, tuple)) and len(entry) >= 2: - # Split the second element if it contains semicolons + # Split semicolon-separated file names. files = [f.strip() for f in entry[1].split(";") if f.strip()] manifest_files.extend(files) elif isinstance(entry, str): @@ -221,12 +217,16 @@ def create_security_comment_sarif(diff) -> dict: elif hasattr(alert, 'manifests') and alert.manifests: manifest_files = [mf.strip() for mf in alert.manifests.split(";") if mf.strip()] + logging.debug("Alert %s - extracted manifest_files: %s", rule_id, manifest_files) + if not manifest_files: logging.error("Alert %s: No manifest file found; cannot determine file location.", rule_id) continue # Skip this alert if no manifest is provided - logging.debug("Alert %s using manifest_files: %s", rule_id, manifest_files) + logging.debug("Alert %s - using manifest_files for processing: %s", rule_id, manifest_files) + # Use the first manifest for URL generation. + logging.debug("Alert %s - Using file for URL generation: %s", rule_id, manifest_files[0]) socket_url = Messages.get_manifest_type_url(manifest_files[0], pkg_name, pkg_version) short_desc = (f"{alert.props.get('note', '')}

Suggested Action:
{alert.suggestion}" f"
{socket_url}") @@ -244,9 +244,10 @@ def create_security_comment_sarif(diff) -> dict: }, } - # Create a SARIF location for each manifest file. + # For each manifest file, attempt to find the package declaration. locations = [] for mf in manifest_files: + logging.debug("Alert %s - Processing manifest file: %s", rule_id, mf) line_number, line_content = Messages.find_line_in_file(pkg_name, pkg_version, mf) if line_number < 1: line_number = 1 # Ensure SARIF compliance. From b5b7b53523f8ccb99ed4f95f5a73befa7d03ed29 Mon Sep 17 00:00:00 2001 From: Orlando Barrera II Date: Wed, 19 Feb 2025 13:20:06 -0600 Subject: [PATCH 14/22] Testing the sarif file parsing --- socketsecurity/core/messages.py | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/socketsecurity/core/messages.py b/socketsecurity/core/messages.py index c2ece4d..956d3d6 100644 --- a/socketsecurity/core/messages.py +++ b/socketsecurity/core/messages.py @@ -18,7 +18,7 @@ class Messages: def map_severity_to_sarif(severity: str) -> str: """ Map Socket severity levels to SARIF levels (GitHub code scanning). - + 'low' -> 'note' 'medium' or 'middle' -> 'warning' 'high' or 'critical' -> 'error' @@ -45,10 +45,13 @@ def find_line_in_file(packagename: str, packageversion: str, manifest_file: str) 2) Text-based (requirements.txt, package.json, yarn.lock, etc.) - Uses compiled regex patterns to detect a match line by line """ + # Extract just the file name to detect manifest type file_type = Path(manifest_file).name logging.debug("Processing file for line lookup: %s", manifest_file) - # (Existing logic remains unchanged, with logs added where necessary) + # ---------------------------------------------------- + # 1) JSON-based manifest files + # ---------------------------------------------------- if file_type in ["package-lock.json", "Pipfile.lock", "composer.lock"]: try: with open(manifest_file, "r", encoding="utf-8") as f: @@ -87,9 +90,12 @@ def find_line_in_file(packagename: str, packageversion: str, manifest_file: str) logging.error("Error reading %s: %s", manifest_file, e) return 1, f"Error reading {manifest_file}" - # Text-based manifests + # ---------------------------------------------------- + # 2) Text-based / line-based manifests + # ---------------------------------------------------- search_patterns = { - "package.json": rf'"{packagename}":\s*"{packageversion}"', + # Updated pattern for package.json to allow optional '^' or '~' + "package.json": rf'"{packagename}":\s*"[\^~]?{re.escape(packageversion)}"', "yarn.lock": rf'{packagename}@{packageversion}', "pnpm-lock.yaml": rf'"{re.escape(packagename)}"\s*:\s*\{{[^}}]*"version":\s*"{re.escape(packageversion)}"', "requirements.txt": rf'^{re.escape(packagename)}\s*(?:==|===|!=|>=|<=|~=|\s+)?\s*{re.escape(packageversion)}(?:\s*;.*)?$', @@ -171,7 +177,7 @@ def create_security_comment_sarif(diff) -> dict: - Accepts multiple manifest files from alert.introduced_by or alert.manifests. - Generates one SARIF location per manifest file. - Does NOT fall back to 'requirements.txt' if no manifest file is provided. - - Adds detailed logging to validate assumptions. + - Adds detailed logging to validate our assumptions. """ if len(diff.new_alerts) == 0: for alert in diff.new_alerts: @@ -209,7 +215,6 @@ def create_security_comment_sarif(diff) -> dict: if alert.introduced_by and isinstance(alert.introduced_by, list): for entry in alert.introduced_by: if isinstance(entry, (list, tuple)) and len(entry) >= 2: - # Split semicolon-separated file names. files = [f.strip() for f in entry[1].split(";") if f.strip()] manifest_files.extend(files) elif isinstance(entry, str): @@ -244,7 +249,7 @@ def create_security_comment_sarif(diff) -> dict: }, } - # For each manifest file, attempt to find the package declaration. + # Create a SARIF location for each manifest file. locations = [] for mf in manifest_files: logging.debug("Alert %s - Processing manifest file: %s", rule_id, mf) From c1b37f657ecec9a42f214307e66074fdf854ae49 Mon Sep 17 00:00:00 2001 From: Orlando Barrera II Date: Wed, 19 Feb 2025 14:31:36 -0600 Subject: [PATCH 15/22] Testing the sarif file parsing --- socketsecurity/core/messages.py | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/socketsecurity/core/messages.py b/socketsecurity/core/messages.py index 956d3d6..bc634b0 100644 --- a/socketsecurity/core/messages.py +++ b/socketsecurity/core/messages.py @@ -41,11 +41,10 @@ def find_line_in_file(packagename: str, packageversion: str, manifest_file: str) Supports: 1) JSON-based manifest files (package-lock.json, Pipfile.lock, composer.lock) - Locates a dictionary entry with the matching package & version - - Does a rough line-based search to find the actual line in the raw text + - Does a rough line-based search (by matching the key) in the raw text 2) Text-based (requirements.txt, package.json, yarn.lock, etc.) - Uses compiled regex patterns to detect a match line by line """ - # Extract just the file name to detect manifest type file_type = Path(manifest_file).name logging.debug("Processing file for line lookup: %s", manifest_file) @@ -68,6 +67,7 @@ def find_line_in_file(packagename: str, packageversion: str, manifest_file: str) found_key = None found_info = None for key, value in packages_dict.items(): + # For NPM package-lock, keys might look like "node_modules/axios" if key.endswith(packagename) and "version" in value: if value["version"] == packageversion: found_key = key @@ -75,13 +75,13 @@ def find_line_in_file(packagename: str, packageversion: str, manifest_file: str) break if found_key and found_info: + # Only use the found key to locate the line needle_key = f'"{found_key}":' - needle_version = f'"version": "{packageversion}"' lines = raw_text.splitlines() logging.debug("Total lines in %s: %d", manifest_file, len(lines)) for i, line in enumerate(lines, start=1): - if (needle_key in line) or (needle_version in line): - logging.debug("Found match at line %d in %s: %s", i, manifest_file, line.strip()) + if needle_key in line: + logging.debug("Match found at line %d in %s: %s", i, manifest_file, line.strip()) return i, line.strip() return 1, f'"{found_key}": {found_info}' else: @@ -94,7 +94,6 @@ def find_line_in_file(packagename: str, packageversion: str, manifest_file: str) # 2) Text-based / line-based manifests # ---------------------------------------------------- search_patterns = { - # Updated pattern for package.json to allow optional '^' or '~' "package.json": rf'"{packagename}":\s*"[\^~]?{re.escape(packageversion)}"', "yarn.lock": rf'{packagename}@{packageversion}', "pnpm-lock.yaml": rf'"{re.escape(packagename)}"\s*:\s*\{{[^}}]*"version":\s*"{re.escape(packageversion)}"', @@ -226,10 +225,9 @@ def create_security_comment_sarif(diff) -> dict: if not manifest_files: logging.error("Alert %s: No manifest file found; cannot determine file location.", rule_id) - continue # Skip this alert if no manifest is provided + continue logging.debug("Alert %s - using manifest_files for processing: %s", rule_id, manifest_files) - # Use the first manifest for URL generation. logging.debug("Alert %s - Using file for URL generation: %s", rule_id, manifest_files[0]) socket_url = Messages.get_manifest_type_url(manifest_files[0], pkg_name, pkg_version) @@ -255,7 +253,7 @@ def create_security_comment_sarif(diff) -> dict: logging.debug("Alert %s - Processing manifest file: %s", rule_id, mf) line_number, line_content = Messages.find_line_in_file(pkg_name, pkg_version, mf) if line_number < 1: - line_number = 1 # Ensure SARIF compliance. + line_number = 1 logging.debug("Alert %s: Manifest %s, line %d: %s", rule_id, mf, line_number, line_content) locations.append({ "physicalLocation": { From 389a970f0072cd75327c2c0dce27f7cda0b47d02 Mon Sep 17 00:00:00 2001 From: Orlando Barrera II Date: Wed, 19 Feb 2025 14:33:55 -0600 Subject: [PATCH 16/22] Testing the sarif file parsing --- socketsecurity/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/socketsecurity/__init__.py b/socketsecurity/__init__.py index 872fb53..2f4f50d 100644 --- a/socketsecurity/__init__.py +++ b/socketsecurity/__init__.py @@ -1,2 +1,2 @@ __author__ = 'socket.dev' -__version__ = '2.0.5' +__version__ = '2.0.6' From 5833f5a2542e78b1be1a59d00f824a3161cb116e Mon Sep 17 00:00:00 2001 From: Orlando Barrera II Date: Wed, 19 Feb 2025 14:59:01 -0600 Subject: [PATCH 17/22] Testing the sarif file parsing --- socketsecurity/core/messages.py | 103 +++++++++++++++----------------- 1 file changed, 49 insertions(+), 54 deletions(-) diff --git a/socketsecurity/core/messages.py b/socketsecurity/core/messages.py index bc634b0..9b17f2c 100644 --- a/socketsecurity/core/messages.py +++ b/socketsecurity/core/messages.py @@ -41,16 +41,13 @@ def find_line_in_file(packagename: str, packageversion: str, manifest_file: str) Supports: 1) JSON-based manifest files (package-lock.json, Pipfile.lock, composer.lock) - Locates a dictionary entry with the matching package & version - - Does a rough line-based search (by matching the key) in the raw text + - Searches the raw text for the key 2) Text-based (requirements.txt, package.json, yarn.lock, etc.) - - Uses compiled regex patterns to detect a match line by line + - Uses regex patterns to detect a match line by line """ file_type = Path(manifest_file).name logging.debug("Processing file for line lookup: %s", manifest_file) - # ---------------------------------------------------- - # 1) JSON-based manifest files - # ---------------------------------------------------- if file_type in ["package-lock.json", "Pipfile.lock", "composer.lock"]: try: with open(manifest_file, "r", encoding="utf-8") as f: @@ -67,7 +64,6 @@ def find_line_in_file(packagename: str, packageversion: str, manifest_file: str) found_key = None found_info = None for key, value in packages_dict.items(): - # For NPM package-lock, keys might look like "node_modules/axios" if key.endswith(packagename) and "version" in value: if value["version"] == packageversion: found_key = key @@ -75,13 +71,12 @@ def find_line_in_file(packagename: str, packageversion: str, manifest_file: str) break if found_key and found_info: - # Only use the found key to locate the line needle_key = f'"{found_key}":' lines = raw_text.splitlines() logging.debug("Total lines in %s: %d", manifest_file, len(lines)) for i, line in enumerate(lines, start=1): if needle_key in line: - logging.debug("Match found at line %d in %s: %s", i, manifest_file, line.strip()) + logging.debug("Found match at line %d in %s: %s", i, manifest_file, line.strip()) return i, line.strip() return 1, f'"{found_key}": {found_info}' else: @@ -90,9 +85,7 @@ def find_line_in_file(packagename: str, packageversion: str, manifest_file: str) logging.error("Error reading %s: %s", manifest_file, e) return 1, f"Error reading {manifest_file}" - # ---------------------------------------------------- - # 2) Text-based / line-based manifests - # ---------------------------------------------------- + # Text-based manifests search_patterns = { "package.json": rf'"{packagename}":\s*"[\^~]?{re.escape(packageversion)}"', "yarn.lock": rf'{packagename}@{packageversion}', @@ -174,7 +167,8 @@ def create_security_comment_sarif(diff) -> dict: This function now: - Accepts multiple manifest files from alert.introduced_by or alert.manifests. - - Generates one SARIF location per manifest file. + - Generates an individual SARIF result for each manifest file. + - Appends the manifest file name to the alert name to make each result unique. - Does NOT fall back to 'requirements.txt' if no manifest file is provided. - Adds detailed logging to validate our assumptions. """ @@ -204,11 +198,11 @@ def create_security_comment_sarif(diff) -> dict: for alert in diff.new_alerts: pkg_name = alert.pkg_name pkg_version = alert.pkg_version - rule_id = f"{pkg_name}=={pkg_version}" + base_rule_id = f"{pkg_name}=={pkg_version}" severity = alert.severity # Log raw alert data for manifest extraction. - logging.debug("Alert %s - introduced_by: %s, manifests: %s", rule_id, alert.introduced_by, getattr(alert, 'manifests', None)) + logging.debug("Alert %s - introduced_by: %s, manifests: %s", base_rule_id, alert.introduced_by, getattr(alert, 'manifests', None)) manifest_files = [] if alert.introduced_by and isinstance(alert.introduced_by, list): @@ -221,56 +215,57 @@ def create_security_comment_sarif(diff) -> dict: elif hasattr(alert, 'manifests') and alert.manifests: manifest_files = [mf.strip() for mf in alert.manifests.split(";") if mf.strip()] - logging.debug("Alert %s - extracted manifest_files: %s", rule_id, manifest_files) - + logging.debug("Alert %s - extracted manifest_files: %s", base_rule_id, manifest_files) if not manifest_files: - logging.error("Alert %s: No manifest file found; cannot determine file location.", rule_id) + logging.error("Alert %s: No manifest file found; cannot determine file location.", base_rule_id) continue - logging.debug("Alert %s - using manifest_files for processing: %s", rule_id, manifest_files) - # Use the first manifest for URL generation. - logging.debug("Alert %s - Using file for URL generation: %s", rule_id, manifest_files[0]) - socket_url = Messages.get_manifest_type_url(manifest_files[0], pkg_name, pkg_version) - short_desc = (f"{alert.props.get('note', '')}

Suggested Action:
{alert.suggestion}" - f"
{socket_url}") - full_desc = "{} - {}".format(alert.title, alert.description.replace('\r\n', '
')) - - if rule_id not in rules_map: - rules_map[rule_id] = { - "id": rule_id, - "name": f"{pkg_name}=={pkg_version}", - "shortDescription": {"text": f"Alert generated for {rule_id} by Socket Security"}, - "fullDescription": {"text": full_desc}, - "helpUri": socket_url, - "defaultConfiguration": { - "level": Messages.map_severity_to_sarif(severity) - }, - } + logging.debug("Alert %s - using manifest_files for processing: %s", base_rule_id, manifest_files) - # Create a SARIF location for each manifest file. - locations = [] + # For each manifest file, create an individual SARIF result. for mf in manifest_files: - logging.debug("Alert %s - Processing manifest file: %s", rule_id, mf) + logging.debug("Alert %s - Processing manifest file: %s", base_rule_id, mf) + socket_url = Messages.get_manifest_type_url(mf, pkg_name, pkg_version) line_number, line_content = Messages.find_line_in_file(pkg_name, pkg_version, mf) if line_number < 1: line_number = 1 - logging.debug("Alert %s: Manifest %s, line %d: %s", rule_id, mf, line_number, line_content) - locations.append({ - "physicalLocation": { - "artifactLocation": {"uri": mf}, - "region": { - "startLine": line_number, - "snippet": {"text": line_content}, + logging.debug("Alert %s: Manifest %s, line %d: %s", base_rule_id, mf, line_number, line_content) + + # Create a unique rule id and name by appending the file prefix. + unique_rule_id = f"{base_rule_id} ({mf})" + rule_name = unique_rule_id + + short_desc = (f"{alert.props.get('note', '')}

Suggested Action:
{alert.suggestion}" + f"
{socket_url}") + full_desc = "{} - {}".format(alert.title, alert.description.replace('\r\n', '
')) + + # Add the rule if not already defined. + if unique_rule_id not in rules_map: + rules_map[unique_rule_id] = { + "id": unique_rule_id, + "name": rule_name, + "shortDescription": {"text": f"Alert generated for {unique_rule_id} by Socket Security"}, + "fullDescription": {"text": full_desc}, + "helpUri": socket_url, + "defaultConfiguration": { + "level": Messages.map_severity_to_sarif(severity) }, } - }) - - result_obj = { - "ruleId": rule_id, - "message": {"text": short_desc}, - "locations": locations, - } - results_list.append(result_obj) + + result_obj = { + "ruleId": unique_rule_id, + "message": {"text": short_desc}, + "locations": [{ + "physicalLocation": { + "artifactLocation": {"uri": mf}, + "region": { + "startLine": line_number, + "snippet": {"text": line_content}, + }, + } + }] + } + results_list.append(result_obj) sarif_data["runs"][0]["tool"]["driver"]["rules"] = list(rules_map.values()) sarif_data["runs"][0]["results"] = results_list From b58656a33170f9c3a8c98951deb42c549f0e4b68 Mon Sep 17 00:00:00 2001 From: Orlando Barrera II Date: Wed, 19 Feb 2025 15:34:54 -0600 Subject: [PATCH 18/22] Testing the sarif file parsing --- socketsecurity/core/messages.py | 39 +++++++++++++++++++-------------- 1 file changed, 23 insertions(+), 16 deletions(-) diff --git a/socketsecurity/core/messages.py b/socketsecurity/core/messages.py index 9b17f2c..8cf28ac 100644 --- a/socketsecurity/core/messages.py +++ b/socketsecurity/core/messages.py @@ -41,13 +41,16 @@ def find_line_in_file(packagename: str, packageversion: str, manifest_file: str) Supports: 1) JSON-based manifest files (package-lock.json, Pipfile.lock, composer.lock) - Locates a dictionary entry with the matching package & version - - Searches the raw text for the key - 2) Text-based (requirements.txt, package.json, yarn.lock, etc.) - - Uses regex patterns to detect a match line by line + - Searches the raw text for the dependency key + 2) Text-based (requirements.txt, package.json, yarn.lock, pnpm-lock.yaml, etc.) + - Uses compiled regex patterns to detect a match line by line """ file_type = Path(manifest_file).name logging.debug("Processing file for line lookup: %s", manifest_file) + # ---------------------------------------------------- + # 1) JSON-based manifest files + # ---------------------------------------------------- if file_type in ["package-lock.json", "Pipfile.lock", "composer.lock"]: try: with open(manifest_file, "r", encoding="utf-8") as f: @@ -85,11 +88,16 @@ def find_line_in_file(packagename: str, packageversion: str, manifest_file: str) logging.error("Error reading %s: %s", manifest_file, e) return 1, f"Error reading {manifest_file}" - # Text-based manifests + # ---------------------------------------------------- + # 2) Text-based / line-based manifests + # ---------------------------------------------------- + # Updated search patterns; note the new pattern for pnpm-lock.yaml. search_patterns = { "package.json": rf'"{packagename}":\s*"[\^~]?{re.escape(packageversion)}"', "yarn.lock": rf'{packagename}@{packageversion}', - "pnpm-lock.yaml": rf'"{re.escape(packagename)}"\s*:\s*\{{[^}}]*"version":\s*"{re.escape(packageversion)}"', + # For pnpm-lock.yaml, look for a line in the packages section like: + # /bitget-main/19.4.9: + "pnpm-lock.yaml": rf'^/{re.escape(packagename)}/{re.escape(packageversion)}:', "requirements.txt": rf'^{re.escape(packagename)}\s*(?:==|===|!=|>=|<=|~=|\s+)?\s*{re.escape(packageversion)}(?:\s*;.*)?$', "pyproject.toml": rf'{packagename}\s*=\s*"{packageversion}"', "Pipfile": rf'"{packagename}"\s*=\s*"{packageversion}"', @@ -168,7 +176,7 @@ def create_security_comment_sarif(diff) -> dict: This function now: - Accepts multiple manifest files from alert.introduced_by or alert.manifests. - Generates an individual SARIF result for each manifest file. - - Appends the manifest file name to the alert name to make each result unique. + - Appends the manifest file name to the alert name (and rule ID) to make each result unique. - Does NOT fall back to 'requirements.txt' if no manifest file is provided. - Adds detailed logging to validate our assumptions. """ @@ -201,7 +209,6 @@ def create_security_comment_sarif(diff) -> dict: base_rule_id = f"{pkg_name}=={pkg_version}" severity = alert.severity - # Log raw alert data for manifest extraction. logging.debug("Alert %s - introduced_by: %s, manifests: %s", base_rule_id, alert.introduced_by, getattr(alert, 'manifests', None)) manifest_files = [] @@ -216,13 +223,14 @@ def create_security_comment_sarif(diff) -> dict: manifest_files = [mf.strip() for mf in alert.manifests.split(";") if mf.strip()] logging.debug("Alert %s - extracted manifest_files: %s", base_rule_id, manifest_files) + if not manifest_files: logging.error("Alert %s: No manifest file found; cannot determine file location.", base_rule_id) continue logging.debug("Alert %s - using manifest_files for processing: %s", base_rule_id, manifest_files) - # For each manifest file, create an individual SARIF result. + # For each manifest file, generate a separate result for mf in manifest_files: logging.debug("Alert %s - Processing manifest file: %s", base_rule_id, mf) socket_url = Messages.get_manifest_type_url(mf, pkg_name, pkg_version) @@ -230,28 +238,27 @@ def create_security_comment_sarif(diff) -> dict: if line_number < 1: line_number = 1 logging.debug("Alert %s: Manifest %s, line %d: %s", base_rule_id, mf, line_number, line_content) - - # Create a unique rule id and name by appending the file prefix. + + # Create a unique rule id and name by appending the manifest file name unique_rule_id = f"{base_rule_id} ({mf})" - rule_name = unique_rule_id - + rule_name = f"Alert {base_rule_id} ({mf})" + short_desc = (f"{alert.props.get('note', '')}

Suggested Action:
{alert.suggestion}" f"
{socket_url}") full_desc = "{} - {}".format(alert.title, alert.description.replace('\r\n', '
')) - - # Add the rule if not already defined. + if unique_rule_id not in rules_map: rules_map[unique_rule_id] = { "id": unique_rule_id, "name": rule_name, - "shortDescription": {"text": f"Alert generated for {unique_rule_id} by Socket Security"}, + "shortDescription": {"text": rule_name}, "fullDescription": {"text": full_desc}, "helpUri": socket_url, "defaultConfiguration": { "level": Messages.map_severity_to_sarif(severity) }, } - + result_obj = { "ruleId": unique_rule_id, "message": {"text": short_desc}, From 054acb8a00e9776b04d43a2d29c10a64d7535679 Mon Sep 17 00:00:00 2001 From: Orlando Barrera II Date: Wed, 19 Feb 2025 16:19:18 -0600 Subject: [PATCH 19/22] Testing the sarif file parsing --- socketsecurity/core/messages.py | 80 +++++++++++++++++---------------- 1 file changed, 41 insertions(+), 39 deletions(-) diff --git a/socketsecurity/core/messages.py b/socketsecurity/core/messages.py index 8cf28ac..08850cb 100644 --- a/socketsecurity/core/messages.py +++ b/socketsecurity/core/messages.py @@ -41,9 +41,9 @@ def find_line_in_file(packagename: str, packageversion: str, manifest_file: str) Supports: 1) JSON-based manifest files (package-lock.json, Pipfile.lock, composer.lock) - Locates a dictionary entry with the matching package & version - - Searches the raw text for the dependency key + - Searches the raw text for the key 2) Text-based (requirements.txt, package.json, yarn.lock, pnpm-lock.yaml, etc.) - - Uses compiled regex patterns to detect a match line by line + - Uses regex patterns to detect a match line by line """ file_type = Path(manifest_file).name logging.debug("Processing file for line lookup: %s", manifest_file) @@ -91,35 +91,37 @@ def find_line_in_file(packagename: str, packageversion: str, manifest_file: str) # ---------------------------------------------------- # 2) Text-based / line-based manifests # ---------------------------------------------------- - # Updated search patterns; note the new pattern for pnpm-lock.yaml. - search_patterns = { - "package.json": rf'"{packagename}":\s*"[\^~]?{re.escape(packageversion)}"', - "yarn.lock": rf'{packagename}@{packageversion}', - # For pnpm-lock.yaml, look for a line in the packages section like: - # /bitget-main/19.4.9: - "pnpm-lock.yaml": rf'^/{re.escape(packagename)}/{re.escape(packageversion)}:', - "requirements.txt": rf'^{re.escape(packagename)}\s*(?:==|===|!=|>=|<=|~=|\s+)?\s*{re.escape(packageversion)}(?:\s*;.*)?$', - "pyproject.toml": rf'{packagename}\s*=\s*"{packageversion}"', - "Pipfile": rf'"{packagename}"\s*=\s*"{packageversion}"', - "go.mod": rf'require\s+{re.escape(packagename)}\s+{re.escape(packageversion)}', - "go.sum": rf'{re.escape(packagename)}\s+{re.escape(packageversion)}', - "pom.xml": rf'{re.escape(packagename)}\s*{re.escape(packageversion)}', - "build.gradle": rf'implementation\s+"{re.escape(packagename)}:{re.escape(packageversion)}"', - "Gemfile": rf'gem\s+"{re.escape(packagename)}",\s*"{re.escape(packageversion)}"', - "Gemfile.lock": rf'\s+{re.escape(packagename)}\s+\({re.escape(packageversion)}\)', - ".csproj": rf'', - ".fsproj": rf'', - "paket.dependencies": rf'nuget\s+{re.escape(packagename)}\s+{re.escape(packageversion)}', - "Cargo.toml": rf'{re.escape(packagename)}\s*=\s*"{re.escape(packageversion)}"', - "build.sbt": rf'"{re.escape(packagename)}"\s*%\s*"{re.escape(packageversion)}"', - "Podfile": rf'pod\s+"{re.escape(packagename)}",\s*"{re.escape(packageversion)}"', - "Package.swift": rf'\.package\(name:\s*"{re.escape(packagename)}",\s*url:\s*".*?",\s*version:\s*"{re.escape(packageversion)}"\)', - "mix.exs": rf'\{{:{re.escape(packagename)},\s*"{re.escape(packageversion)}"\}}', - "composer.json": rf'"{re.escape(packagename)}":\s*"{re.escape(packageversion)}"', - "conanfile.txt": rf'{re.escape(packagename)}/{re.escape(packageversion)}', - "vcpkg.json": rf'"{re.escape(packagename)}":\s*"{re.escape(packageversion)}"', - } - searchstring = search_patterns.get(file_type, rf'{re.escape(packagename)}.*{re.escape(packageversion)}') + # For pnpm-lock.yaml, use a different pattern since its format is YAML. + if file_type.lower() == "pnpm-lock.yaml": + # Example pattern: /bitget-main/19.4.9: + searchstring = rf'/{re.escape(packagename)}/{re.escape(packageversion)}:' + else: + search_patterns = { + "package.json": rf'"{packagename}":\s*"[\^~]?{re.escape(packageversion)}"', + "yarn.lock": rf'{packagename}@{packageversion}', + "requirements.txt": rf'^{re.escape(packagename)}\s*(?:==|===|!=|>=|<=|~=|\s+)?\s*{re.escape(packageversion)}(?:\s*;.*)?$', + "pyproject.toml": rf'{packagename}\s*=\s*"{re.escape(packageversion)}"', + "Pipfile": rf'"{packagename}"\s*=\s*"{re.escape(packageversion)}"', + "go.mod": rf'require\s+{re.escape(packagename)}\s+{re.escape(packageversion)}', + "go.sum": rf'{re.escape(packagename)}\s+{re.escape(packageversion)}', + "pom.xml": rf'{re.escape(packagename)}\s*{re.escape(packageversion)}', + "build.gradle": rf'implementation\s+"{re.escape(packagename)}:{re.escape(packageversion)}"', + "Gemfile": rf'gem\s+"{re.escape(packagename)}",\s*"{re.escape(packageversion)}"', + "Gemfile.lock": rf'\s+{re.escape(packagename)}\s+\({re.escape(packageversion)}\)', + ".csproj": rf'', + ".fsproj": rf'', + "paket.dependencies": rf'nuget\s+{re.escape(packagename)}\s+{re.escape(packageversion)}', + "Cargo.toml": rf'{re.escape(packagename)}\s*=\s*"{re.escape(packageversion)}"', + "build.sbt": rf'"{re.escape(packagename)}"\s*%\s*"{re.escape(packageversion)}"', + "Podfile": rf'pod\s+"{re.escape(packagename)}",\s*"{re.escape(packageversion)}"', + "Package.swift": rf'\.package\(name:\s*"{re.escape(packagename)}",\s*url:\s*".*?",\s*version:\s*"{re.escape(packageversion)}"\)', + "mix.exs": rf'\{{:{re.escape(packagename)},\s*"{re.escape(packageversion)}"\}}', + "composer.json": rf'"{re.escape(packagename)}":\s*"{re.escape(packageversion)}"', + "conanfile.txt": rf'{re.escape(packagename)}/{re.escape(packageversion)}', + "vcpkg.json": rf'"{re.escape(packagename)}":\s*"{re.escape(packageversion)}"', + } + searchstring = search_patterns.get(file_type, rf'{re.escape(packagename)}.*{re.escape(packageversion)}') + logging.debug("Using search pattern for %s: %s", file_type, searchstring) try: with open(manifest_file, 'r', encoding="utf-8") as file: @@ -176,7 +178,7 @@ def create_security_comment_sarif(diff) -> dict: This function now: - Accepts multiple manifest files from alert.introduced_by or alert.manifests. - Generates an individual SARIF result for each manifest file. - - Appends the manifest file name to the alert name (and rule ID) to make each result unique. + - Appends the manifest file name to the rule ID and name for uniqueness. - Does NOT fall back to 'requirements.txt' if no manifest file is provided. - Adds detailed logging to validate our assumptions. """ @@ -209,6 +211,7 @@ def create_security_comment_sarif(diff) -> dict: base_rule_id = f"{pkg_name}=={pkg_version}" severity = alert.severity + # Log raw alert data for manifest extraction. logging.debug("Alert %s - introduced_by: %s, manifests: %s", base_rule_id, alert.introduced_by, getattr(alert, 'manifests', None)) manifest_files = [] @@ -223,14 +226,13 @@ def create_security_comment_sarif(diff) -> dict: manifest_files = [mf.strip() for mf in alert.manifests.split(";") if mf.strip()] logging.debug("Alert %s - extracted manifest_files: %s", base_rule_id, manifest_files) - if not manifest_files: logging.error("Alert %s: No manifest file found; cannot determine file location.", base_rule_id) continue logging.debug("Alert %s - using manifest_files for processing: %s", base_rule_id, manifest_files) - # For each manifest file, generate a separate result + # For each manifest file, create an individual SARIF result. for mf in manifest_files: logging.debug("Alert %s - Processing manifest file: %s", base_rule_id, mf) socket_url = Messages.get_manifest_type_url(mf, pkg_name, pkg_version) @@ -238,15 +240,15 @@ def create_security_comment_sarif(diff) -> dict: if line_number < 1: line_number = 1 logging.debug("Alert %s: Manifest %s, line %d: %s", base_rule_id, mf, line_number, line_content) - - # Create a unique rule id and name by appending the manifest file name + + # Create a unique rule id and name by appending the file name. unique_rule_id = f"{base_rule_id} ({mf})" rule_name = f"Alert {base_rule_id} ({mf})" - + short_desc = (f"{alert.props.get('note', '')}

Suggested Action:
{alert.suggestion}" f"
{socket_url}") full_desc = "{} - {}".format(alert.title, alert.description.replace('\r\n', '
')) - + if unique_rule_id not in rules_map: rules_map[unique_rule_id] = { "id": unique_rule_id, @@ -258,7 +260,7 @@ def create_security_comment_sarif(diff) -> dict: "level": Messages.map_severity_to_sarif(severity) }, } - + result_obj = { "ruleId": unique_rule_id, "message": {"text": short_desc}, From cae1e47f62c1f122c053dae72d20838804eba636 Mon Sep 17 00:00:00 2001 From: Orlando Barrera II Date: Wed, 19 Feb 2025 16:52:39 -0600 Subject: [PATCH 20/22] Testing the sarif file parsing --- socketsecurity/core/messages.py | 24 +++++++----------------- 1 file changed, 7 insertions(+), 17 deletions(-) diff --git a/socketsecurity/core/messages.py b/socketsecurity/core/messages.py index 08850cb..06c99d8 100644 --- a/socketsecurity/core/messages.py +++ b/socketsecurity/core/messages.py @@ -48,9 +48,6 @@ def find_line_in_file(packagename: str, packageversion: str, manifest_file: str) file_type = Path(manifest_file).name logging.debug("Processing file for line lookup: %s", manifest_file) - # ---------------------------------------------------- - # 1) JSON-based manifest files - # ---------------------------------------------------- if file_type in ["package-lock.json", "Pipfile.lock", "composer.lock"]: try: with open(manifest_file, "r", encoding="utf-8") as f: @@ -72,7 +69,6 @@ def find_line_in_file(packagename: str, packageversion: str, manifest_file: str) found_key = key found_info = value break - if found_key and found_info: needle_key = f'"{found_key}":' lines = raw_text.splitlines() @@ -88,13 +84,9 @@ def find_line_in_file(packagename: str, packageversion: str, manifest_file: str) logging.error("Error reading %s: %s", manifest_file, e) return 1, f"Error reading {manifest_file}" - # ---------------------------------------------------- - # 2) Text-based / line-based manifests - # ---------------------------------------------------- - # For pnpm-lock.yaml, use a different pattern since its format is YAML. + # For pnpm-lock.yaml, use a special regex pattern. if file_type.lower() == "pnpm-lock.yaml": - # Example pattern: /bitget-main/19.4.9: - searchstring = rf'/{re.escape(packagename)}/{re.escape(packageversion)}:' + searchstring = rf'^\s*/{re.escape(packagename)}/{re.escape(packageversion)}:' else: search_patterns = { "package.json": rf'"{packagename}":\s*"[\^~]?{re.escape(packageversion)}"', @@ -176,9 +168,9 @@ def create_security_comment_sarif(diff) -> dict: based on manifest type and improved
formatting for GitHub SARIF display. This function now: - - Accepts multiple manifest files from alert.introduced_by or alert.manifests. - - Generates an individual SARIF result for each manifest file. - - Appends the manifest file name to the rule ID and name for uniqueness. + - Processes every alert in diff.new_alerts. + - For alerts with multiple manifest files, generates an individual SARIF result for each file. + - Appends the manifest file name to the rule ID and name to make each result unique. - Does NOT fall back to 'requirements.txt' if no manifest file is provided. - Adds detailed logging to validate our assumptions. """ @@ -211,9 +203,7 @@ def create_security_comment_sarif(diff) -> dict: base_rule_id = f"{pkg_name}=={pkg_version}" severity = alert.severity - # Log raw alert data for manifest extraction. logging.debug("Alert %s - introduced_by: %s, manifests: %s", base_rule_id, alert.introduced_by, getattr(alert, 'manifests', None)) - manifest_files = [] if alert.introduced_by and isinstance(alert.introduced_by, list): for entry in alert.introduced_by: @@ -232,7 +222,7 @@ def create_security_comment_sarif(diff) -> dict: logging.debug("Alert %s - using manifest_files for processing: %s", base_rule_id, manifest_files) - # For each manifest file, create an individual SARIF result. + # Create an individual SARIF result for each manifest file. for mf in manifest_files: logging.debug("Alert %s - Processing manifest file: %s", base_rule_id, mf) socket_url = Messages.get_manifest_type_url(mf, pkg_name, pkg_version) @@ -241,7 +231,7 @@ def create_security_comment_sarif(diff) -> dict: line_number = 1 logging.debug("Alert %s: Manifest %s, line %d: %s", base_rule_id, mf, line_number, line_content) - # Create a unique rule id and name by appending the file name. + # Create a unique rule id and name by appending the manifest file. unique_rule_id = f"{base_rule_id} ({mf})" rule_name = f"Alert {base_rule_id} ({mf})" From 1315b64239322425da17c30c188209a15e6188ee Mon Sep 17 00:00:00 2001 From: Orlando Barrera II Date: Tue, 4 Mar 2025 09:39:31 -0600 Subject: [PATCH 21/22] Testing the sarif file parsing --- socketsecurity/__init__.py | 2 +- socketsecurity/core/messages.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/socketsecurity/__init__.py b/socketsecurity/__init__.py index 2f4f50d..121b7fc 100644 --- a/socketsecurity/__init__.py +++ b/socketsecurity/__init__.py @@ -1,2 +1,2 @@ __author__ = 'socket.dev' -__version__ = '2.0.6' +__version__ = '2.0.9' diff --git a/socketsecurity/core/messages.py b/socketsecurity/core/messages.py index 06c99d8..2940e3d 100644 --- a/socketsecurity/core/messages.py +++ b/socketsecurity/core/messages.py @@ -173,6 +173,7 @@ def create_security_comment_sarif(diff) -> dict: - Appends the manifest file name to the rule ID and name to make each result unique. - Does NOT fall back to 'requirements.txt' if no manifest file is provided. - Adds detailed logging to validate our assumptions. + """ if len(diff.new_alerts) == 0: for alert in diff.new_alerts: From da56d737adf8dd18d56d70dd418785c88b3a17e8 Mon Sep 17 00:00:00 2001 From: Orlando Barrera II <1621370+obarrera@users.noreply.github.com> Date: Fri, 7 Mar 2025 09:24:40 -0600 Subject: [PATCH 22/22] Update __init__.py --- socketsecurity/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/socketsecurity/__init__.py b/socketsecurity/__init__.py index 795f022..c554d9c 100644 --- a/socketsecurity/__init__.py +++ b/socketsecurity/__init__.py @@ -1,2 +1,2 @@ __author__ = 'socket.dev' -__version__ = '2.0.9' \ No newline at end of file +__version__ = '2.0.10'