Skip to content

Commit da4bba4

Browse files
davidchen2001MongoDB Bot
authored and
MongoDB Bot
committed
SERVER-99680: Fix legacy_comands_check.py to take inspect line-differences rather than file-differences (#31459)
GitOrigin-RevId: cd71349f51a46c6a8e18606b170fe324f7762fef
1 parent d32eb21 commit da4bba4

File tree

5 files changed

+167
-69
lines changed

5 files changed

+167
-69
lines changed

Diff for: buildscripts/legacy_commands_check.py

+16-16
Original file line numberDiff line numberDiff line change
@@ -4,14 +4,14 @@
44
import os
55
import re
66
import sys
7-
from typing import Iterable
7+
from typing import List, Tuple
88

99
# Get relative imports to work when the package is not installed on the PYTHONPATH.
1010
if __name__ == "__main__" and __package__ is None:
1111
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(os.path.realpath(__file__)))))
1212

1313
# pylint: disable=wrong-import-position
14-
from buildscripts.linter.filediff import gather_changed_files_for_lint
14+
from buildscripts.linter.filediff import gather_changed_files_with_lines
1515

1616
LEGACY_TYPES = [
1717
"public Command {",
@@ -22,14 +22,15 @@
2222
]
2323

2424

25-
def check_file_for_legacy_type(file_contents: Iterable[str]) -> bool:
25+
def check_file_for_legacy_type(modified_lines: List[Tuple[int, str]]) -> bool:
2626
"""Return false if a file defines a legacy command."""
2727

2828
file_has_legacy_type = False
2929

30-
for i, line in enumerate(file_contents):
31-
if any(legacy_type in line for legacy_type in LEGACY_TYPES):
32-
print(f"On line {i}: {line.strip()}")
30+
for i in range(len(modified_lines)):
31+
modified_line = modified_lines[i][1]
32+
if any(legacy_type in modified_line for legacy_type in LEGACY_TYPES):
33+
print(f"On line {i}: {modified_line.strip()}")
3334
file_has_legacy_type = True
3435

3536
return file_has_legacy_type
@@ -54,19 +55,18 @@ def main():
5455
default_dir = os.getenv("BUILD_WORKSPACE_DIRECTORY", ".")
5556
os.chdir(default_dir)
5657

57-
files = gather_changed_files_for_lint(is_interesting_file)
58+
files = gather_changed_files_with_lines(is_interesting_file)
5859
found_legacy_command = False
5960

6061
for file in files:
61-
with open(file, "r") as search_file:
62-
hasLegacyCommandDefinition = check_file_for_legacy_type(search_file)
63-
64-
if hasLegacyCommandDefinition:
65-
found_legacy_command = True
66-
print(
67-
file
68-
+ " contains a legacy command type definition. Please define a TypedCommand instead."
69-
)
62+
hasLegacyCommandDefinition = check_file_for_legacy_type(files[file])
63+
64+
if hasLegacyCommandDefinition:
65+
found_legacy_command = True
66+
print(
67+
file
68+
+ " contains a legacy command type definition. Please define a TypedCommand instead."
69+
)
7070

7171
if found_legacy_command:
7272
sys.exit(1)

Diff for: buildscripts/linter/filediff.py

+18-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
import os
44
import sys
5-
from typing import Callable, List, Tuple
5+
from typing import Callable, Dict, List, Tuple
66

77
import structlog
88
from git import Repo
@@ -16,6 +16,7 @@
1616
from buildscripts.patch_builds.change_data import (
1717
RevisionMap,
1818
find_changed_files_in_repos,
19+
find_modified_lines_for_files_in_repos,
1920
generate_revision_map,
2021
)
2122

@@ -66,3 +67,19 @@ def gather_changed_files_for_lint(is_interesting_file: Callable[[str], bool]) ->
6667

6768
LOGGER.info("Found files to lint", files=files)
6869
return files
70+
71+
72+
def gather_changed_files_with_lines(
73+
is_interesting_file: Callable[[str], bool],
74+
) -> Dict[str, List[Tuple[int, str]]]:
75+
"""
76+
Get the files that have changes since the last git commit, along with details of the specific lines that have changed.
77+
78+
:param is_interesting_file: Filter for whether a file should be returned.
79+
:return: Dictionary mapping each changed file to a list of tuples, where each tuple contains the modified line number and its content.
80+
"""
81+
repos, revision_map = _get_repos_and_revisions()
82+
changed_files = find_changed_files_in_repos(repos, revision_map)
83+
84+
filtered_files = [f for f in changed_files if is_interesting_file(f)]
85+
return find_modified_lines_for_files_in_repos(repos, filtered_files, revision_map)

Diff for: buildscripts/patch_builds/change_data.py

+66-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
import os
44
from itertools import chain
5-
from typing import Any, Dict, Iterable, List, Optional, Set
5+
from typing import Any, Dict, Iterable, List, Optional, Set, Tuple
66

77
import structlog
88
from git import DiffIndex, Repo
@@ -115,3 +115,68 @@ def find_changed_files_in_repos(
115115
:return: Set of changed files.
116116
"""
117117
return set(chain.from_iterable([find_changed_files(repo, revision_map) for repo in repos]))
118+
119+
120+
def find_modified_lines_for_files(
121+
repo: Repo, changed_files: List[str], revision_map: Optional[RevisionMap] = None
122+
) -> Dict[str, List[Tuple[int, str]]]:
123+
"""
124+
For each changed file, determine which lines were modified.
125+
126+
:param repo: Git repository.
127+
:param changed_files: List of file paths that have changed.
128+
:param revision_map: Map of revisions to compare against for repos.
129+
:return: Dictionary mapping files to lists of modified line numbers.
130+
"""
131+
if not revision_map:
132+
revision_map = {}
133+
134+
modified_lines_and_content = {}
135+
compare_commit = revision_map.get(repo.git_dir, repo.head.commit)
136+
for file_path in changed_files:
137+
diff = repo.git.diff(compare_commit, "--", file_path, unified=0)
138+
line_modifications = []
139+
# Read file contents
140+
with open(file_path, "r") as file:
141+
lines = file.readlines()
142+
143+
# Find line numbers and respective content of lines with modifications in git diff and store in a list of tuples
144+
for line in diff.splitlines():
145+
if not (line.startswith("@@")):
146+
continue
147+
parts = line.split(" ")
148+
for part in parts:
149+
if not (part.startswith("+")):
150+
continue
151+
start_line_count = part[1:]
152+
if "," in start_line_count:
153+
start_line, count = map(int, start_line_count.split(","))
154+
for i in range(start_line, start_line + count):
155+
if i <= len(lines):
156+
line_modifications.append((i, lines[i - 1].rstrip()))
157+
else:
158+
start_line = int(start_line_count)
159+
if start_line <= len(lines):
160+
line_modifications.append((start_line, lines[start_line - 1].rstrip()))
161+
modified_lines_and_content[file_path] = line_modifications
162+
163+
return modified_lines_and_content
164+
165+
166+
def find_modified_lines_for_files_in_repos(
167+
repos: Iterable[Repo], changed_files: List[str], revision_map: Optional[RevisionMap] = None
168+
) -> Dict[str, List[Tuple[int, str]]]:
169+
"""
170+
Find the modified lines in files with changes.
171+
172+
:param repos: List of repos containing changed files.
173+
:param revision_map: Map of revisions to compare against for repos.
174+
:return: Dictionary mapping each changed file to a list of tuples, where each tuple contains the modified line number and its content.
175+
"""
176+
modified_files_with_lines = {}
177+
for repo in repos:
178+
modified_files_with_lines.update(
179+
find_modified_lines_for_files(repo, changed_files, revision_map)
180+
)
181+
182+
return modified_files_with_lines

Diff for: buildscripts/tests/patch_builds/test_change_data.py

+41-1
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,14 @@
55
import os
66
import unittest
77

8-
from mock import MagicMock, patch
8+
from mock import MagicMock, mock_open, patch
99

1010
import buildscripts.patch_builds.change_data as under_test
1111

1212
NS = "buildscripts.patch_builds.change_data"
1313

14+
FILE_CONTENT = "Hello, Universe!\n"
15+
1416

1517
def ns(relative_name): # pylint: disable=invalid-name
1618
"""Return a full name from a name relative to the test module"s name space."""
@@ -71,3 +73,41 @@ def test_missing_repos_are_not_returned(self):
7173

7274
self.assertEqual(revision_map[mock_repo_list[0].git_dir], revision_data["mongo"])
7375
self.assertEqual(len(revision_map), 1)
76+
77+
78+
class TestFindChangedFilesAndLinesInRepos(unittest.TestCase):
79+
@patch("builtins.open", new_callable=mock_open, read_data=FILE_CONTENT)
80+
def test_detects_file_and_line_modifications_no_change(self, mock_open):
81+
repo_mock = MagicMock()
82+
repo_mock.git.diff.return_value = ""
83+
84+
changed_files = ["src/module/test1.cpp"]
85+
revision_map = {}
86+
87+
result = under_test.find_modified_lines_for_files(repo_mock, changed_files, revision_map)
88+
89+
expected_result = {}
90+
91+
mock_open.assert_called_once_with("src/module/test1.cpp", "r")
92+
93+
self.assertEqual(result, expected_result)
94+
95+
@patch("builtins.open", new_callable=mock_open, read_data=FILE_CONTENT)
96+
def test_detects_file_and_line_modifications_one_file(self, mock_open):
97+
repo_mock = MagicMock()
98+
repo_mock.git.diff.return_value = """\
99+
@@ -1,2 +1,2 @@
100+
-Hello, World!
101+
+Hello, Universe!
102+
"""
103+
104+
changed_files = ["src/module/test1.cpp"]
105+
revision_map = {}
106+
107+
result = under_test.find_modified_lines_for_files(repo_mock, changed_files, revision_map)
108+
109+
expected_result = {"src/module/test1.cpp": [(1, "Hello, Universe!")]}
110+
111+
mock_open.assert_called_once_with("src/module/test1.cpp", "r")
112+
113+
self.assertEqual(result, expected_result)

Diff for: buildscripts/tests/test_legacy_commands_check.py

+26-50
Original file line numberDiff line numberDiff line change
@@ -13,80 +13,56 @@ def create_file_iterator(file_contents: str) -> Iterable[str]:
1313

1414
class TestCheckFileForLegacyType(unittest.TestCase):
1515
def test_typed_command(self):
16-
content = """
17-
line 0
18-
class AddShardCmd : public TypedCommand
19-
"""
20-
self.assertEqual(check_file_for_legacy_type(create_file_iterator(content)), False)
16+
modified_lines = [(0, ""), (1, "class AddShardCmd : public TypedCommand")]
17+
self.assertEqual(check_file_for_legacy_type(modified_lines), False)
2118

2219
def test_command(self):
23-
content = """
24-
line 0
25-
class AddShardCmd : public Command {
26-
"""
27-
self.assertEqual(check_file_for_legacy_type(create_file_iterator(content)), True)
20+
modified_lines = [(0, ""), (1, "class AddShardCmd : public Command {")]
21+
self.assertEqual(check_file_for_legacy_type(modified_lines), True)
2822

2923
def test_basic_command(self):
30-
content = """
31-
line 0
32-
class AddShardCmd : public BasicCommand
33-
"""
34-
self.assertEqual(check_file_for_legacy_type(create_file_iterator(content)), True)
24+
modified_lines = [(0, ""), (1, "class AddShardCmd : public BasicCommand")]
25+
self.assertEqual(check_file_for_legacy_type(modified_lines), True)
3526

3627
def test_basic_command_with_reply_builder_interface(self):
37-
content = """
38-
line 0
39-
class AddShardCmd : public BasicCommandWithReplyBuilderInterface
40-
"""
41-
self.assertEqual(check_file_for_legacy_type(create_file_iterator(content)), True)
28+
modified_lines = [
29+
(0, ""),
30+
(1, "class AddShardCmd : public BasicCommandWithReplyBuilderInterface"),
31+
]
32+
self.assertEqual(check_file_for_legacy_type(modified_lines), True)
4233

4334
def test_basic_command_with_request_parser(self):
44-
content = """
45-
line 0
46-
class AddShardCmd : public BasicCommandWithRequestParser
47-
"""
48-
self.assertEqual(check_file_for_legacy_type(create_file_iterator(content)), True)
35+
modified_lines = [(0, ""), (1, "class AddShardCmd : public BasicCommandWithRequestParser")]
36+
self.assertEqual(check_file_for_legacy_type(modified_lines), True)
4937

5038
def test_errmsg_command(self):
51-
content = """
52-
line 0
53-
class AddShardCmd : public ErrmsgCommandDeprecated
54-
"""
55-
self.assertEqual(check_file_for_legacy_type(create_file_iterator(content)), True)
39+
modified_lines = [(0, ""), (1, "class AddShardCmd : public ErrmsgCommandDeprecated")]
40+
self.assertEqual(check_file_for_legacy_type(modified_lines), True)
5641

5742
def test_generic_command_inherited(self):
5843
# This might appear in non-legacy command types and should not be mistake for a Command type
5944

60-
content = """
61-
line 0
62-
public Command
63-
"""
64-
self.assertEqual(check_file_for_legacy_type(create_file_iterator(content)), False)
45+
modified_lines = [(0, ""), (1, "class AddShardCmd : public Command")]
46+
self.assertEqual(check_file_for_legacy_type(modified_lines), False)
6547

6648
def test_kCommand(self):
6749
# This log statement appears in many Command files for logging purposes and should not be
6850
# mistaken for a Command type
6951

70-
content = """
71-
line 0
72-
#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kCommand
73-
"""
74-
self.assertEqual(check_file_for_legacy_type(create_file_iterator(content)), False)
52+
modified_lines = [
53+
(0, ""),
54+
(1, "#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kCommand"),
55+
]
56+
self.assertEqual(check_file_for_legacy_type(modified_lines), False)
7557

7658
def test_command_invocation(self):
7759
# This class is not a Command type and should not be mistaken for a Command type
7860

79-
content = """
80-
line 0
81-
class AddShardCmd : public CommandInvocation
82-
"""
83-
self.assertEqual(check_file_for_legacy_type(create_file_iterator(content)), False)
61+
modified_lines = [(0, ""), (1, "class AddShardCmd : public CommandInvocation")]
62+
self.assertEqual(check_file_for_legacy_type(modified_lines), False)
8463

8564
def test_command_invocation_hooks(self):
8665
# This class is not a Command type and should not be mistaken for a Command type
8766

88-
content = """
89-
line 0
90-
class AddShardCmd : public CommandInvocationHooks
91-
"""
92-
self.assertEqual(check_file_for_legacy_type(create_file_iterator(content)), False)
67+
modified_lines = [(0, ""), (1, "class AddShardCmd : public CommandInvocationHooks")]
68+
self.assertEqual(check_file_for_legacy_type(modified_lines), False)

0 commit comments

Comments
 (0)