Skip to content

Commit 3892568

Browse files
committed
fix git history
1 parent 673db14 commit 3892568

File tree

6 files changed

+54
-50
lines changed

6 files changed

+54
-50
lines changed

api/analyzers/source_analyzer.py

+10-6
Original file line numberDiff line numberDiff line change
@@ -81,12 +81,16 @@ def first_pass(self, path: Path, ignore: list[str], graph: Graph) -> None:
8181
executor (concurrent.futures.Executor): The executor to run tasks concurrently.
8282
"""
8383

84-
if any(path.rglob('*.java')):
85-
analyzers[".java"].add_dependencies(path, self.files)
86-
if any(path.rglob('*.py')):
87-
analyzers[".py"].add_dependencies(path, self.files)
84+
if path.is_file():
85+
files = [path]
86+
else:
87+
if any(path.rglob('*.java')):
88+
analyzers[".java"].add_dependencies(path, self.files)
89+
if any(path.rglob('*.py')):
90+
analyzers[".py"].add_dependencies(path, self.files)
8891

89-
files = list(path.rglob('*.*'))
92+
files = list(path.rglob('*.*'))
93+
9094
files_len = len(files)
9195
for i, file_path in enumerate(files):
9296
# Skip unsupported files
@@ -210,7 +214,7 @@ def analyze_local_repository(self, path: str, ignore: Optional[list[str]] = None
210214
# Save processed commit hash to the DB
211215
repo = Repo(path)
212216
head = repo.commit("HEAD")
213-
self.graph.set_graph_commit(head.hexsha)
217+
self.graph.set_graph_commit(head.short_id)
214218

215219
return self.graph
216220

api/git_utils/git_graph.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -46,9 +46,9 @@ def add_commit(self, commit: Commit) -> None:
4646
"""
4747
Add a new commit to the graph
4848
"""
49-
date = commit.committed_date
49+
date = commit.commit_time
5050
author = commit.author.name
51-
hexsha = commit.hexsha
51+
hexsha = commit.short_id
5252
message = commit.message
5353
logging.info(f"Adding commit {hexsha}: {message}")
5454

api/git_utils/git_utils.py

+38-39
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,10 @@
22
import json
33
import logging
44

5-
from pygit2 import Commit
5+
from pygit2 import Commit, Diff
66
from ..info import *
77
from pygit2.repository import Repository
8+
from pygit2.enums import DeltaStatus, CheckoutStrategy
89
from pathlib import Path
910
from ..graph import Graph
1011
from .git_graph import GitGraph
@@ -31,7 +32,7 @@ def is_ignored(file_path: str, ignore_list: List[str]) -> bool:
3132

3233
return any(file_path.startswith(ignore) for ignore in ignore_list)
3334

34-
def classify_changes(diff, ignore_list: List[str]) -> tuple[list[Path], list[Path], list[Path]]:
35+
def classify_changes(diff: Diff, repo: Repository, ignore_list: List[str]) -> tuple[list[Path], list[Path], list[Path]]:
3536
"""
3637
Classifies changes into added, deleted, and modified files.
3738
@@ -45,16 +46,16 @@ def classify_changes(diff, ignore_list: List[str]) -> tuple[list[Path], list[Pat
4546

4647
added, deleted, modified = [], [], []
4748

48-
for change in diff:
49-
if change.new_file and not is_ignored(change.b_path, ignore_list):
50-
logging.debug(f"new file: {change.b_path}")
51-
added.append(Path(change.b_path))
52-
if change.deleted_file and not is_ignored(change.a_path, ignore_list):
53-
logging.debug(f"deleted file: {change.a_path}")
54-
deleted.append(Path(change.a_path))
55-
if change.change_type == 'M' and not is_ignored(change.a_path, ignore_list):
56-
logging.debug(f"change file: {change.a_path}")
57-
modified.append(Path(change.a_path))
49+
for change in diff.deltas:
50+
if change.status == DeltaStatus.ADDED and not is_ignored(change.new_file.path, ignore_list):
51+
logging.debug(f"new file: {change.new_file}")
52+
added.append(Path(f"{repo.workdir}/{change.new_file.path}"))
53+
if change.status == DeltaStatus.DELETED and not is_ignored(change.old_file.path, ignore_list):
54+
logging.debug(f"deleted file: {change.old_file.path}")
55+
deleted.append(Path(f"{repo.workdir}/{change.old_file.path}"))
56+
if change.status == DeltaStatus.MODIFIED and not is_ignored(change.new_file.path, ignore_list):
57+
logging.debug(f"change file: {change.new_file.path}")
58+
modified.append(Path(f"{repo.workdir}/{change.new_file.path}"))
5859

5960
return added, deleted, modified
6061

@@ -89,7 +90,7 @@ def build_commit_graph(path: str, repo_name: str, ignore_list: Optional[List[str
8990
# Save current git for later restoration
9091
repo = Repository('.')
9192
current_commit = repo.walk(repo.head.target).__next__()
92-
current_commit_hexsha = current_commit.hex
93+
current_commit_hexsha = current_commit.short_id
9394

9495
# Add commit to the git graph
9596
git_graph.add_commit(current_commit)
@@ -108,7 +109,7 @@ def build_commit_graph(path: str, repo_name: str, ignore_list: Optional[List[str
108109
git_graph.add_commit(parent_commit)
109110

110111
# connect child parent commits relation
111-
git_graph.connect_commits(child_commit.hex, parent_commit.hex)
112+
git_graph.connect_commits(child_commit.short_id, parent_commit.short_id)
112113

113114
# Represents the changes going backward!
114115
# e.g. which files need to be deleted when moving back one commit
@@ -120,15 +121,15 @@ def build_commit_graph(path: str, repo_name: str, ignore_list: Optional[List[str
120121

121122
# Process file changes in this commit
122123
logging.info(f"""Computing diff between
123-
child {child_commit.hexsha}: {child_commit.message}
124-
and {parent_commit.hexsha}: {parent_commit.message}""")
124+
child {child_commit.short_id}: {child_commit.message}
125+
and {parent_commit.short_id}: {parent_commit.message}""")
125126

126-
diff = child_commit.diff(parent_commit)
127-
added, deleted, modified = classify_changes(diff, ignore_list)
127+
diff = repo.diff(child_commit, parent_commit)
128+
added, deleted, modified = classify_changes(diff, repo, ignore_list)
128129

129130
# Checkout prev commit
130-
logging.info(f"Checking out commit: {parent_commit.hexsha}")
131-
repo.checkout(parent_commit.hex)
131+
logging.info(f"Checking out commit: {parent_commit.short_id}")
132+
repo.checkout_tree(parent_commit.tree, strategy=CheckoutStrategy.FORCE)
132133

133134
#-----------------------------------------------------------------------
134135
# Apply changes going backwards
@@ -138,12 +139,10 @@ def build_commit_graph(path: str, repo_name: str, ignore_list: Optional[List[str
138139
# TODO: a bit of a waste, compute in previous loop
139140
deleted_files = []
140141
for deleted_file_path in deleted:
141-
_ext = os.path.splitext(deleted_file_path)[1]
142+
_ext = deleted_file_path.suffix
142143
if _ext in supported_types:
143-
_path = os.path.dirname(deleted_file_path)
144-
_name = os.path.basename(deleted_file_path)
145144
deleted_files.append(
146-
{'path': _path, 'name': _name, 'ext' : _ext})
145+
{'path': str(deleted_file_path), 'name': deleted_file_path.name, 'ext' : _ext})
147146

148147
# remove deleted files from the graph
149148
if len(deleted_files) > 0:
@@ -167,15 +166,15 @@ def build_commit_graph(path: str, repo_name: str, ignore_list: Optional[List[str
167166

168167
# Log transitions
169168
logging.debug(f"""Save graph transition from
170-
commit: {child_commit.hex}
169+
commit: {child_commit.short_id}
171170
to
172-
commit: {parent_commit.hex}
171+
commit: {parent_commit.short_id}
173172
Queries: {queries}
174173
Parameters: {params}
175174
""")
176175

177-
git_graph.set_parent_transition(child_commit.hex,
178-
parent_commit.hex, queries, params)
176+
git_graph.set_parent_transition(child_commit.short_id,
177+
parent_commit.short_id, queries, params)
179178
# advance to the next commit
180179
child_commit = parent_commit
181180

@@ -185,24 +184,24 @@ def build_commit_graph(path: str, repo_name: str, ignore_list: Optional[List[str
185184

186185
logging.info("Computing transition queries moving forward")
187186
parent_commit = child_commit
188-
while parent_commit.hex != current_commit_hexsha:
189-
child_commit = git_graph.get_child_commit(parent_commit.hex)
187+
while parent_commit.short_id != current_commit_hexsha:
188+
child_commit = git_graph.get_child_commit(parent_commit.short_id)
190189
child_commit = repo.walk(child_commit['hash']).__next__()
191190

192191
# Represents the changes going forward
193192
# e.g. which files need to be deleted when moving forward one commit
194193

195194
# Process file changes in this commit
196195
logging.info(f"""Computing diff between
197-
child {parent_commit.hex}: {parent_commit.message}
198-
and {child_commit.hex}: {child_commit.message}""")
196+
child {parent_commit.short_id}: {parent_commit.message}
197+
and {child_commit.short_id}: {child_commit.message}""")
199198

200199
diff = repo.diff(parent_commit, child_commit)
201-
added, deleted, modified = classify_changes(diff, ignore_list)
200+
added, deleted, modified = classify_changes(diff, repo, ignore_list)
202201

203202
# Checkout child commit
204-
logging.info(f"Checking out commit: {child_commit.hex}")
205-
repo.checkout(child_commit.hex)
203+
logging.info(f"Checking out commit: {child_commit.short_id}")
204+
repo.checkout_tree(child_commit.tree, strategy=CheckoutStrategy.FORCE)
206205

207206
#-----------------------------------------------------------------------
208207
# Apply changes going forward
@@ -241,15 +240,15 @@ def build_commit_graph(path: str, repo_name: str, ignore_list: Optional[List[str
241240

242241
# Log transitions
243242
logging.debug(f"""Save graph transition from
244-
commit: {parent_commit.hex}
243+
commit: {parent_commit.short_id}
245244
to
246-
commit: {child_commit.hex}
245+
commit: {child_commit.short_id}
247246
Queries: {queries}
248247
Parameters: {params}
249248
""")
250249

251-
git_graph.set_child_transition(child_commit.hex,
252-
parent_commit.hex, queries, params)
250+
git_graph.set_child_transition(child_commit.short_id,
251+
parent_commit.short_id, queries, params)
253252
# advance to the child_commit
254253
parent_commit = child_commit
255254

api/index.py

+1
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ def decorated_function(*args, **kwargs):
4343
return jsonify(message="Unauthorized"), 401
4444
return f(*args, **kwargs)
4545
return decorated_function
46+
4647
@app.route('/graph_entities', methods=['GET'])
4748
@token_required # Apply token authentication decorator
4849
def graph_entities():

api/project.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,7 @@ def analyze_sources(self, ignore: Optional[List[str]] = None) -> Graph:
8787
# Save processed commit hash to the DB
8888
repo = Repository(self.path)
8989
current_commit = repo.walk(repo.head.target).__next__()
90-
set_repo_commit(self.name, current_commit.hex)
90+
set_repo_commit(self.name, current_commit.short_id)
9191
except Exception:
9292
# Probably the .git folder is missing
9393
pass

tests/test_git_history.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -54,12 +54,12 @@ def test_git_graph_structure(self):
5454
c = repo.commit("HEAD")
5555

5656
while True:
57-
commits = git_graph.get_commits([c.hexsha])
57+
commits = git_graph.get_commits([c.short_id])
5858

5959
self.assertEqual(len(commits), 1)
6060
actual = commits[0]
6161

62-
self.assertEqual(c.hexsha, actual['hash'])
62+
self.assertEqual(c.short_id, actual['hash'])
6363
self.assertEqual(c.message, actual['message'])
6464
self.assertEqual(c.author.name, actual['author'])
6565
self.assertEqual(c.committed_date, actual['date'])

0 commit comments

Comments
 (0)