Skip to content

Commit a388234

Browse files
committed
Add db module to support sqlite #89
- Update Paths model to have optional repository_name field - Create data directory for sqlite db if necessary
1 parent 2cd495f commit a388234

File tree

2 files changed

+87
-0
lines changed

2 files changed

+87
-0
lines changed

github_stats_pages/db.py

+86
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
from functools import partial
2+
from pathlib import Path
3+
from typing import Union, Type
4+
5+
import pandas as pd
6+
from sqlalchemy.future import Engine
7+
from sqlalchemy.exc import NoResultFound
8+
from sqlmodel import SQLModel, Session, create_engine, select
9+
10+
from .models import Clone, Referring, Traffic, Paths
11+
from .logger import app_log as log
12+
13+
# Default (relative) path of the SQLite database file; configure() uses it
# whenever it is called with test=False.
SQLITE_FILE_NAME = Path("data/sqlite3.db")
14+
15+
16+
def configure(test: bool = False, echo: bool = False) -> Engine:
    """Create an engine for the project's SQLite database file.

    The directory that holds the database file is created when missing.

    :param test: Use ``tests_data/sqlite3.db`` instead of ``SQLITE_FILE_NAME``.
    :param echo: Forwarded to ``create_engine`` as its ``echo`` argument.
    :return: Engine bound to the selected SQLite file.
    """
    sqlite_file_name = (
        Path("tests_data/sqlite3.db") if test else SQLITE_FILE_NAME
    )
    # parents=True copes with a nested data directory; exist_ok=True removes
    # the check-then-create race of a separate exists() test.
    sqlite_file_name.parent.mkdir(parents=True, exist_ok=True)
    sqlite_url = f"sqlite:///{sqlite_file_name}"
    return create_engine(sqlite_url, echo=echo)
24+
25+
26+
def create_db_and_tables(test: bool = False, echo: bool = False):
    """Build the engine via ``configure`` and create the SQLModel tables.

    Every table registered on ``SQLModel.metadata`` is created on that
    engine, and the engine is returned to the caller.
    """
    db_engine = configure(test=test, echo=echo)
    SQLModel.metadata.create_all(db_engine)
    return db_engine
30+
31+
32+
def migrate_csv(
    filename: Union[str, Path],
    model: Type[SQLModel],
    engine: Engine,
    skip_rows: Union[int, None] = None,
):
    """Migrate CSV over to SQLite

    Rows for which ``query`` already returns a record (matched on
    ``repository_name`` and ``date``) are skipped; the remaining rows are
    appended to the table named after the lower-cased model class name.

    :param filename: CSV file to load; it is read without a header row.
    :param model: SQLModel table class whose required fields supply the
        CSV column names.
    :param engine: Engine used for the existence lookups and the insert.
    :param skip_rows: Forwarded to ``pandas.read_csv`` as ``skiprows``.
    """
    names = [
        field.name for field in model.__fields__.values() if field.required
    ]
    log.info(f"[yellow]Loading: {filename}")
    df = pd.read_csv(filename, header=None, skiprows=skip_rows, names=names)
    # ``model`` is a class, not an instance: isinstance(model, Paths) was
    # always False, so Paths CSVs never received the repository_name column
    # that the lookup below depends on.
    if issubclass(model, Paths):
        # NOTE(review): presumably paths look like "/<owner>/<repo>/...", so
        # index 2 of the split is the repository name -- confirm.
        repository_names = [a.split("/")[2] for a in df["path"].values]
        df.insert(1, "repository_name", repository_names)

    func = partial(query, engine=engine, model=model)

    query_results = list(map(func, df["repository_name"], df["date"]))
    # Keep only the rows that have no matching record in the database yet.
    new_df: pd.DataFrame = df.iloc[
        [idx for idx, item in enumerate(query_results) if not item]
    ]
    if new_df.empty:
        log.info("No new records!")
    else:
        log.info(f"New records found: {len(new_df)}")
        log.info("[bold yellow]Adding data")
        new_df.to_sql(
            model.__name__.lower(), engine, if_exists="append", index=False
        )
        if len(new_df) < len(df):
            log.info("[orange]Some records exists in db")
69+
70+
def query(
    repository_name: str,
    date: str,
    engine: Engine,
    model: Union[Type[SQLModel], Clone, Referring, Paths, Traffic],
) -> Union[SQLModel, Clone, Referring, Paths, Traffic, None]:
    """Fetch the ``model`` record for one repository on one date.

    :param repository_name: Value matched against ``model.repository_name``.
    :param date: Value matched against ``model.date``.
    :param engine: Engine the lookup session is opened on.
    :param model: Table class to select from.
    :return: The matching record, or ``None`` when no row matches. Any other
        error raised by ``one()`` propagates to the caller.
    """
    statement = select(model).where(
        model.repository_name == repository_name, model.date == date
    )
    with Session(engine) as session:
        rows = session.exec(statement)
        try:
            record = rows.one()
        except NoResultFound:
            record = None
    return record

github_stats_pages/models/paths.py

+1
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
class Paths(SQLModel, table=True):
77
id: Optional[int] = Field(default=None, primary_key=True)
88
date: str
9+
repository_name: Optional[str]
910
path: str
1011
title: str
1112
views: int

0 commit comments

Comments
 (0)