Skip to content
This repository was archived by the owner on Dec 3, 2019. It is now read-only.

Commit 4e78157

Browse files
perezjuCommit Bot
authored and
Commit Bot
committed
[soundwave] Add pandas_sqlite.InsertOrReplaceRecords
Pandas DataFrame.to_sql method has limitation of not being able to "insert or replace" records, see e.g: pandas-dev/pandas#14553 Using pandas.io.sql primitives, however, it's not too hard to implement such a functionality (for the SQLite case only). Assuming that index columns of the frame have names, this method will use those columns as the PRIMARY KEY of the table. Bug: catapult:#4382 Change-Id: I5f70a04c18b998590b1e77e5ff6b89b2d27138af Reviewed-on: https://chromium-review.googlesource.com/1035266 Commit-Queue: Juan Antonio Navarro Pérez <[email protected]> Reviewed-by: Charlie Andrews <[email protected]>
1 parent 859155b commit 4e78157

File tree

4 files changed

+101
-9
lines changed

4 files changed

+101
-9
lines changed

experimental/soundwave/soundwave/commands.py

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
import sqlite3
77

88
from soundwave import dashboard_api
9+
from soundwave import pandas_sqlite
910
from soundwave import tables
1011

1112

@@ -16,19 +17,13 @@ def FetchAlertsData(args):
1617
alerts = tables.alerts.DataFrameFromJson(
1718
api.GetAlertData(args.benchmark, args.days))
1819
print '%d alerts found!' % len(alerts)
19-
# TODO: Make this update rather than replace the existing table.
20-
# Note that if_exists='append' does not work since there is no way to
21-
# specify in pandas' |to_sql| a primary key or, more generally, uniqueness
22-
# constraints on columns. So this would lead to duplicate entries for
23-
# alerts with the same |key|.
24-
alerts.to_sql('alerts', conn, if_exists='replace')
20+
pandas_sqlite.InsertOrReplaceRecords(alerts, 'alerts', conn)
2521

2622
bug_ids = set(alerts['bug_id'].unique())
2723
bug_ids.discard(0) # A bug_id of 0 means untriaged.
2824
print '%d bugs found!' % len(bug_ids)
2925
bugs = tables.bugs.DataFrameFromApi(api, bug_ids)
30-
# TODO: Ditto. Make this update rather than replace the existing table.
31-
bugs.to_sql('bugs', conn, if_exists='replace')
26+
pandas_sqlite.InsertOrReplaceRecords(bugs, 'bugs', conn)
3227
finally:
3328
conn.close()
3429

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
# Copyright 2018 The Chromium Authors. All rights reserved.
2+
# Use of this source code is governed by a BSD-style license that can be
3+
# found in the LICENSE file.
4+
5+
"""
6+
Helper methods for dealing with a SQLite database with pandas.
7+
"""
8+
import pandas.io.sql # pylint: disable=import-error
9+
10+
11+
def _InsertOrReplaceStatement(name, keys):
12+
columns = ','.join(keys)
13+
values = ','.join('?' for _ in keys)
14+
return 'INSERT OR REPLACE INTO %s(%s) VALUES (%s)' % (name, columns, values)
15+
16+
17+
def InsertOrReplaceRecords(frame, name, conn):
18+
"""Insert or replace records from a DataFrame into a SQLite database.
19+
20+
Assumes that index columns of the frame have names, and those are used as to
21+
set the PRIMARY KEY of the table when creating anew. If the table already
22+
exists, any new records with a matching PRIMARY KEY will replace existing
23+
records.
24+
25+
Args:
26+
frame: DataFrame with records to write.
27+
name: Name of SQL table.
28+
conn: A sqlite connection object.
29+
"""
30+
db = pandas.io.sql.SQLiteDatabase(conn)
31+
if db.has_table(name):
32+
table = pandas.io.sql.SQLiteTable(
33+
name, db, frame=frame, index=True, if_exists='append')
34+
keys, data = table.insert_data()
35+
insert_statement = _InsertOrReplaceStatement(name, keys)
36+
with db.run_transaction() as c:
37+
c.executemany(insert_statement, zip(*data))
38+
else:
39+
table = pandas.io.sql.SQLiteTable(
40+
name, db, frame=frame, index=True, keys=frame.index.names,
41+
if_exists='fail')
42+
table.create()
43+
table.insert()
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
# Copyright 2018 The Chromium Authors. All rights reserved.
2+
# Use of this source code is governed by a BSD-style license that can be
3+
# found in the LICENSE file.
4+
5+
import pandas # pylint: disable=import-error
6+
import sqlite3
7+
import unittest
8+
9+
from soundwave import pandas_sqlite
10+
11+
12+
class TestPandasSQLite(unittest.TestCase):
13+
def testInsertOrReplaceRecords_newTable(self):
14+
columns = ('bug_id', 'summary', 'status')
15+
df1 = pandas.DataFrame.from_records(
16+
[(123, 'Some bug', 'Started'), (456, 'Another bug', 'Assigned')],
17+
columns=columns, index=columns[0])
18+
conn = sqlite3.connect(':memory:')
19+
try:
20+
# Write new table to database, read back and check they are equal.
21+
pandas_sqlite.InsertOrReplaceRecords(df1, 'bugs', conn)
22+
df = pandas.read_sql('SELECT * FROM bugs', conn, index_col=columns[0])
23+
self.assertTrue(df.equals(df1))
24+
finally:
25+
conn.close()
26+
27+
def testInsertOrReplaceRecords_existingTable(self):
28+
columns = ('bug_id', 'summary', 'status')
29+
df1 = pandas.DataFrame.from_records(
30+
[(123, 'Some bug', 'Started'), (456, 'Another bug', 'Assigned')],
31+
columns=columns, index=columns[0])
32+
df2 = pandas.DataFrame.from_records(
33+
[(123, 'Some bug', 'Fixed'), (789, 'A new bug', 'Untriaged')],
34+
columns=columns, index=columns[0])
35+
conn = sqlite3.connect(':memory:')
36+
try:
37+
# Write first data frame to database.
38+
pandas_sqlite.InsertOrReplaceRecords(df1, 'bugs', conn)
39+
df = pandas.read_sql('SELECT * FROM bugs', conn, index_col=columns[0])
40+
self.assertEqual(len(df), 2)
41+
self.assertEqual(df.loc[123]['status'], 'Started')
42+
43+
# Write second data frame to database.
44+
pandas_sqlite.InsertOrReplaceRecords(df2, 'bugs', conn)
45+
df = pandas.read_sql('SELECT * FROM bugs', conn, index_col=columns[0])
46+
self.assertEqual(len(df), 3) # Only one extra record added.
47+
self.assertEqual(df.loc[123]['status'], 'Fixed') # Bug is now fixed.
48+
self.assertItemsEqual(df.index, (123, 456, 789))
49+
finally:
50+
conn.close()
51+
52+
53+
if __name__ == '__main__':
54+
unittest.main()

experimental/soundwave/soundwave/tables/bugs_test.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ def testDataFrameFromApi(self):
3434
bugs = tables.bugs.DataFrameFromApi(api, [12345])
3535
self.assertEqual(len(bugs), 1)
3636

37-
bug = bugs.loc[12345]
37+
bug = bugs.loc[12345] # Get bug by id.
3838
self.assertEqual(bug['published'], datetime.datetime(
3939
year=2018, month=4, day=9, hour=17, minute=1, second=9))
4040
self.assertEqual(bug['status'], 'Fixed')

0 commit comments

Comments
 (0)