|
6 | 6 | SELECT refs.repository_id
|
7 | 7 | FROM refs
|
8 | 8 | NATURAL JOIN commits
|
9 |
| -WHERE commits.commit_author_name = 'Javi Fontan' AND refs.ref_name = 'HEAD'; |
| 9 | +WHERE commits.commit_author_name = 'Johnny Bravo' |
| 10 | + AND refs.ref_name = 'HEAD'; |
10 | 11 | ```
|
11 | 12 |
|
12 | 13 | ## Get all the HEAD references from all the repositories
|
13 | 14 |
|
14 | 15 | ```sql
|
15 |
| -SELECT * FROM refs WHERE ref_name = 'HEAD'; |
| 16 | +SELECT * |
| 17 | +FROM refs |
| 18 | +WHERE ref_name = 'HEAD'; |
16 | 19 | ```
|
17 | 20 |
|
18 | 21 | ## First commit on HEAD history for all repositories
|
19 | 22 |
|
20 | 23 | ```sql
|
21 |
| -SELECT |
22 |
| - file_path, |
23 |
| - ref_commits.repository_id |
24 |
| -FROM |
25 |
| - commit_files |
26 |
| -NATURAL JOIN |
27 |
| - ref_commits |
28 |
| -WHERE |
29 |
| - ref_commits.ref_name = 'HEAD' |
30 |
| - AND ref_commits.history_index = 0; |
| 24 | +SELECT file_path, |
| 25 | + ref_commits.repository_id |
| 26 | +FROM commit_files |
| 27 | +NATURAL JOIN ref_commits |
| 28 | +WHERE ref_commits.ref_name = 'HEAD' |
| 29 | + AND ref_commits.history_index = 0; |
31 | 30 | ```
|
32 | 31 |
|
33 | 32 | ## Commits that appear in more than one reference
|
34 | 33 |
|
35 | 34 | ```sql
|
36 |
| -SELECT * FROM ( |
37 |
| - SELECT COUNT(c.commit_hash) AS num, c.commit_hash |
38 |
| - FROM ref_commits r |
39 |
| - INNER JOIN commits c |
40 |
| - ON r.repository_id = c.repository_id AND r.commit_hash = c.commit_hash |
41 |
| - GROUP BY c.commit_hash |
42 |
| -) t WHERE num > 1; |
| 35 | +SELECT * |
| 36 | +FROM |
| 37 | + (SELECT COUNT(c.commit_hash) AS num, |
| 38 | + c.commit_hash |
| 39 | + FROM ref_commits r |
| 40 | + NATURAL JOIN commits c |
| 41 | + GROUP BY c.commit_hash) t |
| 42 | +WHERE num > 1; |
43 | 43 | ```
|
44 | 44 |
|
45 | 45 | ## Get the number of blobs per HEAD commit
|
46 | 46 |
|
47 | 47 | ```sql
|
48 |
| -SELECT COUNT(c.commit_hash), c.commit_hash |
49 |
| -FROM ref_commits as r |
50 |
| -INNER JOIN commits c |
51 |
| - ON r.ref_name = 'HEAD' |
52 |
| - AND r.repository_id = c.repository_id |
53 |
| - AND r.commit_hash = c.commit_hash |
54 |
| -INNER JOIN commit_blobs cb |
55 |
| - ON cb.repository_id = c.repository_id AND cb.commit_hash = c.commit_hash |
56 |
| -GROUP BY c.commit_hash; |
| 48 | +SELECT COUNT(commit_hash), |
| 49 | + commit_hash |
| 50 | +FROM ref_commits |
| 51 | +NATURAL JOIN commits |
| 52 | +NATURAL JOIN commit_blobs |
| 53 | +WHERE ref_name = 'HEAD' |
| 54 | +GROUP BY commit_hash; |
57 | 55 | ```
|
58 | 56 |
|
59 |
| -## Get commits per committer, per month in 2015 |
| 57 | +## Get commits per committer, per year and month |
60 | 58 |
|
61 | 59 | ```sql
|
62 |
| -SELECT COUNT(*) as num_commits, month, repo_id, committer_email |
63 |
| -FROM ( |
64 |
| - SELECT |
65 |
| - MONTH(committer_when) as month, |
66 |
| - r.repository_id as repo_id, |
67 |
| - committer_email |
68 |
| - FROM ref_commits r |
69 |
| - INNER JOIN commits c |
70 |
| - ON YEAR(c.committer_when) = 2015 |
71 |
| - AND r.repository_id = c.repository_id |
72 |
| - AND r.commit_hash = c.commit_hash |
73 |
| - WHERE r.ref_name = 'HEAD' |
74 |
| -) as t |
75 |
| -GROUP BY committer_email, month, repo_id; |
| 60 | +SELECT YEAR, |
| 61 | + MONTH, |
| 62 | + repo_id, |
| 63 | + committer_email, |
| 64 | + COUNT(*) AS num_commits |
| 65 | +FROM |
| 66 | + (SELECT YEAR(committer_when) AS YEAR, |
| 67 | + MONTH(committer_when) AS MONTH, |
| 68 | + repository_id AS repo_id, |
| 69 | + committer_email |
| 70 | + FROM ref_commits |
| 71 | + NATURAL JOIN commits |
| 72 | + WHERE ref_name = 'HEAD') AS t |
| 73 | +GROUP BY committer_email, |
| 74 | + YEAR, |
| 75 | + MONTH, |
| 76 | + repo_id; |
76 | 77 | ```
|
77 | 78 |
|
78 | 79 | ## Files from first 6 commits from HEAD references that contains some key and are not in vendor directory
|
79 | 80 |
|
80 | 81 | ```sql
|
81 |
| -select |
82 |
| - files.file_path, |
83 |
| - ref_commits.repository_id, |
84 |
| - files.blob_content |
85 |
| -FROM |
86 |
| - files |
87 |
| -NATURAL JOIN |
88 |
| - commit_files |
89 |
| -NATURAL JOIN |
90 |
| - ref_commits |
91 |
| -WHERE |
92 |
| - ref_commits.ref_name = 'HEAD' |
93 |
| - AND ref_commits.history_index BETWEEN 0 AND 5 |
94 |
| - AND is_binary(blob_content) = false |
95 |
| - AND files.file_path NOT REGEXP '^vendor.*' |
96 |
| - AND ( |
97 |
| - blob_content REGEXP '(?i)facebook.*[\'\\"][0-9a-f]{32}[\'\\"]' |
98 |
| - OR blob_content REGEXP '(?i)twitter.*[\'\\"][0-9a-zA-Z]{35,44}[\'\\"]' |
99 |
| - OR blob_content REGEXP '(?i)github.*[\'\\"][0-9a-zA-Z]{35,40}[\'\\"]' |
100 |
| - OR blob_content REGEXP 'AKIA[0-9A-Z]{16}' |
101 |
| - OR blob_content REGEXP '(?i)reddit.*[\'\\"][0-9a-zA-Z]{14}[\'\\"]' |
102 |
| - OR blob_content REGEXP '(?i)heroku.*[0-9A-F]{8}-[0-9A-F]{4}-[0-9A-F]{4}-[0-9A-F]{4}-[0-9A-F]{12}' |
103 |
| - OR blob_content REGEXP '.*-----BEGIN PRIVATE KEY-----.*' |
104 |
| - OR blob_content REGEXP '.*-----BEGIN RSA PRIVATE KEY-----.*' |
105 |
| - OR blob_content REGEXP '.*-----BEGIN DSA PRIVATE KEY-----.*' |
106 |
| - OR blob_content REGEXP '.*-----BEGIN OPENSSH PRIVATE KEY-----.*' |
107 |
| - ); |
| 82 | +SELECT file_path, |
| 83 | + repository_id, |
| 84 | + blob_content |
| 85 | +FROM files |
| 86 | +NATURAL JOIN commit_files |
| 87 | +NATURAL JOIN ref_commits |
| 88 | +WHERE ref_name = 'HEAD' |
| 89 | + AND ref_commits.history_index BETWEEN 0 AND 5 |
| 90 | + AND is_binary(blob_content) = FALSE |
| 91 | + AND files.file_path NOT REGEXP '^vendor.*' |
| 92 | + AND (blob_content REGEXP '(?i)facebook.*[\'\\"][0-9a-f]{32}[\'\\"]' |
| 93 | + OR blob_content REGEXP '(?i)twitter.*[\'\\"][0-9a-zA-Z]{35,44}[\'\\"]' |
| 94 | + OR blob_content REGEXP '(?i)github.*[\'\\"][0-9a-zA-Z]{35,40}[\'\\"]' |
| 95 | + OR blob_content REGEXP 'AKIA[0-9A-Z]{16}' |
| 96 | + OR blob_content REGEXP '(?i)reddit.*[\'\\"][0-9a-zA-Z]{14}[\'\\"]' |
| 97 | + OR blob_content REGEXP '(?i)heroku.*[0-9A-F]{8}-[0-9A-F]{4}-[0-9A-F]{4}-[0-9A-F]{4}-[0-9A-F]{12}' |
| 98 | + OR blob_content REGEXP '.*-----BEGIN PRIVATE KEY-----.*' |
| 99 | + OR blob_content REGEXP '.*-----BEGIN RSA PRIVATE KEY-----.*' |
| 100 | + OR blob_content REGEXP '.*-----BEGIN DSA PRIVATE KEY-----.*' |
| 101 | + OR blob_content REGEXP '.*-----BEGIN OPENSSH PRIVATE KEY-----.*'); |
108 | 102 | ```
|
109 | 103 |
|
110 | 104 | ## Create an index for columns on a table
|
@@ -137,106 +131,74 @@ First of all, you should check out the [bblfsh documentation](https://docs.sourc
|
137 | 131 |
|
138 | 132 | Also, you can take a look to all the UDFs and their signatures in the [functions section](/docs/using-gitbase/functions.md)
|
139 | 133 |
|
140 |
| -## Retrieving UASTs with the UDF `uast` |
| 134 | +## Extract all import paths for every *Go* file on *HEAD* reference |
141 | 135 |
|
142 | 136 | ```sql
|
143 |
| -SELECT file_path, uast(blob_content, language(file_path)) FROM files; |
| 137 | +SELECT repository_id, |
| 138 | + file_path, |
| 139 | + uast_extract(uast(blob_content, LANGUAGE(file_path), '//uast:Import/Path'), "Value") AS imports |
| 140 | +FROM commit_files |
| 141 | +NATURAL JOIN refs |
| 142 | +NATURAL JOIN blobs |
| 143 | +WHERE ref_name = 'HEAD' |
| 144 | + AND LANGUAGE(file_path) = 'Go' |
| 145 | + AND ARRAY_LENGTH(imports) > 0; |
144 | 146 | ```
|
145 | 147 |
|
146 |
| -This function allows you to directly filter the retrieved UAST by performing a XPATH query on it: |
| 148 | +## Extracting all identifier names |
147 | 149 |
|
148 | 150 | ```sql
|
149 |
| -SELECT file_path, uast(blob_content, language(file_path), "//uast:FunctionGroup") FROM files; |
150 |
| -``` |
151 |
| - |
152 |
| -This UDF will give you `semantic` UASTs by default. To get some other type see the UDF [`uast_mode`](#retrieving-different-kinds-of-uasts-using-uast_mode). |
153 |
| - |
154 |
| -## Retrieving different kinds of UASTs using `uast_mode` |
155 |
| - |
156 |
| -[bblfsh](https://docs.sourced.tech/babelfish) UAST modes: `semantic`, `annotated`, `native` |
157 |
| - |
158 |
| -```sql |
159 |
| -SELECT file_path, uast_mode("semantic", blob_content, language(file_path)) FROM files; |
160 |
| - |
161 |
| -SELECT file_path, uast_mode("annotated", blob_content, language(file_path)) FROM files; |
162 |
| - |
163 |
| -SELECT file_path, uast_mode("native", blob_content, language(file_path)) FROM files; |
164 |
| -``` |
165 |
| - |
166 |
| -## Filtering UASTs by XPath queries |
167 |
| - |
168 |
| -```sql |
169 |
| -SELECT file_path, uast_xpath(uast(blob_content, language(file_path)), "//uast:Identifier") FROM files; |
170 |
| - |
171 |
| -SELECT file_path, uast_xpath(uast_mode("annotated", blob_content, language(file_path)), "//*[@role='Function']") FROM files; |
172 |
| -``` |
173 |
| - |
174 |
| -## Extracting information from UAST nodes |
175 |
| - |
176 |
| -You can retrieve information from the UAST nodes either through the special selectors `@type`, `@token`, `@role`, `@pos`: |
177 |
| - |
178 |
| -```sql |
179 |
| -SELECT file_path, uast_extract(uast(blob_content, language(file_path), "//uast:Block"), "@pos") FROM files; |
180 |
| -``` |
181 |
| - |
182 |
| -or through a specific property: |
183 |
| - |
184 |
| -```sql |
185 |
| -SELECT file_path, uast_extract(uast(blob_content, language(file_path), "//uast:Identifier"), "Name") FROM files; |
| 151 | +SELECT file_path, |
| 152 | + uast_extract(uast(blob_content, LANGUAGE(file_path), '//uast:Identifier'), "Name") name |
| 153 | +FROM commit_files |
| 154 | +NATURAL JOIN refs |
| 155 | +NATURAL JOIN blobs |
| 156 | +WHERE ref_name='HEAD' AND LANGUAGE(file_path) = 'Go'; |
186 | 157 | ```
|
187 | 158 |
|
188 | 159 | As result, you will get an array showing a list of the retrieved information. Each element in the list matches a node in the given sequence of nodes having a value for that property. It means that the length of the properties list may not be equal to the length of the given sequence of nodes:
|
189 | 160 |
|
190 | 161 | ```sh
|
191 | 162 | +-------------------------------------------------------------------------------------------------------------------+
|
192 |
| -| file_path | uast_extract(uast(files.blob_content, language(files.file_path), "//uast:Identifier"), "Name") | |
| 163 | +| file_path | name | |
193 | 164 | +-------------------+-----------------------------------------------------------------------------------------------+
|
194 | 165 | | _example/main.go | ["main","driver","NewDefault","sqle","createTestDatabase","AddDatabase","driver","auth"] |
|
195 | 166 | +-------------------+-----------------------------------------------------------------------------------------------+
|
196 | 167 | ```
|
197 | 168 |
|
198 |
| -## Getting the children of a list of nodes |
199 |
| - |
200 |
| -The UDF `uast_children` will return a flattened array of the children nodes from all the nodes in the given array. |
201 |
| - |
202 |
| -```sql |
203 |
| -SELECT file_path, uast_children(uast(blob_content, language(file_path), "//uast:Alias")) FROM files; |
204 |
| -``` |
205 |
| - |
206 | 169 | ## Monitor the progress of a query
|
207 | 170 |
|
208 | 171 | You can monitor the progress of a gitbase query (either a regular query or an index creation query using `SHOW PROCESSLIST`).
|
209 | 172 |
|
210 | 173 | Let's say we do the following query over a huge repository:
|
211 | 174 |
|
212 | 175 | ```sql
|
213 |
| -SELECT language(file_path, blob_content) FROM files |
| 176 | +SELECT file_path, LANGUAGE(file_path) lang FROM commit_files; |
214 | 177 | ```
|
215 | 178 |
|
216 | 179 | With this query we can monitor its progress:
|
217 | 180 |
|
218 | 181 | ```sql
|
219 |
| -SHOW PROCESSLIST |
| 182 | +SHOW PROCESSLIST; |
220 | 183 | ```
|
221 | 184 |
|
222 | 185 | We'll get the following output:
|
223 | 186 |
|
224 | 187 | ```
|
225 |
| -+------+------+----------------+---------+---------+------+------------+-----------------------------------------------------+ |
226 |
| -| Id | User | Host | db | Command | Time | State | Info | |
227 |
| -+------+------+----------------+---------+---------+------+------------+-----------------------------------------------------+ |
228 |
| -| 2 | root | 127.0.0.1:3306 | gitbase | query | 36 | files(1/3) | select language(file_path, blob_content) from files | |
229 |
| -| 12 | root | 127.0.0.1:3306 | gitbase | query | 0 | running | show processlist | |
230 |
| -+------+------+----------------+---------+---------+------+------------+-----------------------------------------------------+ |
231 |
| -2 rows in set (0,00 sec) |
| 188 | ++-----+------+-----------------+---------+---------+------+-------------------+--------------------------------------------------------------+ |
| 189 | +| Id | User | Host | db | Command | Time | State | Info | |
| 190 | ++-----+------+-----------------+---------+---------+------+-------------------+--------------------------------------------------------------+ |
| 191 | +| 168 | root | 127.0.0.1:53514 | gitbase | query | 36 | commit_files(8/9) | SELECT file_path, LANGUAGE(file_path) lang FROM commit_files | |
| 192 | +| 169 | root | 127.0.0.1:53514 | gitbase | query | 0 | running | show processlist | |
| 193 | ++-----+------+-----------------+---------+---------+------+-------------------+--------------------------------------------------------------+ |
232 | 194 | ```
|
233 | 195 |
|
234 | 196 | From this output, we can obtain some information about our query:
|
235 | 197 | - It's been running for 36 seconds.
|
236 |
| -- It's only querying files table and has processed 1 out of 3 partitions. |
| 198 | +- It's querying commit_files table and has processed 8 out of 9 partitions. |
237 | 199 |
|
238 | 200 | To kill a query that's currently running you can use the value in `Id`. If we were to kill the previous query, we would need to use the following query:
|
239 | 201 |
|
240 | 202 | ```sql
|
241 |
| -KILL QUERY 2 |
| 203 | +KILL QUERY 168; |
242 | 204 | ```
|
0 commit comments