@@ -18,30 +18,29 @@ use std::sync::Arc;
18
18
use std:: time:: Duration ;
19
19
20
20
use databend_common_base:: base:: GlobalInstance ;
21
+ use databend_common_base:: runtime:: spawn;
21
22
use databend_common_base:: runtime:: GlobalIORuntime ;
22
23
use databend_common_base:: runtime:: MemStat ;
23
24
use databend_common_base:: runtime:: ThreadTracker ;
24
25
use databend_common_base:: runtime:: TrySpawn ;
25
26
use databend_common_catalog:: catalog:: CATALOG_DEFAULT ;
26
27
use databend_common_catalog:: table_context:: TableContext ;
27
28
use databend_common_config:: InnerConfig ;
29
+ use databend_common_exception:: ErrorCode ;
28
30
use databend_common_exception:: Result ;
29
31
use databend_common_license:: license:: Feature ;
30
32
use databend_common_license:: license_manager:: LicenseManagerSwitch ;
31
- use databend_common_meta_kvapi:: kvapi:: KVApi ;
32
- use databend_common_meta_store:: MetaStore ;
33
- use databend_common_meta_store:: MetaStoreProvider ;
34
- use databend_common_meta_types:: txn_condition;
35
- use databend_common_meta_types:: ConditionResult ;
36
- use databend_common_meta_types:: TxnCondition ;
37
- use databend_common_meta_types:: TxnOp ;
38
- use databend_common_meta_types:: TxnRequest ;
33
+ use databend_common_meta_client:: ClientHandle ;
34
+ use databend_common_meta_client:: MetaGrpcClient ;
35
+ use databend_common_meta_semaphore:: acquirer:: Permit ;
36
+ use databend_common_meta_semaphore:: Semaphore ;
39
37
use databend_common_sql:: Planner ;
40
38
use databend_common_storage:: DataOperator ;
41
39
use databend_common_tracing:: GlobalLogger ;
42
40
use log:: error;
43
41
use log:: info;
44
42
use rand:: random;
43
+ use tokio:: time:: sleep;
45
44
46
45
use crate :: interpreters:: InterpreterFactory ;
47
46
use crate :: persistent_log:: session:: create_session;
@@ -52,7 +51,7 @@ use crate::persistent_log::table_schemas::QueryProfileTable;
52
51
use crate :: sessions:: QueryContext ;
53
52
54
53
pub struct GlobalPersistentLog {
55
- meta_store : MetaStore ,
54
+ meta_client : Option < Arc < ClientHandle > > ,
56
55
interval : usize ,
57
56
tenant_id : String ,
58
57
node_id : String ,
@@ -61,16 +60,17 @@ pub struct GlobalPersistentLog {
61
60
initialized : AtomicBool ,
62
61
stopped : AtomicBool ,
63
62
tables : Vec < Box < dyn PersistentLogTable > > ,
64
- #[ allow( dead_code) ]
65
63
retention : usize ,
66
64
}
67
65
68
66
impl GlobalPersistentLog {
69
67
pub async fn init ( cfg : & InnerConfig ) -> Result < ( ) > {
70
68
setup_operator ( ) . await ?;
71
69
72
- let provider = MetaStoreProvider :: new ( cfg. meta . to_meta_grpc_client_conf ( ) ) ;
73
- let meta_store = provider. create_meta_store ( ) . await ?;
70
+ let meta_client =
71
+ MetaGrpcClient :: try_new ( & cfg. meta . to_meta_grpc_client_conf ( ) ) . map_err ( |_e| {
72
+ ErrorCode :: Internal ( "Create MetaClient failed for GlobalPersistentLog" )
73
+ } ) ?;
74
74
75
75
let mut tables: Vec < Box < dyn PersistentLogTable > > = vec ! [ ] ;
76
76
@@ -100,7 +100,7 @@ impl GlobalPersistentLog {
100
100
tables. push ( Box :: new ( query_log) ) ;
101
101
102
102
let instance = Arc :: new ( Self {
103
- meta_store ,
103
+ meta_client : Some ( meta_client ) ,
104
104
interval : cfg. log . persistentlog . interval ,
105
105
tenant_id : cfg. query . tenant_id . tenant_name ( ) . to_string ( ) ,
106
106
node_id : cfg. query . node_id . clone ( ) ,
@@ -112,14 +112,37 @@ impl GlobalPersistentLog {
112
112
retention : cfg. log . persistentlog . retention ,
113
113
} ) ;
114
114
GlobalInstance :: set ( instance) ;
115
- GlobalIORuntime :: instance ( ) . spawn ( async move {
116
- if let Err ( e) = GlobalPersistentLog :: instance ( ) . work ( ) . await {
117
- error ! ( "persistent log exit {}" , e) ;
118
- }
119
- } ) ;
115
+ GlobalIORuntime :: instance ( ) . try_spawn (
116
+ async move {
117
+ if let Err ( e) = GlobalPersistentLog :: instance ( ) . work ( ) . await {
118
+ error ! ( "persistent log exit {}" , e) ;
119
+ }
120
+ } ,
121
+ Some ( "persistent-log-worker" . to_string ( ) ) ,
122
+ ) ?;
120
123
Ok ( ( ) )
121
124
}
122
125
126
+ pub async fn create_dummy ( cfg : & InnerConfig ) -> Result < Self > {
127
+ setup_operator ( ) . await ?;
128
+ Ok ( Self {
129
+ meta_client : None ,
130
+ interval : cfg. log . persistentlog . interval ,
131
+ tenant_id : cfg. query . tenant_id . tenant_name ( ) . to_string ( ) ,
132
+ node_id : cfg. query . node_id . clone ( ) ,
133
+ cluster_id : cfg. query . cluster_id . clone ( ) ,
134
+ stage_name : cfg. log . persistentlog . stage_name . clone ( ) ,
135
+ initialized : AtomicBool :: new ( false ) ,
136
+ stopped : AtomicBool :: new ( false ) ,
137
+ tables : vec ! [
138
+ Box :: new( QueryDetailsTable ) ,
139
+ Box :: new( QueryProfileTable ) ,
140
+ Box :: new( QueryLogTable ) ,
141
+ ] ,
142
+ retention : cfg. log . persistentlog . retention ,
143
+ } )
144
+ }
145
+
123
146
pub fn instance ( ) -> Arc < GlobalPersistentLog > {
124
147
GlobalInstance :: get ( )
125
148
}
@@ -130,29 +153,32 @@ impl GlobalPersistentLog {
130
153
131
154
pub async fn work ( & self ) -> Result < ( ) > {
132
155
let mut prepared = false ;
133
-
134
- // // Use a counter rather than a time interval to trigger cleanup operations.
135
- // // because in cluster environment, a time-based interval would cause cleanup frequency
136
- // // to scale with the number of nodes in the cluster, whereas this count-based
137
- // // approach ensures consistent cleanup frequency regardless of cluster size.
138
- // let thirty_minutes_in_seconds = 30 * 60;
139
- // let copy_into_threshold = thirty_minutes_in_seconds / self.interval;
140
- // let mut copy_into_count = 0;
141
-
156
+ // Wait all services to be initialized
157
+ loop {
158
+ if !self . initialized . load ( Ordering :: SeqCst ) {
159
+ tokio:: time:: sleep ( Duration :: from_secs ( 1 ) ) . await ;
160
+ } else {
161
+ break ;
162
+ }
163
+ }
164
+ spawn ( async move {
165
+ if let Err ( e) = GlobalPersistentLog :: instance ( ) . clean_work ( ) . await {
166
+ error ! ( "Persistent log clean_work exit {}" , e) ;
167
+ }
168
+ } ) ;
142
169
loop {
143
- // add a random sleep time to avoid always one node doing the work
144
- let sleep_time = self . interval as u64 * 1000 + random :: < u64 > ( ) % 1000 ;
145
- tokio:: time:: sleep ( Duration :: from_millis ( sleep_time) ) . await ;
146
170
if self . stopped . load ( Ordering :: SeqCst ) {
147
171
return Ok ( ( ) ) ;
148
172
}
149
- // Wait all services to be initialized
150
- if !self . initialized . load ( Ordering :: SeqCst ) {
151
- continue ;
152
- }
153
173
// create the stage, database and table if not exists
154
- // only execute once, it is ok to do this in multiple nodes without lock
174
+ // alter the table if schema is changed
155
175
if !prepared {
176
+ let prepare_guard = self
177
+ . acquire (
178
+ format ! ( "{}/persistent_log_prepare" , self . tenant_id) ,
179
+ self . interval as u64 ,
180
+ )
181
+ . await ?;
156
182
match self . prepare ( ) . await {
157
183
Ok ( _) => {
158
184
info ! ( "Persistent log prepared successfully" ) ;
@@ -162,43 +188,41 @@ impl GlobalPersistentLog {
162
188
error ! ( "Persistent log prepare failed: {:?}" , e) ;
163
189
}
164
190
}
191
+ drop ( prepare_guard) ;
165
192
}
166
- if let Ok ( acquired_lock ) = self . try_acquire ( ) . await {
167
- if acquired_lock {
168
- if let Err ( e ) = self . do_copy_into ( ) . await {
169
- error ! ( "Persistent log copy into failed: {:?} " , e ) ;
170
- }
171
- // copy_into_count += 1;
172
- // if copy_into_count > copy_into_threshold {
173
- // if let Err(e) = self.clean().await {
174
- // error!("Persistent log delete failed: {:?}", e) ;
175
- // }
176
- // copy_into_count = 0;
177
- // }
178
- }
193
+
194
+ let guard = self
195
+ . acquire (
196
+ format ! ( "{}/persistent_log_work " , self . tenant_id ) ,
197
+ self . interval as u64 ,
198
+ )
199
+ . await ? ;
200
+ // add a random sleep time to avoid always one node doing the work
201
+ let sleep_time = self . interval as u64 * 1000 + random :: < u64 > ( ) % 1000 ;
202
+ tokio :: time :: sleep ( Duration :: from_millis ( sleep_time ) ) . await ;
203
+
204
+ if let Err ( e ) = self . do_copy_into ( ) . await {
205
+ error ! ( "Persistent log copy into failed: {:?}" , e ) ;
179
206
}
207
+
208
+ drop ( guard)
180
209
}
181
210
}
182
211
183
212
/// Multiple nodes doing the work may make commit conflict.
184
- pub async fn try_acquire ( & self ) -> Result < bool > {
185
- let meta_key = format ! ( "{}/persistent_log_lock" , self . tenant_id) ;
186
- let condition = vec ! [ TxnCondition {
187
- key: meta_key. clone( ) ,
188
- expected: ConditionResult :: Eq as i32 ,
189
- target: Some ( txn_condition:: Target :: Seq ( 0 ) ) ,
190
- } ] ;
191
-
192
- let if_then = vec ! [ TxnOp :: put_with_ttl(
193
- & meta_key,
194
- self . node_id. clone( ) . into( ) ,
195
- Some ( Duration :: from_secs( self . interval as u64 ) ) ,
196
- ) ] ;
213
+ /// acquire the semaphore to avoid this.
214
+ pub async fn acquire ( & self , meta_key : String , lease : u64 ) -> Result < Permit > {
215
+ let acquired_guard = Semaphore :: new_acquired (
216
+ self . meta_client . clone ( ) . unwrap ( ) ,
217
+ meta_key,
218
+ 1 ,
219
+ self . node_id . clone ( ) ,
220
+ Duration :: from_secs ( lease) ,
221
+ )
222
+ . await
223
+ . map_err ( |_e| "acquire semaphore failed from GlobalPersistentLog" ) ?;
197
224
198
- let txn = TxnRequest :: new ( condition, if_then) ;
199
- let resp = self . meta_store . transaction ( txn) . await ?;
200
-
201
- Ok ( resp. success )
225
+ Ok ( acquired_guard)
202
226
}
203
227
204
228
async fn execute_sql ( & self , sql : & str ) -> Result < ( ) > {
@@ -256,8 +280,8 @@ impl GlobalPersistentLog {
256
280
Ok ( ( ) )
257
281
}
258
282
259
- async fn do_copy_into ( & self ) -> Result < ( ) > {
260
- let stage_name = GlobalPersistentLog :: instance ( ) . stage_name . clone ( ) ;
283
+ pub async fn do_copy_into ( & self ) -> Result < ( ) > {
284
+ let stage_name = self . stage_name . clone ( ) ;
261
285
let operator = GlobalLogger :: instance ( ) . get_operator ( ) . await ;
262
286
if let Some ( op) = operator {
263
287
let path = format ! ( "stage/internal/{}/" , stage_name) ;
@@ -284,23 +308,35 @@ impl GlobalPersistentLog {
284
308
Ok ( ( ) )
285
309
}
286
310
287
- /// Do retention and vacuum
288
- #[ allow( dead_code) ]
289
- async fn clean ( & self ) -> Result < ( ) > {
290
- let delete = format ! (
291
- "DELETE FROM persistent_system.query_log WHERE timestamp < subtract_hours(NOW(), {})" ,
292
- self . retention
293
- ) ;
294
- self . execute_sql ( & delete) . await ?;
311
+ async fn clean_work ( & self ) -> Result < ( ) > {
312
+ loop {
313
+ let guard = self
314
+ . acquire ( format ! ( "{}/persistent_log_clean" , self . tenant_id) , 60 )
315
+ . await ?;
316
+ sleep ( Duration :: from_mins ( 60 ) ) . await ;
317
+ if let Err ( e) = self . do_clean ( ) . await {
318
+ error ! ( "persistent log clean failed: {}" , e) ;
319
+ }
320
+ drop ( guard) ;
321
+ }
322
+ }
323
+
324
+ pub async fn do_clean ( & self ) -> Result < ( ) > {
325
+ for table in & self . tables {
326
+ let clean_sql = table. clean_sql ( self . retention ) ;
327
+ self . execute_sql ( & clean_sql) . await ?;
328
+ }
295
329
296
330
let session = create_session ( & self . tenant_id , & self . cluster_id ) . await ?;
297
331
let context = session. create_query_context ( ) . await ?;
298
332
if LicenseManagerSwitch :: instance ( )
299
333
. check_enterprise_enabled ( context. get_license_key ( ) , Feature :: Vacuum )
300
334
. is_ok ( )
301
335
{
302
- let vacuum = "VACUUM TABLE persistent_system.query_log" ;
303
- self . execute_sql ( vacuum) . await ?
336
+ for table in & self . tables {
337
+ let vacuum = format ! ( "VACUUM TABLE persistent_system.{}" , table. table_name( ) ) ;
338
+ self . execute_sql ( & vacuum) . await ?
339
+ }
304
340
}
305
341
Ok ( ( ) )
306
342
}
0 commit comments