32
32
from flask .cli import with_appcontext
33
33
from sqlalchemy .orm .attributes import flag_modified
34
34
35
+ from invenio_db import db
36
+ from invenio_records_files .api import Record
37
+ from invenio_indexer .api import RecordIndexer
38
+ from cernopendata .modules .records .minters .recid import \
39
+ cernopendata_recid_minter
40
+
41
+ from invenio_files_rest .models import \
42
+ Bucket , FileInstance , ObjectVersion
43
+ from invenio_records_files .models import RecordsBuckets
44
+ from invenio_pidstore .models import PersistentIdentifier
45
+ from invenio_pidstore .errors import PIDDoesNotExistError
46
+
35
47
36
48
def get_jsons_from_dir (dir ):
37
49
"""Get JSON files inside a dir."""
@@ -43,6 +55,61 @@ def get_jsons_from_dir(dir):
43
55
return res
44
56
45
57
58
def create_record(schema, data, files, skip_files):
    """Create a new record together with its file bucket.

    A fresh ``Bucket`` is created. Unless ``skip_files`` is true, every
    entry of ``files`` is registered as a ``FileInstance`` with an
    ``ObjectVersion`` in that bucket, and the entry is updated in place with
    the resulting ``bucket``, ``checksum``, ``key`` and ``version_id``.
    A file that cannot be registered is reported via ``click.echo`` and
    skipped; the remaining files are still processed.

    :param schema: value stored under the record's ``$schema`` key.
    :param data: record metadata, handed to the recid minter and to
        ``Record.create``.
    :param files: iterable of dicts, each holding ``uri``, ``size`` and
        ``checksum`` keys (the caller visible in this file passes
        ``data.get('files', [])``, so the in-place updates end up in
        ``data`` as well).
    :param skip_files: when true, no file is registered at all.
    :returns: the newly created ``Record``.
    :raises AssertionError: if a file entry lacks one of the required keys.
    """
    bucket = Bucket.create()

    # ``skip_files`` never changes during the loop, so test it once.
    if not skip_files:
        for file in files:
            assert 'uri' in file
            assert 'size' in file
            assert 'checksum' in file

            # Bind ``filename`` before entering the ``try``: the error
            # message below reads it, and it would otherwise be unbound (or
            # stale from the previous file) if ``FileInstance.create()``
            # itself failed.
            filename = file.get("uri")
            try:
                f = FileInstance.create()
                # Last path component of the URI is used as the object key.
                filename = file.get("uri").split('/')[-1]
                f.set_uri(file.get("uri"), file.get(
                    "size"), file.get("checksum"))
                obj = ObjectVersion.create(
                    bucket,
                    filename,
                    _file_id=f.id
                )

                file.update({
                    'bucket': str(obj.bucket_id),
                    'checksum': obj.file.checksum,
                    'key': obj.key,
                    'version_id': str(obj.version_id),
                })

            except Exception as e:
                # Best effort: report the failing file and carry on.
                click.echo(
                    'Recid {0} file {1} could not be loaded due '
                    'to {2}.'.format(data.get('recid'), filename,
                                     str(e)))
                continue

    record_uuid = uuid.uuid4()
    cernopendata_recid_minter(record_uuid, data)
    record = Record.create(data, id_=record_uuid)
    record['$schema'] = schema
    RecordsBuckets.create(
        record=record.model, bucket=bucket)

    return record
102
+
103
+
104
def update_record(pid, schema, data):
    """Update the record referenced by ``pid`` and commit the change.

    :param pid: persistent identifier whose ``object_uuid`` selects the
        record to load.
    :param schema: value stored under the record's ``$schema`` key.
    :param data: metadata merged into the record via ``update``.
    :returns: the updated ``Record``.
    """
    existing = Record.get_record(pid.object_uuid)
    # The schema is written before ``data`` is merged, so the statement
    # order matters if ``data`` carries its own ``$schema`` key.
    existing['$schema'] = schema
    existing.update(data)
    existing.commit()
    return existing
111
+
112
+
46
113
@click .group (chain = True )
47
114
def fixtures ():
48
115
"""Automate site bootstrap process and testing."""
@@ -58,8 +125,10 @@ def fixtures():
58
125
@click .option ('--profile' , is_flag = True ,
59
126
help = 'Output profiling information.' )
60
127
@click .option ('--verbose' , is_flag = True , default = False )
128
+ @click .option ('--mode' , required = True , type = click .Choice (
129
+ ['insert' , 'replace' , 'insert-or-replace' ]))
61
130
@with_appcontext
62
- def records (skip_files , files , profile , verbose ):
131
+ def records (skip_files , files , profile , verbose , mode ):
63
132
"""Load all records."""
64
133
if profile :
65
134
import cProfile
@@ -68,22 +137,14 @@ def records(skip_files, files, profile, verbose):
68
137
pr = cProfile .Profile ()
69
138
pr .enable ()
70
139
71
- from invenio_db import db
72
- from invenio_records_files .api import Record
73
- from invenio_indexer .api import RecordIndexer
74
- from cernopendata .modules .records .minters .recid import \
75
- cernopendata_recid_minter
76
-
77
- from invenio_files_rest .models import \
78
- Bucket , FileInstance , ObjectVersion
79
- from invenio_records_files .models import RecordsBuckets
80
-
81
140
indexer = RecordIndexer ()
82
141
schema = current_app .extensions ['invenio-jsonschemas' ].path_to_url (
83
142
'records/record-v1.0.0.json'
84
143
)
85
144
data = pkg_resources .resource_filename ('cernopendata' ,
86
145
'modules/fixtures/data/records' )
146
+ action = None
147
+
87
148
if files :
88
149
record_json = files
89
150
else :
@@ -103,49 +164,42 @@ def records(skip_files, files, profile, verbose):
103
164
format (data .get ('recid' )))
104
165
105
166
files = data .get ('files' , [])
106
-
107
- bucket = Bucket .create ()
108
-
109
- for file in files :
110
- if skip_files :
111
- break
112
- assert 'uri' in file
113
- assert 'size' in file
114
- assert 'checksum' in file
115
-
167
+ if mode == 'insert-or-replace' :
168
+ try :
169
+ pid = PersistentIdentifier .get ('recid' , data ['recid' ])
170
+ if pid :
171
+ record = update_record (pid , schema , data )
172
+ action = 'updated'
173
+ except PIDDoesNotExistError :
174
+ record = create_record (schema , data , files , skip_files )
175
+ action = 'inserted'
176
+ elif mode == 'insert' :
177
+ try :
178
+ pid = PersistentIdentifier .get ('recid' , data ['recid' ])
179
+ if pid :
180
+ click .echo (
181
+ 'Record recid {} exists already;'
182
+ ' cannot insert it. ' .format (
183
+ data .get ('recid' )), err = True )
184
+ return
185
+ except PIDDoesNotExistError :
186
+ record = create_record (schema , data , files , skip_files )
187
+ action = 'inserted'
188
+ else :
116
189
try :
117
- f = FileInstance .create ()
118
- filename = file .get ("uri" ).split ('/' )[- 1 :][0 ]
119
- f .set_uri (file .get ("uri" ), file .get (
120
- "size" ), file .get ("checksum" ))
121
- obj = ObjectVersion .create (
122
- bucket ,
123
- filename ,
124
- _file_id = f .id
125
- )
126
-
127
- file .update ({
128
- 'bucket' : str (obj .bucket_id ),
129
- 'checksum' : obj .file .checksum ,
130
- 'key' : obj .key ,
131
- 'version_id' : str (obj .version_id ),
132
- })
133
-
134
- except Exception as e :
190
+ pid = PersistentIdentifier .get ('recid' , data ['recid' ])
191
+ except PIDDoesNotExistError :
135
192
click .echo (
136
- 'Recid {0} file {1} could not be loaded due '
137
- 'to {2}.' .format (data .get ('recid' ), filename ,
138
- str (e )))
139
- continue
140
-
141
- id = uuid .uuid4 ()
142
- cernopendata_recid_minter (id , data )
143
- record = Record .create (data , id_ = id )
144
- record ['$schema' ] = schema
145
- RecordsBuckets .create (
146
- record = record .model , bucket = bucket )
147
-
193
+ 'Record recid {} does not exist; '
194
+ 'cannot replace it.' .format (
195
+ data .get ('recid' )), err = True )
196
+ return
197
+ record = update_record (pid , schema , data )
198
+ action = 'updated'
148
199
db .session .commit ()
200
+ click .echo (
201
+ ' Record recid {0} {1}.' .format (
202
+ data .get ('recid' ), action ))
149
203
indexer .index (record )
150
204
db .session .expunge_all ()
151
205
@@ -180,7 +234,8 @@ def glossary_terms():
180
234
with open (filename , 'rb' ) as source :
181
235
for data in json .load (source ):
182
236
if "collections" not in data and \
183
- not isinstance (data .get ("collections" , None ), basestring ):
237
+ not isinstance (
238
+ data .get ("collections" , None ), basestring ):
184
239
data ["collections" ] = []
185
240
data ["collections" ].append ({"primary" : "Terms" })
186
241
id = uuid .uuid4 ()
@@ -228,7 +283,8 @@ def docs():
228
283
with open (content_filename ) as body_field :
229
284
data ["body" ]["content" ] = body_field .read ()
230
285
if "collections" not in data and \
231
- not isinstance (data .get ("collections" , None ), basestring ):
286
+ not isinstance (
287
+ data .get ("collections" , None ), basestring ):
232
288
data ["collections" ] = []
233
289
id = uuid .uuid4 ()
234
290
cernopendata_docid_minter (id , data )
@@ -254,6 +310,7 @@ def data_policies(skip_files):
254
310
Bucket , FileInstance , ObjectVersion
255
311
from invenio_records_files .models import RecordsBuckets
256
312
from invenio_records_files .api import Record
313
+
257
314
from invenio_records .models import RecordMetadata
258
315
259
316
indexer = RecordIndexer ()
0 commit comments