|
35 | 35 | import org.apache.cloudstack.framework.jobs.AsyncJobManager;
|
36 | 36 | import org.apache.cloudstack.framework.jobs.dao.AsyncJobDao;
|
37 | 37 | import org.apache.cloudstack.framework.jobs.impl.AsyncJobVO;
|
| 38 | +import org.apache.cloudstack.jobs.JobInfo; |
38 | 39 | import org.apache.cloudstack.managed.context.ManagedContextTimerTask;
|
39 | 40 | import org.springframework.stereotype.Component;
|
40 | 41 |
|
|
47 | 48 | import com.cloud.storage.Snapshot;
|
48 | 49 | import com.cloud.storage.SnapshotPolicyVO;
|
49 | 50 | import com.cloud.storage.SnapshotScheduleVO;
|
50 |
| -import com.cloud.storage.SnapshotVO; |
51 | 51 | import com.cloud.storage.VolumeVO;
|
52 | 52 | import com.cloud.storage.dao.SnapshotDao;
|
53 | 53 | import com.cloud.storage.dao.SnapshotPolicyDao;
|
|
64 | 64 | import com.cloud.utils.concurrency.TestClock;
|
65 | 65 | import com.cloud.utils.db.DB;
|
66 | 66 | import com.cloud.utils.db.GlobalLock;
|
67 |
| -import com.cloud.utils.db.SearchCriteria; |
68 | 67 | import com.cloud.utils.db.TransactionLegacy;
|
69 | 68 | import com.cloud.vm.snapshot.VMSnapshotManager;
|
70 | 69 | import com.cloud.vm.snapshot.VMSnapshotVO;
|
@@ -144,7 +143,7 @@ public void poll(final Date currentTimestamp) {
|
144 | 143 | try {
|
145 | 144 | if (scanLock.lock(ACQUIRE_GLOBAL_LOCK_TIMEOUT_FOR_COOPERATION)) {
|
146 | 145 | try {
|
147 |
| - checkStatusOfCurrentlyExecutingSnapshots(); |
| 146 | + scheduleNextSnapshotJobsIfNecessary(); |
148 | 147 | } finally {
|
149 | 148 | scanLock.unlock();
|
150 | 149 | }
|
@@ -174,68 +173,37 @@ public void poll(final Date currentTimestamp) {
|
174 | 173 | }
|
175 | 174 | }
|
176 | 175 |
|
177 |
| - private void checkStatusOfCurrentlyExecutingSnapshots() { |
178 |
| - final SearchCriteria<SnapshotScheduleVO> sc = _snapshotScheduleDao.createSearchCriteria(); |
179 |
| - sc.addAnd("asyncJobId", SearchCriteria.Op.NNULL); |
180 |
| - final List<SnapshotScheduleVO> snapshotSchedules = _snapshotScheduleDao.search(sc, null); |
181 |
| - for (final SnapshotScheduleVO snapshotSchedule : snapshotSchedules) { |
182 |
| - final Long asyncJobId = snapshotSchedule.getAsyncJobId(); |
183 |
| - final AsyncJobVO asyncJob = _asyncJobDao.findByIdIncludingRemoved(asyncJobId); |
184 |
| - switch (asyncJob.getStatus()) { |
185 |
| - case SUCCEEDED: |
186 |
| - // The snapshot has been successfully backed up. |
187 |
| - // The snapshot state has also been cleaned up. |
188 |
| - // We can schedule the next job for this snapshot. |
189 |
| - // Remove the existing entry in the snapshot_schedule table. |
190 |
| - scheduleNextSnapshotJob(snapshotSchedule); |
191 |
| - break; |
192 |
| - case FAILED: |
193 |
| - // Check the snapshot status. |
194 |
| - final Long snapshotId = snapshotSchedule.getSnapshotId(); |
195 |
| - if (snapshotId == null) { |
196 |
| - // createSnapshotAsync exited, successfully or unsuccessfully, |
197 |
| - // even before creating a snapshot record |
198 |
| - // No cleanup needs to be done. |
199 |
| - // Schedule the next snapshot. |
200 |
| - scheduleNextSnapshotJob(snapshotSchedule); |
201 |
| - } else { |
202 |
| - final SnapshotVO snapshot = _snapshotDao.findById(snapshotId); |
203 |
| - if (snapshot == null || snapshot.getRemoved() != null) { |
204 |
| - // This snapshot has been deleted successfully from the primary storage |
205 |
| - // Again no cleanup needs to be done. |
206 |
| - // Schedule the next snapshot. |
207 |
| - // There's very little probability that the code reaches this point. |
208 |
| - // The snapshotId is a foreign key for the snapshot_schedule table |
209 |
| - // set to ON DELETE CASCADE. So if the snapshot entry is deleted, the snapshot_schedule entry will be too. |
210 |
| - // But what if it has only been marked as removed? |
211 |
| - scheduleNextSnapshotJob(snapshotSchedule); |
212 |
| - } else { |
213 |
| - // The management server executing this snapshot job appears to have crashed |
214 |
| - // while creating the snapshot on primary storage/or backing it up. |
215 |
| - // We have no idea whether the snapshot was successfully taken on the primary or not. |
216 |
| - // Schedule the next snapshot job. |
217 |
| - // The ValidatePreviousSnapshotCommand will take appropriate action on this snapshot |
218 |
| - // If the snapshot was taken successfully on primary, it will retry backing it up. |
219 |
| - // and cleanup the previous snapshot |
220 |
| - // Set the userId to that of system. |
221 |
| - //_snapshotManager.validateSnapshot(1L, snapshot); |
222 |
| - // In all cases, schedule the next snapshot job |
223 |
| - scheduleNextSnapshotJob(snapshotSchedule); |
224 |
| - } |
225 |
| - } |
| 176 | + private void scheduleNextSnapshotJobsIfNecessary() { |
| 177 | + List<SnapshotScheduleVO> snapshotSchedules = _snapshotScheduleDao.getSchedulesAssignedWithAsyncJob(); |
| 178 | + logger.info("Verifying the current state of [{}] snapshot schedules and scheduling next jobs, if necessary.", snapshotSchedules.size()); |
| 179 | + for (SnapshotScheduleVO snapshotSchedule : snapshotSchedules) { |
| 180 | + scheduleNextSnapshotJobIfNecessary(snapshotSchedule); |
| 181 | + } |
| 182 | + } |
226 | 183 |
|
227 |
| - break; |
228 |
| - case IN_PROGRESS: |
229 |
| - // There is no way of knowing from here whether |
230 |
| - // 1) Another management server is processing this snapshot job |
231 |
| - // 2) The management server has crashed and this snapshot is lying |
232 |
| - // around in an inconsistent state. |
233 |
| - // Hopefully, this can be resolved at the backend when the current snapshot gets executed. |
234 |
| - // But if it remains in this state, the current snapshot will not get executed. |
235 |
| - // And it will remain in stasis. |
236 |
| - break; |
237 |
| - } |
| 184 | + protected void scheduleNextSnapshotJobIfNecessary(SnapshotScheduleVO snapshotSchedule) { |
| 185 | + Long asyncJobId = snapshotSchedule.getAsyncJobId(); |
| 186 | + AsyncJobVO asyncJob = _asyncJobDao.findByIdIncludingRemoved(asyncJobId); |
| 187 | + |
| 188 | + if (asyncJob == null) { |
| 189 | + logger.debug("The async job [{}] of snapshot schedule [{}] does not exist anymore. Considering it as finished and scheduling the next snapshot job.", |
| 190 | + asyncJobId, snapshotSchedule); |
| 191 | + scheduleNextSnapshotJob(snapshotSchedule); |
| 192 | + return; |
238 | 193 | }
|
| 194 | + |
| 195 | + JobInfo.Status status = asyncJob.getStatus(); |
| 196 | + |
| 197 | + if (JobInfo.Status.SUCCEEDED.equals(status)) { |
| 198 | + logger.debug("Last job of schedule [{}] succeeded; scheduling the next snapshot job.", snapshotSchedule); |
| 199 | + } else if (JobInfo.Status.FAILED.equals(status)) { |
| 200 | + logger.debug("Last job of schedule [{}] failed with [{}]; scheduling a new snapshot job.", snapshotSchedule, asyncJob.getResult()); |
| 201 | + } else { |
| 202 | + logger.debug("Schedule [{}] is still in progress, skipping next job scheduling.", snapshotSchedule); |
| 203 | + return; |
| 204 | + } |
| 205 | + |
| 206 | + scheduleNextSnapshotJob(snapshotSchedule); |
239 | 207 | }
|
240 | 208 |
|
241 | 209 | @DB
|
|
0 commit comments