-
Notifications
You must be signed in to change notification settings - Fork 186
/
Copy pathcheckpoints.lua
executable file
·62 lines (50 loc) · 1.69 KB
/
checkpoints.lua
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
-- Module table for checkpoint helpers. The functions attached below load and
-- save training checkpoints (model, optimizer state and a 'latest.t7' index)
-- inside the directory given by `opt.resume`.
local checkpoint = {}
--- Load the most recently saved checkpoint from the `opt.resume` directory.
-- Looks for the index file 'latest.t7'; when it is present, the index table
-- is loaded together with the optimizer state file it names (`optimFile`).
-- The model file itself is not loaded here.
-- @param opt options table; only `opt.resume` (a directory path) is read
-- @return the table stored in 'latest.t7' and the loaded optimizer state,
--         or nil when 'latest.t7' does not exist
function checkpoint.latest(opt)
   local indexPath = paths.concat(opt.resume, 'latest.t7')

   if paths.filep(indexPath) then
      print('=> Loading checkpoint ' .. indexPath)
      local info = torch.load(indexPath)
      local optimPath = paths.concat(opt.resume, info.optimFile)
      local state = torch.load(optimPath)
      return info, state
   end

   -- No index file: there is nothing to resume from.
   return nil
end
--- Select and load a checkpoint according to `opt.useCheckpoint`.
--   -1          : return nil (nothing is loaded)
--    0          : defer to checkpoint.latest, which relies on 'latest.t7'
--    any other  : treat the value as an epoch and load
--                 'optimState_<epoch>.t7' from `opt.resume`
-- Only the optimizer state is read from disk here; the model is returned by
-- file name so the caller can load it.
-- @param opt options table; reads `opt.useCheckpoint` and `opt.resume`
-- @return a table {epoch, modelFile, optimFile} and the optimizer state,
--         or nil when no checkpoint is selected / available
function checkpoint.load(opt)
   local which = opt.useCheckpoint

   if which == -1 then
      return nil
   elseif which == 0 then
      return checkpoint.latest(opt)
   end

   -- Both file names share the '<epoch>.t7' tail.
   local suffix = which .. '.t7'
   local resumed = {
      epoch = which,
      modelFile = 'model_' .. suffix,
      optimFile = 'optimState_' .. suffix,
   }

   local optimState = torch.load(paths.concat(opt.resume, resumed.optimFile))
   return resumed, optimState
end
--- Write a checkpoint for `epoch` into the `opt.resume` directory.
-- Saves, in order: 'model_<epoch>.t7', 'optimState_<epoch>.t7' and the index
-- file 'latest.t7' (epoch plus the two file names). When `bestModel` is
-- truthy the model is additionally written to 'model_best.t7'.
-- @param epoch      epoch identifier used in the file names
-- @param model      network to save; a DataParallelTable is unwrapped first
-- @param optimState optimizer state to serialize
-- @param bestModel  truthy to also store the model as 'model_best.t7'
-- @param opt        options table; only `opt.resume` is read
function checkpoint.save(epoch, model, optimState, bestModel, opt)
   -- Store the wrapped network rather than the DataParallelTable so the
   -- file is easier to load on other machines.
   local net = model
   if torch.type(net) == 'nn.DataParallelTable' then
      net = net:get(1)
   end

   -- This table doubles as the content of the 'latest.t7' index.
   local index = {
      epoch = epoch,
      modelFile = 'model_' .. epoch .. '.t7',
      optimFile = 'optimState_' .. epoch .. '.t7',
   }
   local dir = opt.resume

   torch.save(paths.concat(dir, index.modelFile), net:clearState())
   torch.save(paths.concat(dir, index.optimFile), optimState)
   torch.save(paths.concat(dir, 'latest.t7'), index)

   if bestModel then
      torch.save(paths.concat(dir, 'model_best.t7'), net:clearState())
   end
end
-- Export the module table so callers obtain it via require.
return checkpoint