Skip to content
This repository was archived by the owner on Mar 10, 2020. It is now read-only.

Commit 1158951

Browse files
authored
fix: handle shard updates that create subshards of subshards (#47)
I'd assumed that adding a file to a HAMT shard involved either adding a link, updating a link, or replacing a link with a subshard. However, adding a file can also result in a subshard being created that itself contains another subshard holding the files. This PR handles that outcome.
1 parent 34cfd0d commit 1158951

File tree

3 files changed

+261
-128
lines changed

3 files changed

+261
-128
lines changed

src/core/utils/add-link.js

Lines changed: 173 additions & 126 deletions
Original file line numberDiff line numberDiff line change
@@ -6,14 +6,16 @@ const {
66
} = require('ipld-dag-pb')
77
const CID = require('cids')
88
const waterfall = require('async/waterfall')
9-
const DirSharded = require('ipfs-unixfs-importer/src/importer/dir-sharded')
10-
const series = require('async/series')
119
const whilst = require('async/whilst')
1210
const log = require('debug')('ipfs:mfs:core:utils:add-link')
1311
const UnixFS = require('ipfs-unixfs')
12+
const DirSharded = require('ipfs-unixfs-importer/src/importer/dir-sharded')
1413
const {
15-
generatePath,
16-
updateHamtDirectory
14+
updateHamtDirectory,
15+
recreateHamtLevel,
16+
createShard,
17+
toPrefix,
18+
addLinksToHamtBucket
1719
} = require('./hamt-utils')
1820

1921
const defaultOptions = {
@@ -125,140 +127,185 @@ const addToDirectory = (context, options, callback) => {
125127
}
126128

127129
const addToShardedDirectory = (context, options, callback) => {
128-
return waterfall([
129-
(cb) => generatePath(context, options.name, options.parent, cb),
130-
({ rootBucket, path }, cb) => {
131-
updateShard(context, path.reverse(), {
132-
name: options.name,
133-
cid: options.cid,
134-
size: options.size
135-
}, 0, options, (err, result = {}) => cb(err, { rootBucket, ...result }))
136-
},
137-
({ rootBucket, node }, cb) => updateHamtDirectory(context, node.links, rootBucket, options, cb)
138-
], callback)
139-
}
140-
141-
const updateShard = (context, positions, child, index, options, callback) => {
142-
const {
143-
bucket,
144-
prefix,
145-
node
146-
} = positions[index]
147-
148-
const link = node.links
149-
.find(link => link.name.substring(0, 2) === prefix && link.name !== `${prefix}${child.name}`)
150-
151-
return waterfall([
152-
(cb) => {
153-
if (link && link.name.length > 2) {
154-
log(`Converting existing file ${link.name} into sub-shard for ${child.name}`)
155-
156-
return waterfall([
157-
(done) => createShard(context, [{
158-
name: link.name.substring(2),
159-
size: link.size,
160-
multihash: link.cid.buffer
161-
}, {
162-
name: child.name,
163-
size: child.size,
164-
multihash: child.cid.buffer
165-
}], {}, done),
166-
({ node: { links: [ shard ] } }, done) => {
167-
let position = 0
168-
169-
// step through the shard until we find the newly created sub-shard
170-
return whilst(
171-
() => position < positions.length - 1,
172-
(next) => {
173-
const shardPrefix = positions[position].prefix
174-
175-
log(`Prefix at position ${position} is ${shardPrefix} - shard.name ${shard.name}`)
176-
177-
if (shard.name.substring(0, 2) !== shardPrefix) {
178-
return next(new Error(`Unexpected prefix ${shard.name} !== ${shardPrefix}, position ${position}`))
179-
}
180-
181-
position++
182-
183-
context.ipld.get(shard.cid, (err, result) => {
184-
if (err) {
185-
return next(err)
186-
}
187-
188-
if (position < positions.length) {
189-
const nextPrefix = positions[position].prefix
190-
const nextShard = result.value.links.find(link => link.name.substring(0, 2) === nextPrefix)
191-
192-
if (nextShard) {
193-
shard = nextShard
194-
}
195-
}
196-
197-
next(err, { cid: result && result.cid, node: result && result.value })
198-
})
199-
},
200-
done
201-
)
202-
},
203-
(result, cb) => updateShardParent(context, bucket, node, link.name, result.node, result.cid, prefix, options, cb)
204-
], cb)
205-
}
130+
return addFileToShardedDirectoryy(context, options, (err, result) => {
131+
if (err) {
132+
return callback(err)
133+
}
206134

207-
if (link && link.name.length === 2) {
208-
log(`Descending into sub-shard ${link.name} for ${child.name}`)
135+
const {
136+
shard, path
137+
} = result
209138

210-
return waterfall([
211-
(cb) => updateShard(context, positions, child, index + 1, options, cb),
212-
(result, cb) => updateShardParent(context, bucket, node, link.name, result.node, result.cid, prefix, options, cb)
213-
], cb)
139+
shard.flush('', context.ipld, null, async (err, result) => {
140+
if (err) {
141+
return callback(err)
214142
}
215143

216-
log(`Adding or replacing file`, prefix + child.name)
217-
updateShardParent(context, bucket, node, prefix + child.name, child, child.cid, prefix + child.name, options, cb)
218-
}
219-
], callback)
144+
// we have written out the shard, but only one sub-shard will have been written so replace it in the original shard
145+
const oldLink = options.parent.links
146+
.find(link => link.name.substring(0, 2) === path[0].prefix)
147+
148+
const newLink = result.node.links
149+
.find(link => link.name.substring(0, 2) === path[0].prefix)
150+
151+
waterfall([
152+
(done) => {
153+
if (!oldLink) {
154+
return done(null, options.parent)
155+
}
156+
157+
DAGNode.rmLink(options.parent, oldLink.name, done)
158+
},
159+
(parent, done) => DAGNode.addLink(parent, newLink, done),
160+
(parent, done) => updateHamtDirectory(context, parent.links, path[0].bucket, options, done)
161+
], callback)
162+
})
163+
})
220164
}
221165

222-
const createShard = (context, contents, options, callback) => {
223-
const shard = new DirSharded({
224-
root: true,
225-
dir: true,
226-
parent: null,
227-
parentKey: null,
228-
path: '',
229-
dirty: true,
230-
flat: false,
231-
232-
...options
233-
})
166+
const addFileToShardedDirectoryy = (context, options, callback) => {
167+
const file = {
168+
name: options.name,
169+
cid: options.cid,
170+
size: options.size
171+
}
234172

235-
const operations = contents.map(contents => {
236-
return (cb) => {
237-
shard.put(contents.name, {
238-
size: contents.size,
239-
multihash: contents.multihash
240-
}, cb)
173+
// start at the root bucket and descend, loading nodes as we go
174+
recreateHamtLevel(options.parent.links, null, null, null, async (err, rootBucket) => {
175+
if (err) {
176+
return callback(err)
241177
}
242-
})
243178

244-
return series(
245-
operations,
246-
(err) => {
247-
if (err) {
248-
return callback(err)
179+
const shard = new DirSharded({
180+
root: true,
181+
dir: true,
182+
parent: null,
183+
parentKey: null,
184+
path: '',
185+
dirty: true,
186+
flat: false
187+
})
188+
shard._bucket = rootBucket
189+
190+
// load subshards until the bucket & position no longer changes
191+
const position = await rootBucket._findNewBucketAndPos(file.name)
192+
const path = toBucketPath(position)
193+
path[0].node = options.parent
194+
let index = 0
195+
196+
whilst(
197+
() => index < path.length,
198+
(next) => {
199+
let segment = path[index]
200+
index++
201+
let node = segment.node
202+
203+
let link = node.links
204+
.find(link => link.name.substring(0, 2) === segment.prefix)
205+
206+
if (!link) {
207+
// prefix is new, file will be added to the current bucket
208+
log(`Link ${segment.prefix}${file.name} will be added`)
209+
index = path.length
210+
return next(null, shard)
211+
}
212+
213+
if (link.name === `${segment.prefix}${file.name}`) {
214+
// file already existed, file will be added to the current bucket
215+
log(`Link ${segment.prefix}${file.name} will be replaced`)
216+
index = path.length
217+
return next(null, shard)
218+
}
219+
220+
if (link.name.length > 2) {
221+
// another file had the same prefix, will be replaced with a subshard
222+
log(`Link ${link.name} will be replaced with a subshard`)
223+
index = path.length
224+
return next(null, shard)
225+
}
226+
227+
// load sub-shard
228+
log(`Found subshard ${segment.prefix}`)
229+
context.ipld.get(link.cid, (err, result) => {
230+
if (err) {
231+
return next(err)
232+
}
233+
234+
// subshard hasn't been loaded, descend to the next level of the HAMT
235+
if (!path[index]) {
236+
log(`Loaded new subshard ${segment.prefix}`)
237+
const node = result.value
238+
239+
return recreateHamtLevel(node.links, rootBucket, segment.bucket, parseInt(segment.prefix, 16), async (err) => {
240+
if (err) {
241+
return next(err)
242+
}
243+
244+
const position = await rootBucket._findNewBucketAndPos(file.name)
245+
246+
path.push({
247+
bucket: position.bucket,
248+
prefix: toPrefix(position.pos),
249+
node: node
250+
})
251+
252+
return next(null, shard)
253+
})
254+
}
255+
256+
const nextSegment = path[index]
257+
258+
// add next level's worth of links to bucket
259+
addLinksToHamtBucket(result.value.links, nextSegment.bucket, rootBucket, (error) => {
260+
nextSegment.node = result.value
261+
262+
next(error, shard)
263+
})
264+
})
265+
},
266+
(err, shard) => {
267+
if (err) {
268+
return callback(err)
269+
}
270+
271+
// finally add the new file into the shard
272+
shard.put(file.name, {
273+
size: file.size,
274+
multihash: file.cid.buffer
275+
}, (err) => {
276+
callback(err, {
277+
shard, path
278+
})
279+
})
249280
}
250-
251-
shard.flush('', context.ipld, null, callback)
252-
}
253-
)
281+
)
282+
})
254283
}
255284

256-
const updateShardParent = (context, bucket, parent, name, node, cid, prefix, options, callback) => {
257-
waterfall([
258-
(done) => DAGNode.rmLink(parent, name, done),
259-
(parent, done) => DAGNode.addLink(parent, new DAGLink(prefix, node.size, cid), done),
260-
(parent, done) => updateHamtDirectory(context, parent.links, bucket, options, done)
261-
], callback)
285+
const toBucketPath = (position) => {
286+
let bucket = position.bucket
287+
let positionInBucket = position.pos
288+
let path = [{
289+
bucket,
290+
prefix: toPrefix(positionInBucket)
291+
}]
292+
293+
bucket = position.bucket._parent
294+
positionInBucket = position.bucket._posAtParent
295+
296+
while (bucket) {
297+
path.push({
298+
bucket,
299+
prefix: toPrefix(positionInBucket)
300+
})
301+
302+
positionInBucket = bucket._posAtParent
303+
bucket = bucket._parent
304+
}
305+
306+
path.reverse()
307+
308+
return path
262309
}
263310

264311
module.exports = addLink

src/core/utils/hamt-utils.js

Lines changed: 41 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ const {
55
} = require('ipld-dag-pb')
66
const waterfall = require('async/waterfall')
77
const whilst = require('async/whilst')
8+
const series = require('async/series')
89
const Bucket = require('hamt-sharding/src/bucket')
910
const DirSharded = require('ipfs-unixfs-importer/src/importer/dir-sharded')
1011
const log = require('debug')('ipfs:mfs:core:utils:hamt-utils')
@@ -63,7 +64,10 @@ const addLinksToHamtBucket = (links, bucket, rootBucket, callback) => {
6364
return Promise.resolve()
6465
}
6566

66-
return (rootBucket || bucket).put(link.name.substring(2), true)
67+
return (rootBucket || bucket).put(link.name.substring(2), {
68+
size: link.size,
69+
multihash: link.cid
70+
})
6771
})
6872
)
6973
.then(() => callback(null, bucket), callback)
@@ -180,10 +184,45 @@ const generatePath = (context, fileName, rootNode, callback) => {
180184
})
181185
}
182186

187+
const createShard = (context, contents, options, callback) => {
188+
const shard = new DirSharded({
189+
root: true,
190+
dir: true,
191+
parent: null,
192+
parentKey: null,
193+
path: '',
194+
dirty: true,
195+
flat: false,
196+
197+
...options
198+
})
199+
200+
const operations = contents.map(contents => {
201+
return (cb) => {
202+
shard.put(contents.name, {
203+
size: contents.size,
204+
multihash: contents.multihash
205+
}, cb)
206+
}
207+
})
208+
209+
return series(
210+
operations,
211+
(err) => {
212+
if (err) {
213+
return callback(err)
214+
}
215+
216+
shard.flush('', context.ipld, null, callback)
217+
}
218+
)
219+
}
220+
183221
module.exports = {
184222
generatePath,
185223
updateHamtDirectory,
186224
recreateHamtLevel,
187225
addLinksToHamtBucket,
188-
toPrefix
226+
toPrefix,
227+
createShard
189228
}

0 commit comments

Comments
 (0)