Skip to content

Commit 94c1b30

Browse files
authored
[Multipart] Add the frame -> bytes serializer (#73)
### Motivation

Adds a second state machine that does the inverse of #72: it serializes frames into bytes.

### Modifications

- A new state machine.
- A new serializer wrapping the state machine.
- An async sequence wrapping the serializer.

### Result

We can now serialize multipart frames into bytes.

### Test Plan

Unit tests for all 3 layers.
1 parent ce4fc05 commit 94c1b30

8 files changed

+513
-13
lines changed

Sources/OpenAPIRuntime/Multipart/ByteUtilities.swift

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,9 +36,12 @@ enum ASCII {
3636
/// Two dash characters.
3737
static let dashes: [UInt8] = [dash, dash]
3838

39-
/// The `<CR>` character follow by the `<LF>` character.
39+
/// The `<CR>` character followed by the `<LF>` character.
4040
static let crlf: [UInt8] = [cr, lf]
4141

42+
/// The colon character followed by the space character.
43+
static let colonSpace: [UInt8] = [colon, space]
44+
4245
/// The characters that represent optional whitespace (OWS).
4346
static let optionalWhitespace: Set<UInt8> = [space, tab]
4447

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// This source file is part of the SwiftOpenAPIGenerator open source project
4+
//
5+
// Copyright (c) 2023 Apple Inc. and the SwiftOpenAPIGenerator project authors
6+
// Licensed under Apache License v2.0
7+
//
8+
// See LICENSE.txt for license information
9+
// See CONTRIBUTORS.txt for the list of SwiftOpenAPIGenerator project authors
10+
//
11+
// SPDX-License-Identifier: Apache-2.0
12+
//
13+
//===----------------------------------------------------------------------===//
14+
15+
import HTTPTypes
16+
17+
/// An asynchronous sequence that turns a stream of multipart frames into
/// serialized byte chunks.
///
/// The struct itself only stores its inputs; the serialization work is done
/// by the iterator vended from its `AsyncSequence` conformance.
struct MultipartFramesToBytesSequence<Upstream>: Sendable
where Upstream: AsyncSequence & Sendable, Upstream.Element == MultipartFrame {

    /// The upstream sequence of multipart frames to serialize.
    var upstream: Upstream

    /// The boundary string that delimits the individual multipart parts.
    var boundary: String
}
27+
28+
extension MultipartFramesToBytesSequence: AsyncSequence {

    /// Each element is one chunk of serialized multipart bytes.
    typealias Element = ArraySlice<UInt8>

    /// Builds an iterator over the serialized byte chunks.
    ///
    /// - Returns: An iterator wrapping a fresh iterator of `upstream`,
    ///   configured with this sequence's `boundary`.
    func makeAsyncIterator() -> Iterator<Upstream.AsyncIterator> {
        let frames = upstream.makeAsyncIterator()
        return Iterator(upstream: frames, boundary: boundary)
    }

    /// Pulls multipart frames from an upstream iterator and hands back the
    /// bytes produced by serializing them.
    struct Iterator<UpstreamIterator: AsyncIteratorProtocol>: AsyncIteratorProtocol
    where UpstreamIterator.Element == MultipartFrame {

        /// The source of multipart frames.
        private var upstream: UpstreamIterator

        /// The serializer that converts frames into bytes.
        private var serializer: MultipartSerializer

        /// Creates an iterator over the bytes serialized from the given frames.
        /// - Parameters:
        ///   - upstream: The iterator that supplies the multipart frames.
        ///   - boundary: The boundary string placed between multipart parts.
        init(upstream: UpstreamIterator, boundary: String) {
            self.serializer = MultipartSerializer(boundary: boundary)
            self.upstream = upstream
        }

        /// Produces the next serialized byte chunk.
        ///
        /// The serializer decides when a new frame is needed and, at that
        /// point, invokes the closure that pulls from `upstream`.
        ///
        /// - Returns: The next byte chunk, or `nil` once the serializer
        ///   reports that there is nothing more to emit.
        /// - Throws: Any error thrown by the upstream iterator or by the
        ///   serializer.
        mutating func next() async throws -> ArraySlice<UInt8>? {
            return try await serializer.next {
                try await upstream.next()
            }
        }
    }
}

Sources/OpenAPIRuntime/Multipart/MultipartParser.swift

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
import Foundation
1616
import HTTPTypes
1717

18-
/// A parser of mutlipart frames from bytes.
18+
/// A parser of multipart frames from bytes.
1919
struct MultipartParser {
2020

2121
/// The underlying state machine.
Lines changed: 260 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,260 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// This source file is part of the SwiftOpenAPIGenerator open source project
4+
//
5+
// Copyright (c) 2023 Apple Inc. and the SwiftOpenAPIGenerator project authors
6+
// Licensed under Apache License v2.0
7+
//
8+
// See LICENSE.txt for license information
9+
// See CONTRIBUTORS.txt for the list of SwiftOpenAPIGenerator project authors
10+
//
11+
// SPDX-License-Identifier: Apache-2.0
12+
//
13+
//===----------------------------------------------------------------------===//
14+
15+
import Foundation
16+
import HTTPTypes
17+
18+
/// Serializes multipart frames into bytes.
///
/// The serializer asks its state machine what to do next, writes the
/// corresponding bytes into an internal buffer, and hands the buffered bytes
/// back to the caller one chunk per call to `next(_:)`.
struct MultipartSerializer {

    /// The UTF-8 bytes of the boundary that separates parts.
    private let boundary: ArraySlice<UInt8>

    /// The state machine that decides what to emit.
    private var stateMachine: StateMachine

    /// Bytes accumulated for the chunk currently being built.
    private var outBuffer: [UInt8]

    /// Creates a new serializer.
    /// - Parameter boundary: The boundary that separates parts.
    init(boundary: String) {
        self.boundary = ArraySlice(boundary.utf8)
        self.stateMachine = .init()
        self.outBuffer = []
    }

    /// Requests the next byte chunk.
    ///
    /// The first call emits only the start boundary and does not invoke
    /// `fetchFrame`. Later calls fetch exactly one frame (or the end-of-input
    /// `nil`) and return the bytes serialized for it.
    ///
    /// - Parameter fetchFrame: A closure called when the serializer is ready
    ///   to serialize the next frame; returning `nil` signals the end of frames.
    /// - Returns: A byte chunk, or `nil` once the state machine is finished.
    /// - Throws: A `SerializerError` when the state machine reports an error,
    ///   or whatever `fetchFrame` throws.
    mutating func next(_ fetchFrame: () async throws -> MultipartFrame?) async throws -> ArraySlice<UInt8>? {
        switch stateMachine.next() {
        case .returnNil:
            return nil
        case .emitStart:
            emitStart()
            return takeBufferedBytes()
        case .needsMore:
            // Fall out of the switch and pull a frame from upstream below.
            break
        }

        let frame = try await fetchFrame()
        switch stateMachine.receivedFrame(frame) {
        case .returnNil:
            return nil
        case .emitError(let error):
            throw SerializerError(error: error)
        case .emitEvents(let events):
            for event in events { write(event) }
            return takeBufferedBytes()
        }
    }

    /// Writes the bytes for a single state machine event into the buffer.
    /// - Parameter event: The event to serialize.
    private mutating func write(_ event: StateMachine.Event) {
        switch event {
        case .headerFields(let headerFields): emitHeaders(headerFields)
        case .bodyChunk(let chunk): emitBodyChunk(chunk)
        case .endOfPart: emitEndOfPart()
        case .start: emitStart()
        case .end: emitEnd()
        }
    }

    /// Returns everything currently buffered and empties the buffer,
    /// keeping its capacity for the next chunk.
    /// - Returns: The bytes that were buffered.
    private mutating func takeBufferedBytes() -> ArraySlice<UInt8> {
        let chunk = ArraySlice(outBuffer)
        outBuffer.removeAll(keepingCapacity: true)
        return chunk
    }
}
76+
77+
extension MultipartSerializer {

    /// The error type thrown by the serializer; it wraps an error reported by
    /// the state machine.
    struct SerializerError: Swift.Error, CustomStringConvertible, LocalizedError {

        /// The state machine error that caused this serializer error.
        var error: StateMachine.ActionError

        /// A human-readable description of the underlying error.
        var description: String {
            switch error {
            case .noHeaderFieldsAtStart:
                return "No header fields found at the start of the multipart body."
            }
        }

        /// The localized description; the same text as `description`.
        var errorDescription: String? {
            return description
        }
    }
}
94+
95+
extension MultipartSerializer {

    /// Appends the serialized header fields of a part to the buffer.
    ///
    /// Output layout: a CRLF, then one `name: value` line (CRLF-terminated)
    /// per field, ordered by canonical field name, then a closing CRLF.
    ///
    /// - Parameter headerFields: The header fields to serialize.
    private mutating func emitHeaders(_ headerFields: HTTPFields) {
        outBuffer += ASCII.crlf
        for field in headerFields.sorted(by: { $0.name.canonicalName < $1.name.canonicalName }) {
            outBuffer += field.name.canonicalName.utf8
            outBuffer += ASCII.colonSpace
            outBuffer += field.value.utf8
            outBuffer += ASCII.crlf
        }
        outBuffer += ASCII.crlf
    }

    /// Appends the bytes of a part body chunk to the buffer, unchanged.
    /// - Parameter bodyChunk: The body chunk to write.
    private mutating func emitBodyChunk(_ bodyChunk: ArraySlice<UInt8>) {
        outBuffer += bodyChunk
    }

    /// Appends an end-of-part boundary to the buffer: CRLF, two dashes, and
    /// the boundary bytes.
    private mutating func emitEndOfPart() {
        outBuffer += ASCII.crlf
        outBuffer += ASCII.dashes
        outBuffer += boundary
    }

    /// Appends the start boundary to the buffer: two dashes and the boundary
    /// bytes.
    private mutating func emitStart() {
        outBuffer += ASCII.dashes
        outBuffer += boundary
    }

    /// Appends the closing double dash to the buffer, followed by two CRLFs.
    private mutating func emitEnd() {
        outBuffer += ASCII.dashes
        outBuffer += ASCII.crlf
        outBuffer += ASCII.crlf
    }
}
135+
136+
extension MultipartSerializer {

    /// A state machine representing the multipart frame serializer.
    ///
    /// The machine is driven by two calls: `next()` asks what to do before a
    /// frame is fetched, and `receivedFrame(_:)` ingests the fetched frame
    /// (or `nil` for end of input) and returns the events to serialize.
    struct StateMachine {

        /// The possible states of the state machine.
        enum State: Hashable {

            /// Has not yet written any bytes.
            case initial

            /// Emitted start, but no frames yet.
            case emittedStart

            /// Finished, the terminal state.
            case finished

            /// Last emitted a header fields frame.
            case emittedHeaders

            /// Last emitted a part body chunk frame.
            case emittedBodyChunk
        }

        /// The current state of the state machine.
        private(set) var state: State

        /// Creates a new state machine in the `initial` state.
        init() { self.state = .initial }

        /// An error returned by the state machine.
        enum ActionError: Hashable {

            /// The first frame from upstream was not a header fields frame.
            case noHeaderFieldsAtStart
        }

        /// An action returned by the `next` method.
        enum NextAction: Hashable {

            /// Return nil to the caller, no more bytes.
            case returnNil

            /// Emit the initial boundary.
            case emitStart

            /// Ready for the next frame.
            case needsMore
        }

        /// Read the next byte chunk serialized from upstream frames.
        ///
        /// Moves from `initial` to `emittedStart` on the first call; every
        /// other state is left unchanged.
        ///
        /// - Returns: An action to perform.
        mutating func next() -> NextAction {
            switch state {
            case .initial:
                state = .emittedStart
                return .emitStart
            case .finished: return .returnNil
            case .emittedStart, .emittedHeaders, .emittedBodyChunk: return .needsMore
            }
        }

        /// An event to serialize to bytes.
        enum Event: Hashable {

            /// The header fields of a part.
            case headerFields(HTTPFields)

            /// A byte chunk of a part.
            case bodyChunk(ArraySlice<UInt8>)

            /// A boundary between parts.
            case endOfPart

            /// The initial boundary.
            case start

            /// The final dashes.
            case end
        }

        /// An action returned by the `receivedFrame` method.
        enum ReceivedFrameAction: Hashable {

            /// Return nil to the caller, no more bytes.
            case returnNil

            /// Write the provided events as bytes.
            case emitEvents([Event])

            /// Throw the provided error.
            case emitError(ActionError)
        }

        /// Ingest the provided frame.
        ///
        /// Must not be called in the `initial` state (call `next()` first);
        /// doing so is a programmer error and traps.
        ///
        /// - Parameter frame: A new frame. If `nil`, then the source of frames is finished.
        /// - Returns: An action to perform.
        mutating func receivedFrame(_ frame: MultipartFrame?) -> ReceivedFrameAction {
            // A single exhaustive switch: the two state-only cases come first so
            // that the frame-dependent cases below only ever see the three
            // "emitted…" states.
            switch (state, frame) {
            case (.initial, _): preconditionFailure("Invalid state: \(state)")
            case (.finished, _): return .returnNil
            case (_, .none):
                // Upstream is done: close the last part and write the final dashes.
                state = .finished
                return .emitEvents([.endOfPart, .end])
            case (.emittedStart, .headerFields(let headerFields)):
                // First part: the start boundary was already written, so no
                // end-of-part boundary is needed before the headers.
                state = .emittedHeaders
                return .emitEvents([.headerFields(headerFields)])
            case (.emittedStart, .bodyChunk):
                // A body chunk cannot open the multipart body.
                state = .finished
                return .emitError(.noHeaderFieldsAtStart)
            case (.emittedHeaders, .headerFields(let headerFields)),
                (.emittedBodyChunk, .headerFields(let headerFields)):
                // New headers begin a new part, so terminate the previous one first.
                state = .emittedHeaders
                return .emitEvents([.endOfPart, .headerFields(headerFields)])
            case (.emittedHeaders, .bodyChunk(let bodyChunk)), (.emittedBodyChunk, .bodyChunk(let bodyChunk)):
                state = .emittedBodyChunk
                return .emitEvents([.bodyChunk(bodyChunk)])
            }
        }
    }
}

Tests/OpenAPIRuntimeTests/Multipart/Test_MultipartBytesToFramesSequence.swift

Lines changed: 3 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -17,20 +17,12 @@ import Foundation
1717

1818
final class Test_MultipartBytesToFramesSequence: Test_Runtime {
1919
func test() async throws {
20-
var chunk = chunkFromStringLines([
20+
let chunk = chunkFromStringLines([
2121
"--__abcd__", #"Content-Disposition: form-data; name="name""#, "", "24", "--__abcd__",
2222
#"Content-Disposition: form-data; name="info""#, "", "{}", "--__abcd__--",
2323
])
24-
let next: () async throws -> ArraySlice<UInt8>? = {
25-
if let first = chunk.first {
26-
let out: ArraySlice<UInt8> = [first]
27-
chunk = chunk.dropFirst()
28-
return out
29-
} else {
30-
return nil
31-
}
32-
}
33-
let upstream = HTTPBody(AsyncThrowingStream(unfolding: next), length: .unknown, iterationBehavior: .single)
24+
var iterator = chunk.makeIterator()
25+
let upstream = AsyncStream { iterator.next().map { ArraySlice([$0]) } }
3426
let sequence = MultipartBytesToFramesSequence(upstream: upstream, boundary: "__abcd__")
3527
var frames: [MultipartFrame] = []
3628
for try await frame in sequence { frames.append(frame) }

0 commit comments

Comments
 (0)