
Commit 6a6194d

Merge commit 'e07c4d00c387840f70fa3701fb3a51c2a32f37b8'

* commit 'e07c4d00c387840f70fa3701fb3a51c2a32f37b8':
  Move guidanceScale as generation parameter (apple#46)
  Add brief instructions to download weights from the Hub (apple#10)
  Adds Negative Prompts (apple#61)
  Changed seed type into UInt32 (apple#47)
  fixes apple#77
  Update README.md (apple#66)
  Add Filename Character Limit (apple#19)
  Implement DPM-Solver++ scheduler (apple#59)
  Fix typos: Successfully facilitate getting pipeline overridden (apple#30)
  Undefined name: from typing import List (apple#31)
  Add Availability Annotations (apple#18)
  README improvements and reduceMemory option in Swift

2 parents a926194 + e07c4d0, commit 6a6194d

22 files changed: +812 -178 lines

Package.swift (+3 -3)

@@ -6,9 +6,9 @@ import PackageDescription
 let package = Package(
     name: "stable-diffusion",
     platforms: [
-        .macOS(.v13),
-        .iOS(.v16),
+        .macOS(.v11),
+        .iOS(.v14),
     ],
     products: [
         .library(
             name: "StableDiffusion",

README.md (+146 -26)

Large diffs are not rendered by default.

python_coreml_stable_diffusion/pipeline.py (+1 -1)

@@ -38,7 +38,7 @@
 import time
 import torch # Only used for `torch.from_tensor` in `pipe.scheduler.step()`
 from transformers import CLIPFeatureExtractor, CLIPTokenizer
-from typing import Union, Optional
+from typing import List, Optional, Union
 
 
 class CoreMLStableDiffusionPipeline(DiffusionPipeline):

python_coreml_stable_diffusion/torch2coreml.py (+1 -1)

@@ -576,7 +576,7 @@ def convert_unet(pipe, args):
     # Set the output descriptions
     coreml_unet.output_description["noise_pred"] = \
         "Same shape and dtype as the `sample` input. " \
-        "The predicted noise to faciliate the reverse diffusion (denoising) process"
+        "The predicted noise to facilitate the reverse diffusion (denoising) process"
 
     _save_mlpackage(coreml_unet, out_path)
     logger.info(f"Saved unet into {out_path}")

setup.py (+1 -0)

@@ -19,6 +19,7 @@
         "torch",
         "transformers",
         "scipy",
+        "numpy<1.24",
     ],
     packages=find_packages(),
     classifiers=[
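
Note: the numpy<1.24 pin is presumably a compatibility guard. NumPy 1.24 removed long-deprecated aliases such as np.float and np.int, which parts of the conversion stack's dependencies still referenced at the time, so pinning below 1.24 avoids those import-time failures. (This rationale is inferred, not stated in the diff.)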
DPMSolverMultistepScheduler.swift (new file, +182)

// For licensing see accompanying LICENSE.md file.
// Copyright (C) 2022 Apple Inc. and The HuggingFace Team. All Rights Reserved.

import Accelerate
import CoreML

/// A scheduler used to compute a de-noised image
///
/// This implementation matches:
/// [Hugging Face Diffusers DPMSolverMultistepScheduler](https://github.com/huggingface/diffusers/blob/main/src/diffusers/schedulers/scheduling_dpmsolver_multistep.py)
///
/// It uses the DPM-Solver++ algorithm: [code](https://github.com/LuChengTHU/dpm-solver) [paper](https://arxiv.org/abs/2211.01095).
/// Limitations:
///  - Only implemented for DPM-Solver++ algorithm (not DPM-Solver).
///  - Second order only.
///  - Assumes the model predicts epsilon.
///  - No dynamic thresholding.
///  - `midpoint` solver algorithm.
@available(iOS 16.2, macOS 13.1, *)
public final class DPMSolverMultistepScheduler: Scheduler {
    public let trainStepCount: Int
    public let inferenceStepCount: Int
    public let betas: [Float]
    public let alphas: [Float]
    public let alphasCumProd: [Float]
    public let timeSteps: [Int]

    public let alpha_t: [Float]
    public let sigma_t: [Float]
    public let lambda_t: [Float]

    public let solverOrder = 2
    private(set) var lowerOrderStepped = 0

    /// Whether to use lower-order solvers in the final steps. Only valid for less than 15 inference steps.
    /// We empirically find this trick can stabilize the sampling of DPM-Solver, especially with 10 or fewer steps.
    public let useLowerOrderFinal = true

    // Stores solverOrder (2) items
    private(set) var modelOutputs: [MLShapedArray<Float32>] = []

    /// Create a scheduler that uses a second order DPM-Solver++ algorithm.
    ///
    /// - Parameters:
    ///   - stepCount: Number of inference steps to schedule
    ///   - trainStepCount: Number of training diffusion steps
    ///   - betaSchedule: Method to schedule betas from betaStart to betaEnd
    ///   - betaStart: The starting value of beta for inference
    ///   - betaEnd: The end value for beta for inference
    /// - Returns: A scheduler ready for its first step
    public init(
        stepCount: Int = 50,
        trainStepCount: Int = 1000,
        betaSchedule: BetaSchedule = .scaledLinear,
        betaStart: Float = 0.00085,
        betaEnd: Float = 0.012
    ) {
        self.trainStepCount = trainStepCount
        self.inferenceStepCount = stepCount

        switch betaSchedule {
        case .linear:
            self.betas = linspace(betaStart, betaEnd, trainStepCount)
        case .scaledLinear:
            self.betas = linspace(pow(betaStart, 0.5), pow(betaEnd, 0.5), trainStepCount).map({ $0 * $0 })
        }

        self.alphas = betas.map({ 1.0 - $0 })
        var alphasCumProd = self.alphas
        for i in 1..<alphasCumProd.count {
            alphasCumProd[i] *= alphasCumProd[i - 1]
        }
        self.alphasCumProd = alphasCumProd

        // Currently we only support VP-type noise schedule
        self.alpha_t = vForce.sqrt(self.alphasCumProd)
        self.sigma_t = vForce.sqrt(vDSP.subtract([Float](repeating: 1, count: self.alphasCumProd.count), self.alphasCumProd))
        self.lambda_t = zip(self.alpha_t, self.sigma_t).map { α, σ in log(α) - log(σ) }

        self.timeSteps = linspace(0, Float(self.trainStepCount-1), stepCount).reversed().map { Int(round($0)) }
    }

    /// Convert the model output to the corresponding type the algorithm needs.
    /// This implementation is for second-order DPM-Solver++ assuming epsilon prediction.
    func convertModelOutput(modelOutput: MLShapedArray<Float32>, timestep: Int, sample: MLShapedArray<Float32>) -> MLShapedArray<Float32> {
        assert(modelOutput.scalars.count == sample.scalars.count)
        let (alpha_t, sigma_t) = (self.alpha_t[timestep], self.sigma_t[timestep])

        // This could be optimized with a Metal kernel if we find we need to
        let x0_scalars = zip(modelOutput.scalars, sample.scalars).map { m, s in
            (s - m * sigma_t) / alpha_t
        }
        return MLShapedArray(scalars: x0_scalars, shape: modelOutput.shape)
    }

    /// One step for the first-order DPM-Solver (equivalent to DDIM).
    /// See https://arxiv.org/abs/2206.00927 for the detailed derivation.
    /// var names and code structure mostly follow https://github.com/huggingface/diffusers/blob/main/src/diffusers/schedulers/scheduling_dpmsolver_multistep.py
    func firstOrderUpdate(
        modelOutput: MLShapedArray<Float32>,
        timestep: Int,
        prevTimestep: Int,
        sample: MLShapedArray<Float32>
    ) -> MLShapedArray<Float32> {
        let (p_lambda_t, lambda_s) = (Double(lambda_t[prevTimestep]), Double(lambda_t[timestep]))
        let p_alpha_t = Double(alpha_t[prevTimestep])
        let (p_sigma_t, sigma_s) = (Double(sigma_t[prevTimestep]), Double(sigma_t[timestep]))
        let h = p_lambda_t - lambda_s
        // x_t = (sigma_t / sigma_s) * sample - (alpha_t * (torch.exp(-h) - 1.0)) * model_output
        let x_t = weightedSum(
            [p_sigma_t / sigma_s, -p_alpha_t * (exp(-h) - 1)],
            [sample, modelOutput]
        )
        return x_t
    }

    /// One step for the second-order multistep DPM-Solver++ algorithm, using the midpoint method.
    /// var names and code structure mostly follow https://github.com/huggingface/diffusers/blob/main/src/diffusers/schedulers/scheduling_dpmsolver_multistep.py
    func secondOrderUpdate(
        modelOutputs: [MLShapedArray<Float32>],
        timesteps: [Int],
        prevTimestep t: Int,
        sample: MLShapedArray<Float32>
    ) -> MLShapedArray<Float32> {
        let (s0, s1) = (timesteps[back: 1], timesteps[back: 2])
        let (m0, m1) = (modelOutputs[back: 1], modelOutputs[back: 2])
        let (p_lambda_t, lambda_s0, lambda_s1) = (Double(lambda_t[t]), Double(lambda_t[s0]), Double(lambda_t[s1]))
        let p_alpha_t = Double(alpha_t[t])
        let (p_sigma_t, sigma_s0) = (Double(sigma_t[t]), Double(sigma_t[s0]))
        let (h, h_0) = (p_lambda_t - lambda_s0, lambda_s0 - lambda_s1)
        let r0 = h_0 / h
        let D0 = m0

        // D1 = (1.0 / r0) * (m0 - m1)
        let D1 = weightedSum(
            [1/r0, -1/r0],
            [m0, m1]
        )

        // See https://arxiv.org/abs/2211.01095 for detailed derivations
        // x_t = (
        //     (sigma_t / sigma_s0) * sample
        //     - (alpha_t * (torch.exp(-h) - 1.0)) * D0
        //     - 0.5 * (alpha_t * (torch.exp(-h) - 1.0)) * D1
        // )
        let x_t = weightedSum(
            [p_sigma_t/sigma_s0, -p_alpha_t * (exp(-h) - 1), -0.5 * p_alpha_t * (exp(-h) - 1)],
            [sample, D0, D1]
        )
        return x_t
    }

    public func step(output: MLShapedArray<Float32>, timeStep t: Int, sample: MLShapedArray<Float32>) -> MLShapedArray<Float32> {
        let stepIndex = timeSteps.firstIndex(of: t) ?? timeSteps.count - 1
        let prevTimestep = stepIndex == timeSteps.count - 1 ? 0 : timeSteps[stepIndex + 1]

        let lowerOrderFinal = useLowerOrderFinal && stepIndex == timeSteps.count - 1 && timeSteps.count < 15
        let lowerOrderSecond = useLowerOrderFinal && stepIndex == timeSteps.count - 2 && timeSteps.count < 15
        let lowerOrder = lowerOrderStepped < 1 || lowerOrderFinal || lowerOrderSecond

        let modelOutput = convertModelOutput(modelOutput: output, timestep: t, sample: sample)
        if modelOutputs.count == solverOrder { modelOutputs.removeFirst() }
        modelOutputs.append(modelOutput)

        let prevSample: MLShapedArray<Float32>
        if lowerOrder {
            prevSample = firstOrderUpdate(modelOutput: modelOutput, timestep: t, prevTimestep: prevTimestep, sample: sample)
        } else {
            prevSample = secondOrderUpdate(
                modelOutputs: modelOutputs,
                timesteps: [timeSteps[stepIndex - 1], t],
                prevTimestep: prevTimestep,
                sample: sample
            )
        }
        if lowerOrderStepped < solverOrder {
            lowerOrderStepped += 1
        }

        return prevSample
    }
}
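
For reference, the update rules implemented above, transcribed from the inline comments into standard notation. Here m_i denotes the converted model output from convertModelOutput (the predicted clean sample x_0), lambda_t = log(alpha_t) - log(sigma_t), h = lambda_t - lambda_s, and the subscript t is the less-noisy timestep being stepped to:

% First-order update (equivalent to DDIM):
x_t = \frac{\sigma_t}{\sigma_s}\, x_s - \alpha_t \left(e^{-h} - 1\right) \hat{x}_0(x_s)

% Second-order multistep update (midpoint method), with
% h_0 = \lambda_{s_0} - \lambda_{s_1}, \quad r_0 = h_0 / h,
% D_0 = m_0, \quad D_1 = (m_0 - m_1) / r_0:
x_t = \frac{\sigma_t}{\sigma_{s_0}}\, x
      - \alpha_t \left(e^{-h} - 1\right) D_0
      - \tfrac{1}{2}\, \alpha_t \left(e^{-h} - 1\right) D_1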

swift/StableDiffusion/pipeline/Decoder.swift (+24 -10)

@@ -6,21 +6,31 @@ import CoreML
 import Accelerate
 
 /// A decoder model which produces RGB images from latent samples
-public struct Decoder {
+@available(iOS 16.2, macOS 13.1, *)
+public struct Decoder: ResourceManaging {
 
     /// VAE decoder model
-    var model: MLModel
+    var model: ManagedMLModel
 
     /// Create decoder from Core ML model
     ///
-    /// - Parameters
-    ///   - model: Core ML model for VAE decoder
-    public init(model: MLModel) {
-        self.model = model
+    /// - Parameters:
+    ///   - url: Location of compiled VAE decoder Core ML model
+    ///   - configuration: configuration to be used when the model is loaded
+    /// - Returns: A decoder that will lazily load its required resources when needed or requested
+    public init(modelAt url: URL, configuration: MLModelConfiguration) {
+        self.model = ManagedMLModel(modelAt: url, configuration: configuration)
     }
 
-    /// Prediction queue
-    let queue = DispatchQueue(label: "decoder.predict")
+    /// Ensure the model has been loaded into memory
+    public func loadResources() throws {
+        try model.loadResources()
+    }
+
+    /// Unload the underlying model to free up memory
+    public func unloadResources() {
+        model.unloadResources()
+    }
 
     /// Batch decode latent samples into images
     ///
@@ -42,7 +52,9 @@ public struct Decoder {
         let batch = MLArrayBatchProvider(array: inputs)
 
         // Batch predict with model
-        let results = try queue.sync { try model.predictions(fromBatch: batch) }
+        let results = try model.perform { model in
+            try model.predictions(fromBatch: batch)
+        }
 
         // Transform the outputs to CGImages
         let images: [CGImage] = (0..<results.count).map { i in
@@ -57,7 +69,9 @@ public struct Decoder {
     }
 
     var inputName: String {
-        model.modelDescription.inputDescriptionsByName.first!.key
+        try! model.perform { model in
+            model.modelDescription.inputDescriptionsByName.first!.key
+        }
     }
 
     typealias PixelBufferPFx1 = vImage.PixelBuffer<vImage.PlanarF>
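
A hedged usage sketch of the new Decoder API above; the model path is a placeholder and the decode call is commented out because its signature is not shown in this hunk:

import CoreML
import Foundation

let config = MLModelConfiguration()
config.computeUnits = .cpuAndGPU

let decoder = Decoder(
    modelAt: URL(fileURLWithPath: "/path/to/VAEDecoder.mlmodelc"), // placeholder
    configuration: config
)

try decoder.loadResources()   // optional eager load; otherwise the model loads on first use
// let images = try decoder.decode(latents)  // signature not shown in this hunk
decoder.unloadResources()     // drop the underlying MLModel to reclaim memory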
ManagedMLModel.swift (new file, +77)

// For licensing see accompanying LICENSE.md file.
// Copyright (C) 2022 Apple Inc. All Rights Reserved.

import CoreML

/// A class to manage and gate access to a Core ML model
///
/// It will automatically load a model into memory when needed or requested
/// It allows one to request to unload the model from memory
@available(iOS 16.2, macOS 13.1, *)
public final class ManagedMLModel: ResourceManaging {

    /// The location of the model
    var modelURL: URL

    /// The configuration to be used when the model is loaded
    var configuration: MLModelConfiguration

    /// The loaded model (when loaded)
    var loadedModel: MLModel?

    /// Queue to protect access to loaded model
    var queue: DispatchQueue

    /// Create a managed model given its location and desired loaded configuration
    ///
    /// - Parameters:
    ///   - url: The location of the model
    ///   - configuration: The configuration to be used when the model is loaded/used
    /// - Returns: A managed model that has not been loaded
    public init(modelAt url: URL, configuration: MLModelConfiguration) {
        self.modelURL = url
        self.configuration = configuration
        self.loadedModel = nil
        self.queue = DispatchQueue(label: "managed.\(url.lastPathComponent)")
    }

    /// Instantiate and load the model into memory
    public func loadResources() throws {
        try queue.sync {
            try loadModel()
        }
    }

    /// Unload the model if it was loaded
    public func unloadResources() {
        queue.sync {
            loadedModel = nil
        }
    }

    /// Perform an operation with the managed model via a supplied closure.
    /// The model will be loaded and supplied to the closure and should only be
    /// used within the closure to ensure all resource management is synchronized
    ///
    /// - Parameters:
    ///   - body: Closure which performs an action on a loaded model
    /// - Returns: The result of the closure
    /// - Throws: An error if the model cannot be loaded or if the closure throws
    public func perform<R>(_ body: (MLModel) throws -> R) throws -> R {
        return try queue.sync {
            try autoreleasepool {
                try loadModel()
                return try body(loadedModel!)
            }
        }
    }

    private func loadModel() throws {
        if loadedModel == nil {
            loadedModel = try MLModel(contentsOf: modelURL,
                                      configuration: configuration)
        }
    }
}
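
A minimal sketch of the access pattern ManagedMLModel provides (placeholder path; assumes the annotated OS availability):

import CoreML

let managed = ManagedMLModel(
    modelAt: URL(fileURLWithPath: "/path/to/Unet.mlmodelc"), // placeholder
    configuration: MLModelConfiguration()
)

// `perform` lazily loads the model, serializes access on the private
// queue, and hands the loaded MLModel to the closure.
let inputCount = try managed.perform { model in
    model.modelDescription.inputDescriptionsByName.count
}

// Unloading frees the MLModel; the next `perform` transparently reloads it.
managed.unloadResources()

Routing every access through perform is presumably what makes the reduceMemory option mentioned in the merge message workable: callers never hold an MLModel reference that could outlive an unload.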

swift/StableDiffusion/pipeline/Random.swift (+1 -0)

@@ -9,6 +9,7 @@ import CoreML
 /// This implementation matches:
 /// [NumPy's older randomkit.c](https://github.com/numpy/numpy/blob/v1.0/numpy/random/mtrand/randomkit.c)
 ///
+@available(iOS 16.2, macOS 13.1, *)
 struct NumPyRandomSource: RandomNumberGenerator {
 
     struct State {
ResourceManaging.swift (new file, +20)

// For licensing see accompanying LICENSE.md file.
// Copyright (C) 2022 Apple Inc. All Rights Reserved.

/// Protocol for managing internal resources
public protocol ResourceManaging {

    /// Request resources to be loaded and ready if possible
    func loadResources() throws

    /// Request resources are unloaded / removed from memory if possible
    func unloadResources()
}

extension ResourceManaging {
    /// Request resources are pre-warmed by loading and unloading
    func prewarmResources() throws {
        try loadResources()
        unloadResources()
    }
}
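
As an illustration, a wrapper conforming to the protocol can forward to a ManagedMLModel and be pre-warmed to surface load errors early; TextEncoder here is a stand-in name, not taken from this diff:

@available(iOS 16.2, macOS 13.1, *)
struct TextEncoder: ResourceManaging {
    let model: ManagedMLModel

    func loadResources() throws { try model.loadResources() }
    func unloadResources() { model.unloadResources() }
}

// Load once, then unload: any load failure surfaces immediately,
// and the memory high-water mark stays low until first real use.
// try textEncoder.prewarmResources()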
