Add MLE/MAP functionality #1230


Merged: 41 commits, May 23, 2020

Commits
5f24ae3
Add Optim dependency
cpfiffer Apr 23, 2020
f8edbab
Export MLE/MAP
cpfiffer Apr 23, 2020
7f6a03b
Fix stupid _params_to_array behavior
cpfiffer Apr 23, 2020
d682d9b
Add MLE/MAP
cpfiffer Apr 23, 2020
3611719
Update src/modes/ModeEstimation.jl
cpfiffer Apr 23, 2020
43b1cc8
Merge branch 'master' into csp/modes
cpfiffer Apr 28, 2020
40c80db
Change optimizer.
cpfiffer Apr 28, 2020
23a36bc
Merge branch 'master' into csp/modes
cpfiffer Apr 28, 2020
2ba1c81
Match csp/hessian-bug
cpfiffer May 1, 2020
fd4a4de
Merge branch 'master' into csp/modes
cpfiffer May 8, 2020
dc0dd1c
Addressing comments, fixing bugs, adding tests
cpfiffer May 8, 2020
dca46e8
Removed extraneous model call
cpfiffer May 8, 2020
e616eb8
Add docstrings
cpfiffer May 8, 2020
f9fa51f
Update src/modes/ModeEstimation.jl
cpfiffer May 8, 2020
3fc51d9
Minor corrections.
cpfiffer May 10, 2020
3e8c64c
Merge branch 'csp/modes' of github.com:TuringLang/Turing.jl into csp/…
cpfiffer May 10, 2020
b1a4861
Add NamedArrays to extras and compat
cpfiffer May 12, 2020
4373c88
Fix dependencies
cpfiffer May 12, 2020
0a769b7
Update tests & address comments
cpfiffer May 19, 2020
e660fb5
Correct Project.toml
cpfiffer May 19, 2020
4fc1d73
Correct imports
cpfiffer May 19, 2020
cb56cd2
Renaming invlink
cpfiffer May 19, 2020
fab5b2d
Address comments
cpfiffer May 20, 2020
1f2e8cb
Remove Optim from compat
cpfiffer May 20, 2020
d97f2dc
Minor correction
cpfiffer May 20, 2020
16bbdf6
Update src/modes/ModeEstimation.jl
cpfiffer May 20, 2020
403ddcd
Update src/modes/ModeEstimation.jl
cpfiffer May 20, 2020
58fd2cf
Update src/modes/ModeEstimation.jl
cpfiffer May 20, 2020
3eb5251
Use getval
cpfiffer May 20, 2020
a8b9869
Merge branch 'csp/modes' of github.com:TuringLang/Turing.jl into csp/…
cpfiffer May 20, 2020
0809329
Update src/modes/ModeEstimation.jl
cpfiffer May 20, 2020
065bc18
Tidying, fixing tests
cpfiffer May 20, 2020
e652e00
Merge branch 'csp/modes' of github.com:TuringLang/Turing.jl into csp/…
cpfiffer May 20, 2020
1e5aea8
Replaced >= with >, because I am a fool
cpfiffer May 20, 2020
199c00c
Use function notation for .~
cpfiffer May 20, 2020
bd0c1bb
Link the model vi after extracting optimized vals
cpfiffer May 20, 2020
05ae0d4
Make sure linking status is right for Hessian
cpfiffer May 20, 2020
345776c
Update src/Turing.jl
cpfiffer May 21, 2020
a2b159c
Update src/modes/ModeEstimation.jl
cpfiffer May 21, 2020
856f282
Change NamedArrays to dependency
cpfiffer May 21, 2020
8cd87aa
Add warning if no convergence occurred, adapt to DynamicPPL master
cpfiffer May 21, 2020
2 changes: 2 additions & 0 deletions Project.toml
@@ -17,6 +17,7 @@ Libtask = "6f1fad26-d15e-5dc8-ae53-837a1d7b8c9f"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
LogDensityProblems = "6fdf6af0-433a-55f7-b3ed-c6c6e0b8df7c"
MCMCChains = "c7f686f2-ff18-58e9-bc7b-31028e88f75d"
Optim = "429524aa-4258-5aef-a3af-852621145aeb"
ProgressLogging = "33c8b6b6-d38a-422a-b730-caa89a2f386c"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
Reexport = "189a3867-3050-52da-a836-e630ba90ab69"
@@ -41,6 +42,7 @@ ForwardDiff = "0.10.3"
Libtask = "0.3.1"
LogDensityProblems = "^0.9, 0.10"
MCMCChains = "3.0.7"
Optim = "0.20"
ProgressLogging = "0.1"
Reexport = "0.2.0"
Requires = "0.5, 1.0"
9 changes: 7 additions & 2 deletions src/Turing.jl
@@ -36,6 +36,8 @@ include("inference/Inference.jl") # inference algorithms
using .Inference
include("variational/VariationalInference.jl")
using .Variational
include("modes/ModeEstimation.jl")
using .ModeEstimation

# TODO: re-design `sample` interface in MCMCChains, which unify CmdStan and Turing.
# Related: https://github.com/TuringLang/Turing.jl/issues/746
@@ -85,7 +87,7 @@ export @model, # modelling
CSMC,
PG,

-vi, # variational inference
+vi,    # variational inference
ADVI,

sample, # inference
@@ -108,5 +110,8 @@ export @model, # modelling
LogPoisson,
NamedDist,
filldist,
-arraydist
+arraydist,
+
+MLE, # mode estimation tools
+MAP
Review comment (Member):
Can you move these exports to the @require block above?

Review comment (Member):
You forgot to remove the exports here.

end
12 changes: 7 additions & 5 deletions src/inference/Inference.jl
@@ -265,18 +265,20 @@ end
# Chain making utilities #
##########################

-function _params_to_array(ts::Vector, spl::Sampler)
-    names_set = Set{String}()
+function _params_to_array(ts::Vector)
+    names = Vector{String}()
# Extract the parameter names and values from each transition.
dicts = map(ts) do t
nms, vs = flatten_namedtuple(t.θ)
for nm in nms
-            push!(names_set, nm)
+            if !(nm in names)
+                push!(names, nm)
+            end
end
# Convert the names and values to a single dictionary.
return Dict(nms[j] => vs[j] for j in 1:length(vs))
end
-    names = collect(names_set)
+    # names = collect(names_set)
vals = [get(dicts[i], key, missing) for i in eachindex(dicts),
(j, key) in enumerate(names)]
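For context (not part of the diff): replacing the `Set` with an ordered `Vector` makes the collected parameter names deterministic, since `Set` iteration order is unspecified. A minimal sketch of the new collection logic:

```julia
# Order-preserving, duplicate-free name collection, mirroring the new code.
names = Vector{String}()
for nm in ("m", "s", "m")      # duplicate names across transitions
    if !(nm in names)
        push!(names, nm)
    end
end
@assert names == ["m", "s"]    # first-seen order, no duplicates
```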

@@ -356,7 +358,7 @@ function AbstractMCMC.bundle_samples(

# Convert transitions to array format.
# Also retrieve the variable names.
-    nms, vals = _params_to_array(ts, spl)
+    nms, vals = _params_to_array(ts)

# Get the values of the extra parameters in each Transition struct.
extra_params, extra_values = get_transition_extras(ts)
235 changes: 235 additions & 0 deletions src/modes/ModeEstimation.jl
@@ -0,0 +1,235 @@
module ModeEstimation

using ..Turing
using ..Bijectors
using LinearAlgebra

import ..DynamicPPL
import Optim
import NamedArrays
import ..ForwardDiff

export MAP, MLE

"""
    ModeResult{
        V<:NamedArrays.NamedArray,
        M<:Union{Missing, NamedArrays.NamedArray},
        O<:Optim.MultivariateOptimizationResults,
        S<:NamedArrays.NamedArray
    }

A wrapper struct to store various results from a MAP or MLE estimation.

Fields:

- `values` is a vector with the resulting point estimates.
- `info_matrix` is the inverse Hessian of the objective at the mode, or `missing`
  if it could not be computed.
- `optim_result` is the stored Optim.jl result.
- `summary_table` is a summary table with parameters, standard errors, and
  t-statistics computed from the information matrix.
- `lp` is the value of the objective (the negative log density) at the mode.
"""
struct ModeResult{
V<:NamedArrays.NamedArray,
M<:Union{Missing, NamedArrays.NamedArray},
O<:Optim.MultivariateOptimizationResults,
S<:NamedArrays.NamedArray
}
values :: V
info_matrix :: M
optim_result :: O
summary_table :: S
lp :: Float64
end

function Base.show(io::IO, m::ModeResult)
show(io, m.summary_table)
end
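A hedged sketch of inspecting a returned `ModeResult` (the value `r` is hypothetical; the field names are those of the struct above):

```julia
# Assuming `r` is a ModeResult returned by MLE or MAP:
r.values          # NamedArray of point estimates
r.summary_table   # parameters, standard errors, t-statistics
r.info_matrix     # inverse Hessian, or `missing` if it could not be computed
r.lp              # objective value (negative log density) at the optimum
```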

"""
    make_logjoint(model::DynamicPPL.Model, ctx::DynamicPPL.AbstractContext)

Constructs a log density function that accepts a vector `z` and returns the
negative log density of `model` evaluated at `z`, running the model with the
provided context `ctx`. The returned function accepts a keyword argument
`unlinked` indicating whether `z` is in the original (unlinked) space rather
than the transformed (linked) space.
"""
function make_logjoint(model::DynamicPPL.Model, ctx::DynamicPPL.AbstractContext)
# setup
varinfo_init = Turing.VarInfo(model)
spl = DynamicPPL.SampleFromPrior()
DynamicPPL.link!(varinfo_init, spl)

    function logπ(z; unlinked = false)
        varinfo = DynamicPPL.VarInfo(varinfo_init, spl, z)

        # If `z` is in the original (unlinked) space, temporarily undo the
        # linking before evaluating the model, and restore it afterwards.
        unlinked && DynamicPPL.invlink!(varinfo_init, spl)
        model(varinfo, spl, ctx)
        unlinked && DynamicPPL.link!(varinfo_init, spl)

        return -DynamicPPL.getlogp(varinfo)
    end

return logπ
end
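A minimal usage sketch (not part of the PR; `make_logjoint` is internal, so it is reached through the module path, which is an assumption about how the package is laid out):

```julia
using Turing

@model function g()
    m ~ Normal(0, 1)
    1.0 ~ Normal(m, 1)
end

lpf = Turing.ModeEstimation.make_logjoint(g(), Turing.DynamicPPL.LikelihoodContext())
lpf(zeros(1))                   # negative log likelihood at m = 0
lpf(zeros(1); unlinked = true)  # same point, interpreted as unlinked
```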

"""
mode_estimation(
model::DynamicPPL.Model,
lpf;
optim_options=Optim.Options(),
kwargs...
)

An internal function that handles the computation of an MLE or MAP estimate.

Arguments:

- `model` is a `DynamicPPL.Model`.
- `lpf` is a function returned by `make_logjoint`.

Keyword arguments:

- `optim_options` is an `Optim.Options` struct that allows you to configure the
  optimizer, e.g. by changing the number of iterations.

"""
function mode_estimation(
model::DynamicPPL.Model,
lpf;
optim_options=Optim.Options(),
kwargs...
)
# Do some initialization.
b = bijector(model)
binv = inv(b)

spl = DynamicPPL.SampleFromPrior()
vi = DynamicPPL.VarInfo(model)
init_params = model(vi, spl)
init_vals = vi[spl]

# Construct target function.
target(x) = lpf(x)
hess_target(x) = lpf(x; unlinked=true)

# Optimize!
M = Optim.optimize(target, init_vals, optim_options)

# Retrieve the estimated values.
vals = binv(M.minimizer)

# Get the VarInfo at the MLE/MAP point, and run the model to ensure
# correct dimensionality.
vi[spl] = vals
model(vi) # XXX: Is this a necessary step?

# Make one transition to get the parameter names.
ts = [Turing.Inference.Transition(DynamicPPL.tonamedtuple(vi), DynamicPPL.getlogp(vi))]
varnames, _ = Turing.Inference._params_to_array(ts)

# Store the parameters and their names in an array.
vmat = NamedArrays.NamedArray(vals, varnames)

# Try to generate the information matrix.
try
# Calculate Hessian and information matrix.
info = ForwardDiff.hessian(hess_target, vals)
info = inv(info)
mat = NamedArrays.NamedArray(info, (varnames, varnames))

# Create the standard errors.
ses = sqrt.(diag(info))

# Calculate t-stats.
tstat = vals ./ ses

# Make a summary table.
stable = NamedArrays.NamedArray(
[vals ses tstat],
(varnames, ["parameter", "std_err", "tstat"]))

# Return a wrapped-up table.
return ModeResult(vmat, mat, M, stable, M.minimum)
catch err
@warn "Could not compute Hessian matrix" err
        stable = NamedArrays.NamedArray(
            [vals repeat([missing], length(vals)) repeat([missing], length(vals))],
            (varnames, ["parameter", "std_err", "tstat"]))
return ModeResult(vmat, missing, M, stable, M.minimum)
end
end
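To make the summary statistics concrete, here is a self-contained sketch (toy numbers, not from the PR) of how the standard errors and t-statistics follow from the Hessian:

```julia
using LinearAlgebra

H = [4.0 1.0; 1.0 3.0]     # Hessian of the negative log density at the mode
vals = [0.5, -0.25]        # toy point estimates
info = inv(H)              # inverse Hessian ≈ asymptotic covariance matrix
ses = sqrt.(diag(info))    # standard errors, as in the try-block above
tstat = vals ./ ses        # t-statistics
```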

"""
MLE(model::DynamicPPL.Model; kwargs...)

Returns a maximum likelihood estimate of the given `model`.

Arguments:

- `model` is a `DynamicPPL.Model`.

Keyword arguments:

- `optim_options` is an `Optim.Options` struct that allows you to configure the
  optimizer, e.g. by changing the number of iterations.

Usage:

```julia
using Turing, Optim

@model function f()
m ~ Normal(0, 1)
1.5 ~ Normal(m, 1)
2.0 ~ Normal(m, 1)
end

model = f()
mle_estimate = MLE(model)

# Manually set the optimizer's settings.
mle_estimate = MLE(model, optim_options=Optim.Options(iterations=500))
```
"""
function MLE(model::DynamicPPL.Model; kwargs...)
lpf = make_logjoint(model, DynamicPPL.LikelihoodContext())
return mode_estimation(model, lpf; kwargs...)
end

"""
MAP(model::DynamicPPL.Model; kwargs...)

Returns the maximum a posteriori estimate of the given `model`.

Arguments:

- `model` is a `DynamicPPL.Model`.

Keyword arguments:

- `optim_options` is an `Optim.Options` struct that allows you to configure the
  optimizer, e.g. by changing the number of iterations.

Usage:

```julia
using Turing, Optim

@model function f()
m ~ Normal(0, 1)
1.5 ~ Normal(m, 1)
2.0 ~ Normal(m, 1)
end

model = f()
map_estimate = MAP(model)

# Manually set the optimizer's settings.
map_estimate = MAP(model, optim_options=Optim.Options(iterations=500))
```
"""
function MAP(model::DynamicPPL.Model; kwargs...)
lpf = make_logjoint(model, DynamicPPL.DefaultContext())
return mode_estimation(model, lpf; kwargs...)
end
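The only difference between `MLE` and `MAP` is the context passed to `make_logjoint`: `LikelihoodContext` drops the prior, while `DefaultContext` keeps it. For the conjugate example model in the docstrings above the two modes can be checked analytically, as in this hedged sketch:

```julia
model = f()                 # the example model from the docstrings above
mle_estimate = MLE(model)   # likelihood only: mode at m = (1.5 + 2.0)/2 = 1.75
map_estimate = MAP(model)   # likelihood × prior: mode at m = 3.5/3 ≈ 1.167
```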

end #module