diff --git a/src/contrasts.jl b/src/contrasts.jl index f4f03af8..960a8437 100644 --- a/src/contrasts.jl +++ b/src/contrasts.jl @@ -229,8 +229,13 @@ function termnames(C::AbstractContrasts, levels::AbstractVector, baseind::Intege levels[not_base] end -Base.getindex(contrasts::ContrastsMatrix, rowinds, colinds) = - getindex(contrasts.matrix, getindex.(Ref(contrasts.invindex), rowinds), colinds) +function Base.getindex(contrasts::ContrastsMatrix{C,T}, rowinds, colinds) where {C,T} + # allow rows to be missing + rows = get.(Ref(contrasts.invindex), rowinds, missing) + # create a row of nothing but missings for missing values + mrow = reduce(vcat, [missing for c in getindex(contrasts.matrix, 1, colinds)]) + vcat([r === missing ? mrow : getindex(contrasts.matrix, r, colinds) for r in rows]) +end # Making a contrast type T only requires that there be a method for # contrasts_matrix(T, baseind, n) and optionally termnames(T, levels, baseind) diff --git a/src/modelframe.jl b/src/modelframe.jl index 13c59f16..cdebcf4b 100644 --- a/src/modelframe.jl +++ b/src/modelframe.jl @@ -78,11 +78,11 @@ function ModelFrame(f::FormulaTerm, data::ColumnTable; throw(ArgumentError(msg)) end - data, _ = missing_omit(data, f) - sch = schema(f, data, contrasts) f = apply_schema(f, sch, M) - + + data, _ = missing_omit(data, f) + ModelFrame(f, sch, data, model) end diff --git a/src/schema.jl b/src/schema.jl index d05b0ad7..73daa748 100644 --- a/src/schema.jl +++ b/src/schema.jl @@ -198,6 +198,8 @@ concrete_term(t::Term, x, hint::AbstractTerm) = hint concrete_term(t, d, hint) = t concrete_term(t::Term, xs::AbstractVector{<:Number}, ::Nothing) = concrete_term(t, xs, ContinuousTerm) +# and for missing values +concrete_term(t::Term, xs::AbstractVector{Union{Missing,T}} where T<:Number, ::Nothing) = concrete_term(t, xs, ContinuousTerm) function concrete_term(t::Term, xs::AbstractVector, ::Type{ContinuousTerm}) μ, σ2 = StatsBase.mean_and_var(xs) min, max = extrema(xs) diff --git a/src/terms.jl b/src/terms.jl index f9a89c17..50e4acc4 100644 --- a/src/terms.jl +++ b/src/terms.jl @@ -506,6 +506,13 @@ lazy_modelcols(x, d) = modelcols(x, d) +# this is weird, but using import Base: copy leads to exporting type piracy +# for non missing values, the compiler should hopefully optimize down the extra +# layer of indirection +function copy end +copy(x::Any) = Base.copy(x) +copy(m::Missing) = m + modelcols(t::ContinuousTerm, d::NamedTuple) = copy.(d[t.sym]) modelcols(t::CategoricalTerm, d::NamedTuple) = t.contrasts[d[t.sym], :] diff --git a/test/terms.jl b/test/terms.jl index b58900bc..698a134c 100644 --- a/test/terms.jl +++ b/test/terms.jl @@ -31,6 +31,17 @@ StatsModels.apply_schema(mt::MultiTerm, sch::StatsModels.Schema, Mod::Type) = @test t0.min == 1.0 @test t0.max == 3.0 + vals0m = [3, missing, 1] + t0m = concrete_term(t, vals0m) + @test string(t0m) == "aaa" + @test mimestring(t0m) == "aaa(continuous)" + # compute all these values to make sure the behavior of terms matches + # the behavior of other relevant packages + @test isequal(t0m.mean, mean(vals0m)) + @test isequal(t0m.var, var(vals0m)) + @test isequal(t0m.min, min(vals0m...)) + @test isequal(t0m.max, max(vals0m...)) + t1 = concrete_term(t, [:a, :b, :c]) @test t1.contrasts isa StatsModels.ContrastsMatrix{DummyCoding} @test string(t1) == "aaa"