#==========
Julia の修行をするときに,いろいろなプログラムを書き換えるのは有効な方法だ。
以下のプログラムを Julia に翻訳してみる。
多次元分布の平均値の差の検定(ウィルクスのΛ)
http://aoki2.si.gunma-u.ac.jp/R/wilks.html
ファイル名: wilks.jl 関数名: wilks
翻訳するときに書いたメモ
reduce(+, vars, dims=3) の結果も 3 次元(n x n x 1 だけど)とは?
ちょっと気持ち悪いので,単純に結果を累積するようにプログラムを書き換える。
==========#
using Statistics, Rmath, LinearAlgebra
function wilks(dat, g)
nv = size(dat, 2)
index, gcase = table(g)
n = sum(gcase)
k = length(gcase)
w = zeros(nv, nv)
for (i, gvar) in enumerate(index)
z = dat[g .== gvar, :]
w += cov(z) * (size(z, 1) - 1)
end
t = cov(dat) * (n - 1)
Λ = det(w) / det(t)
sl = sqrt(Λ)
if nv == 2
f = (1 - sl) * (n - k - 1) / sl / (k - 1)
df1 = 2 * (k - 1)
df2 = 2 * (n - k - 1)
p = pf(f, df1, df2, false)
println("F = $f, df1 = $df1, df2 = $df2, p vallue = $p")
Dict(:Fvalue => f, :df1 => df1, :df2 => df2, :pvalue => p)
elseif k == 2
f = (1 - Λ) * (n - k - nv + 1) / Λ / nv
df2 = n - k - nv + 1
p = pf(f, nv, df2, false)
println("F = $f, df1 = $nv, df2 = $df2, p vallue = $p")
Dict(:Fvalue => f, :df1 => nv, :df2 => df2, :pvalue => p)
elseif k == 3
f = (1 - sl) * (n - k - nv + 1) / sl / nv
df1 = 2 * nv
df2 = 2 * (n - k - nv + 1)
p = pf(f, df1, df2, false)
println("F = $f, df1 = $df1, df2 = $df2, p vallue = $p")
Dict(:Fvalue => f, :df1 => df1, :df2 => df2, :pvalue => p)
else
chisq = ((nv + k) / 2 - n + 1) * log(Λ)
df = nv * (k - 1)
p = pchisq(chisq, df, false)
println("chisq. = $chisq, df = $df, p vallue = $p")
Dict(:chisq => chisq, :df => df, :pvalue => p)
end
end
function table(x) # indices が少ないとき
indices = sort(unique(x))
counts = zeros(Int, length(indices))
for i in indexin(x, indices)
counts[i] += 1
end
return indices, counts
end
dat0 = [1 2.9 161.7 120.8
1 2.3 114.8 85.2
1 2 128.4 92
1 3.2 149.2 97.3
1 2.7 126 81.1
1 4.4 133.8 107.6
1 4.1 161.3 114
1 2.1 111.5 77.3
2 4.8 198.7 172.9
2 3.6 199.3 157.9
2 2 188.4 152.7
2 4.9 183.6 164.2
2 3.9 173.5 172.2
2 4.4 184.9 163.2];
dat = dat0[:, 2:4];
g = dat0[:, 1];
wilks(dat, g)
# F = 33.80499798162517, df1 = 3, df2 = 10, p vallue = 1.516644831663382e-5
wilks(dat0[:, 2:3], dat0[:, 1])
# F = 10.911143164354023, df1 = 2, df2 = 22, p vallue = 0.0005105098610968934
dat2 = vcat(dat0, [3 2.3 195.2 154.4; 3 3.6 189.2 150.8; 3 3.4 179.3 182.4])
wilks(dat2[:, 2:4], dat2[:, 1])
# F = 9.531929948587386, df1 = 6, df2 = 24, p vallue = 2.143115741229085e-5
dat3 = vcat(dat2, [4 2.5 165.2 157.1; 4 4.6 192.2 149.8; 4 5.2 181.2 180.6])
wilks(dat3[:, 2:4], dat3[:, 1])
# chisq. = 38.2334484265828, df = 9, p vallue = 1.5827976779857345e-5
※コメント投稿者のブログIDはブログ作成者のみに通知されます