#==========
Julia の修行をするときに,いろいろなプログラムを書き換えるのは有効な方法だ。
以下のプログラムを Julia に翻訳してみる。
二元配置分散分析
http://aoki2.si.gunma-u.ac.jp/R/twoway-anova.html
ファイル名: twowayanova.jl 関数名: twowayanova
翻訳するときに書いたメモ
元のプログラムにある二次元配列 mab は不要だ。
==========#
using Statistics, Rmath, Printf
function twowayanova(x, a, b)
indexa, indexb, tbl = table(a, b)
n = length(x)
dft = n-1
na, nb = size(tbl)
dfa = na - 1
dfb = nb - 1
gm = mean(x)
MSt = var(x)
sst = dft * MSt
ma = [mean(x[a .== i]) for i in indexa]
mb = [mean(x[b .== j]) for j in indexb]
if any(tbl .== 0)
error("いくつかの水準の組合せにおいて,データがないセルがあります")
elseif all(tbl .== 1)
ssa = nb * sum((ma .- gm) .^ 2)
ssb = na * sum((mb .- gm) .^ 2)
sse = sst-ssa-ssb
dfe = dft-dfa-dfb
ss = [ssa, ssb, sse, sst]
df = [dfa, dfb, dfe, dft]
ms = ss ./ df
f = ms ./ ms[3]
p = pf.(f, df, df[3], false)
f[3:4] .= NaN
p[3:4] .= NaN
result = hcat(ss, df, ms, f, p)
@printf("%12s %6s %10s %10s %10s\n", "SS", "df", "MS", "F value", "P value")
@printf("a %10.7g %6d %10.7g %10.7f %10.7f\n", ssa, dfa, ms[1], f[1], p[1])
@printf("b %10.7g %6d %10.7g %10.7f %10.7f\n", ssb, dfb, ms[2], f[2], p[2])
@printf("e %10.7g %6d %10.7g\n", sse, dfe, ms[3])
@printf("T %10.7g %6d %10.7g\n", sst, dft, ms[4])
Dict(:result => result)
elseif all(tbl .>= 2) && hirei(tbl)
sse = 0
for i in 1:na, j in 1:nb
selected = x[(a .== indexa[i]) .& (b .== indexb[j])]
sse += sum((selected .- mean(selected)) .^ 2)
end
ssa = sum(sum(tbl, dims=2) .* (ma .- gm) .^ 2)
ssb = sum(transpose(sum(tbl, dims=1)) .* (mb .- gm) .^ 2)
ss = [ssa, ssb, sst-ssa-ssb-sse, sse, sst]
dfab = dfa * dfb
df = [dfa, dfb, dfab, dft-dfa-dfb-dfab, dft]
ms = ss ./ df
f = ms ./ ms[4]
p = pf.(f, df, df[4], false)
f[4:5] .= NaN
p[4:5] .= NaN
result = hcat(ss, df, ms, f, p)
names = ["a", "b", "a*b", "e", "T"]
output("Model-I", names, ss, df, ms, f, p)
f[1:2] = ms[1:2] ./ ms[3]
p[1:2] = pf.(f[1:2], df[1:2], df[3], false)
result2 = hcat(ss, df, ms, f, p)
println()
output("Model-II", names, ss, df, ms, f, p)
Dict(:model1 => result, :model2 => result2)
else
d1 = makedummy(a, indexa);
d2 = makedummy(b, indexb);
d3 = crossdummy(d1, d2);
rabab = mreg(hcat(d3, x))
rab = mreg(hcat(d1, d2, x))
ra = mreg(hcat(d1, x))
rb = mreg(hcat(d2, x))
ss = [rab[1]-rb[1], rab[1]-ra[1], rabab[1]-rab[1], rabab[2], rabab[3]]
df = [dfa, dfb, dfa*dfb, n-na*nb, n-1]
ms = ss ./ df
f = ms ./ ms[4]
p = pf.(f, df, df[4], false)
f[4:5] .= NaN
p[4:5] .= NaN
result = hcat(ss, df, ms, f, p)
names = ["a", "b", "a*b", "e", "T"]
output(names, ss, df, ms, f, p)
Dict(:result => result)
end
end
function hirei(tbl)
for i in 2:size(tbl, 1)
temp = tbl[i, :] ./ tbl[1, :]
all(temp .== temp[1]) || return false
end
return true
end
function output(model, names, ss, df, ms, f, p)
println("$model")
@printf("%14s %6s %10s %10s %10s\n", "SS", "df", "MS", "F value", "P value")
for i in 1:3
@printf("%4s%10.7g %6d %10.7g %10.7f %10.7f\n",
names[i], ss[i], df[i], ms[i], f[i], p[i])
end
for i in 4:5
@printf("%4s%10.7g %6d %10.7g\n", names[i], ss[i], df[i], ms[i])
end
end
function mreg(dat) # 回帰分析を行い,回帰変動,誤差変動,全変動を返す関数
r = cor(dat)
betas = r[1:end-1, 1:end-1] \ r[1:end-1, end]
SS = cov(dat, corrected=false) .* size(dat, 1)
prop = [SS[i, i] for i in 1:size(SS, 1)]
Sr = sum(SS[1:end-1, end] .* (betas ./ sqrt.(prop[1:end-1] ./ prop[end])))
St = SS[end, end]
(Sr, St - Sr, St) # 回帰変動,誤差変動,全変動
end
function makedummy(x, y)
n = length(x)
k = length(y)
d = zeros(n, k)
for (i, j) = enumerate(indexin(x, y))
d[i, j] = 1
end
d[:, 2:end]
end
function crossdummy(a, b)
n1, k = size(a)
n2, l = size(b)
n1 == n2 || error("data error")
d = zeros(n1, k * l)
for i = 1:l
for j = 1:k
d[:, j + (i - 1) * k] = a[:, j] .* b[:, i]
end
end
hcat(a, b, d)
end
function table(x, y)
indicesx = sort(unique(x))
indicesy = sort(unique(y))
counts = zeros(Int, length(indicesx), length(indicesy))
for (i, j) in zip(indexin(x, indicesx), indexin(y, indicesy))
counts[i, j] += 1
end
return indicesx, indicesy, counts
end
a = [1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3];
b = repeat(1:4, 3);
x = [9, 17, 12, 16, 1, 21, 16, 11, 7, 19, 6, 9];
twowayanova(x, a, b)
# SS df MS F value P value
# a 21.5 2 10.75 0.6592845 0.5510300
# b 268.6667 3 89.55556 5.4923339 0.0371924
# e 97.83333 6 16.30556
# T 388 11 35.27273
x = [24.8, 23.9, 24.1, 28.8, 22.6, 28.0, 26.4, 27.4, 29.4, 30.0, 28.7,
29.2, 25.0, 26.6, 27.9, 28.5, 27.1, 25.2, 27.9, 29.2, 26.7, 25.0,
29.9, 29.4, 27.5, 32.5, 29.5, 26.3, 28.2, 31.8, 30.2, 31.7, 29.2,
30.3, 30.9, 28.4, 29.8, 26.7, 30.7, 28.5, 26.5, 26.7, 31.7, 27.2,
25.5, 29.9, 27.3, 28.8, 28.5, 25.7, 28.7, 31.3, 29.4, 29.8, 30.3,
29.6, 31.7, 33.6, 32.0, 34.3];
a = [1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 1, 1, 1, 2, 2, 2, 3, 3,
3, 4, 4, 4, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 1, 1, 1, 2, 2,
2, 3, 3, 3, 4, 4, 4, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4];
b = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5];
twowayanova(x, a, b)
# Model-I
# SS df MS F value P value
# a 51.39733 3 17.13244 4.9370667 0.0052039
# b 106.2873 4 26.57183 7.6572211 0.0001115
# a*b 52.85267 12 4.404389 1.2692154 0.2738926
# e 138.8067 40 3.470167
# T 349.344 59 5.921085
#
# Model-II
# SS df MS F value P value
# a 51.39733 3 17.13244 3.8898573 0.0373908
# b 106.2873 4 26.57183 6.0330352 0.0067194
# a*b 52.85267 12 4.404389 1.2692154 0.2738926
# e 138.8067 40 3.470167
# T 349.344 59 5.921085
a = [1, 1, 1, 1, 2, 2, 2, 2, 2, 2]
b = [1, 1, 2, 2, 1, 1, 2, 2, 2, 2]
x = [17, 16, 25, 22, 18, 26, 34, 30, 34, 30]
twowayanova(x, a, b)
# SS df MS F value P value
# a 121.4405 1 121.4405 13.7479784 0.0099953
# b 177.1905 1 177.1905 20.0592992 0.0041979
# a*b 5.142857 1 5.142857 0.5822102 0.4743687
# e 53 6 8.833333
# T 415.6 9 46.17778