#==========
Julia の修行をするときに,いろいろなプログラムを書き換えるのは有効な方法だ。
以下のプログラムを Julia に翻訳してみる。
共分散分析
http://aoki2.si.gunma-u.ac.jp/R/covar-test.html
ファイル名: covartest.jl 関数名: covartest
翻訳するときに書いたメモ
==========#
using Statistics, Rmath, Printf
function covartest(x, y, g)
index, nj = table(g)
n = sum(nj)
k = length(nj)
mx = mean(x)
mxj = [mean(x[g .== g0]) for g0 in index]
sstx = (n - 1) * var(x)
ssbx = sum(nj .* (mxj .- mx) .^ 2)
sswx = sstx - ssbx
my = mean(y)
myj = [mean(y[g .== g0]) for g0 in index]
ssty = (n - 1) * var(y)
ssby = sum(nj .* (myj .- my) .^ 2)
sswy = ssty - ssby
spt = (n - 1) * cov(x, y)
spb = sum(nj .* (mxj .- mx) .* (myj .- my))
spw = spt - spb
sswy = sswy - spw^2 / sswx
ssty = ssty - spt^2 / sstx
ssby = ssty - sswy
hensax = x .- mxj[g]
hensay = y .- myj[g]
xy = hensax .* hensay
xx = hensax .^ 2
numerator = [sum(xy[g .== g0]) for g0 in index] # tapply(xy, g, sum)
denominator = [sum(xx[g .== g0]) for g0 in index] # tapply(xx, g, sum)
a = numerator ./ denominator
b = myj .- a .* mxj
predicty = a[g] .* x .+ b[g]
sswyj = sum((y .- predicty) .^ 2)
dfr = k - 1
dfe = n - 2 * k
dft = n - k - 1
diffreg = sswy - sswyj
msr = diffreg / dfr
mse = sswyj / dfe
msw = sswy / dft
f = msr / mse
p = pf(f, dfr, dfe, false)
part1="H0: 各群の回帰直線の傾きは同じである"
names = ["group x slope", "residual", "total"]
ss = [diffreg, sswyj, sswy]
df = [dfr, dfe, dft]
ms = [msr, mse, msw]
anovatable(part1, names, ss, df, ms, f, dfr, dfe, p)
if p > 0.05
msby = ssby / dfr
mswy = sswy / dft
f2 = msby / mswy
p2 = pf(f2, dfr, dft, false)
part2 = "H0: 共変量で調整した平均値は同じである"
names2 = ["effect & group", "residual", "total"]
ss2 = [ssby, sswy, ssty]
df2 = [dfr, dft, n-2]
ms2 = [msby, mswy, ssty/(n-2)]
println()
anovatable(part2, names, ss2, df2, ms2, f2, dfr, dft, p2)
end
end
function table(x) # indices が少ないとき
indices = sort(unique(x))
counts = zeros(Int, length(indices))
for i in indexin(x, indices)
counts[i] += 1
end
return indices, counts
end
function anovatable(title, names, ss, df, ms, f, df1, df2, p)
println(title)
@printf("%14s %12s %3s %12s\n", "", "SS", "df", "MS")
for i = 1:3
@printf("%14s %12.7f %3d %12.7f\n", names[i], ss[i], df[i], ms[i])
end
println("F value = $f, df1 = $df1, df2 = $df2, p value = $p")
end
dat = [ 5.9 1.87 1
5.7 1.19 1
5.7 1.22 1
6.7 1.46 1
6.5 1.35 1
6.2 1.16 1
6.3 1.62 1
6.7 2.00 1
5.6 1.28 2
5.3 1.06 2
5.5 1.17 2
5.6 1.74 2
5.5 1.13 2
5.3 1.18 2
5.2 1.03 2
5.6 1.23 2
5.5 0.90 2
5.5 1.24 2];
x = dat[:, 1];
y = dat[:, 2];
g = Int.(dat[:, 3]); # 整数型であること!
covartest(x, y, g)
# H0: 各群の回帰直線の傾きは同じである
# SS df MS
# group x slope 0.0357661 1 0.0357661
# residual 0.9187879 14 0.0656277
# total 0.9545539 15 0.0636369
# F value = 0.544984285639864, df1 = 1, df2 = 14, p value = 0.472568833125542
# H0: 共変量で調整した平均値は同じである
# SS df MS
# group x slope 0.0000079 1 0.0000079
# residual 0.9545539 15 0.0636369
# total 0.9545618 16 0.0596601
# F value = 0.00012428977991377308, df1 = 1, df2 = 15, p value = 0.9912518692175619
※コメント投稿者のブログIDはブログ作成者のみに通知されます