裏 RjpWiki

フィッシャーの正確確率検定
http://aoki2.si.gunma-u.ac.jp/R/fisher.html

#==========
Julia の修行をするときに，いろいろなプログラムを書き換えるのは有効な方法だ。
以下のプログラムを Julia に翻訳してみる。

ファイル名: fisher.jl　　関数名: fisher

翻訳するときに書いたメモ

hybrid は省略した。

==========#

using Rmath, Printf

function fisher(x, y = NaN; exact = true, method = "Fisher") # or method = "Pearson"
function found()
if method == "Fisher"
nprod = temp - sum(perm_table[um .+ 1])
if nprod <= criterion + EPSILON
nntrue += exp(nprod - nntrue2 * log_expmax)
while nntrue >= EXPMAX
nntrue /= EXPMAX
nntrue2 += 1
end
end
else
hh = sum((um .- ex) .^ 2 ./ ex)
if hh >= stat_val || abs(hh - stat_val) <= EPSILON
nprod = temp - sum(perm_table[um .+ 1])
nntrue += exp(nprod - nntrue2 * log_expmax)
while nntrue >= EXPMAX
nntrue /= EXPMAX
nntrue2 += 1
end
end
end
ntables += 1
end

function search(x, y)
if y == 1
found()
elseif x == 1
t = um[1, 1] - um[y, 1]
if t >= 0
um[1, 1] = t
search(nc, y - 1)
um[1, 1] += um[y, 1]
end
else
search(x - 1, y)
while um[y, 1] != 0 && um[1, x] != 0
um[y, x] += 1
um[y, 1] -= 1
um[1, x] -= 1
search(x - 1, y)
end
um[y, 1] += um[y, x]
um[1, x] += um[y, x]
um[y, x] = 0
end
end

function exacttest()
denom2 = 0
denom = perm_table[n + 1] - sum(perm_table[ct .+ 1])
while denom > log_expmax
denom -= log_expmax
denom2 += 1
end
denom = exp(denom)
um[:, 1] = rt
um[1, :] = ct
search(nc, nr)
@printf("% s の方法による，正確な P 値 = % .10g\n", method, nntrue / denom * EXPMAX ^ (nntrue2 - denom2))
@printf("査察した分割表の個数は % s 個\n", ntables)
end

function chisqtest(t)
return sum((t .- ex) .^ 2 ./ ex)
end
function asymptotic()
@printf("カイ二乗値 = % g, 自由度 = % i, P 値 = % g\n", stat_val, df, pchisq(stat_val, df, false))
end

if ndims(x) == 2
t = x
else
indexy, indexx, t = table(y, x)
end
EPSILON = 1e-10
EXPMAX = 1e100
log_expmax = log(EXPMAX)
nr, nc = size(t)
um = zeros(Int, nr, nc)
rt = sum(t, dims=2)
ct = sum(t, dims=1)
n = sum(t)
ex = (rt * ct) ./ n
stat_val = chisqtest(t)
df = (nr - 1) * (nc - 1)
asymptotic()
if exact
perm_table = cumsum(vcat(0, log.(1:n + 1)))
temp = sum(perm_table[rt .+ 1])
criterion = temp - sum(perm_table[t .+ 1])
ntables = nntrue = nntrue2 = 0
exacttest()
end
end

function table(x, y) # 二次元
indicesx = sort(unique(x))
indicesy = sort(unique(y))
counts = zeros(Int, length(indicesx), length(indicesy))
for (i, j) in zip(indexin(x, indicesx), indexin(y, indicesy))
counts[i, j] += 1
end
return indicesx, indicesy, counts
end

分割表を与える場合

x = [5 3 2 1
4 3 5 2
2 3 1 2]

fisher(x)
# カイ二乗値 = 3.39631, 自由度 = 6, P 値 = 0.757711
# Fisher の方法による，正確な P 値 = 0.8091124268
# 査察した分割表の個数は 24871 個

fisher(x, method = "Pearson")
# カイ二乗値 = 3.39631, 自由度 = 6, P 値 = 0.757711
# Pearson の方法による，正確な P 値 = 0.7878188077
# 査察した分割表の個数は 24871 個

2 つのベクトルを与える場合

x = [1, 1, 1, 3, 2, 1, 1, 3, 3, 2, 3, 3, 1, 1, 3, 1, 1, 1, 2, 1,
1, 2, 2, 3, 1, 1, 2, 3, 3, 2];
y = [1, 2, 2, 3, 2, 1, 2, 2, 2, 3, 3, 2, 2, 1, 2, 1, 2, 2, 3, 1,
1, 1, 1, 3, 1, 2, 2, 3, 1, 3];

fisher(x, y)
# カイ二乗値 = 9.17504, 自由度 = 4, P 値 = 0.0568702
# Fisher の方法による，正確な P 値 = 0.03320873103
# 査察した分割表の個数は 1353 個

fisher(x, y, method="Pearson")
# カイ二乗値 = 9.17504, 自由度 = 4, P 値 = 0.0568702
# Pearson の方法による，正確な P 値 = 0.05323058543
# 査察した分割表の個数は 1353 個

頑張ってます。クリックお願いします。

Julia に翻訳--132　適合度の検定，exact test

適合度の検定（exact test）
http://aoki2.si.gunma-u.ac.jp/R/gft.html

#==========
Julia の修行をするときに，いろいろなプログラムを書き換えるのは有効な方法だ。
以下のプログラムを Julia に翻訳してみる。

ファイル名: gft.jl　　関数名: gft

翻訳するときに書いたメモ

==========#

using Distributions, Printf, SpecialFunctions

function gft(o, p = repeat([1 / length(o)], length(o)))
function gen_tab(y)
if y == 1
x2 = sum((o .- e) .^ 2 ./ e)
if x2 >= x0 || abs(x2 - x0) <= EPSILON
w2 += exp(temp - sum(fact[o.+1])) * prod(p .^ o)
end
else
gen_tab(y - 1)
while o[1] > 0
o[y] += 1
o[1] -= 1
gen_tab(y - 1)
end
o[1] += o[y]
o[y] = 0
end
end

EPSILON = 1e-10
total = sum(o)
p ./= sum(p)
e = p .* total
x0 = sum((o .- e) .^ 2 ./ e)
n = length(o)
p_val = ccdf(Chisq(n - 1), x0)
@printf("カイ二乗値は %g，自由度は %i，P値は %g\n", x0, n - 1, p_val)
fact = logfactorial.(0:total)
temp = fact[total+1]
w2 = 0.0
o = zeros(Int64, n)
o[1] = total
gen_tab(n)
@printf("正確なP値は %g\n", w2)
end

gft([1, 2, 3, 4, 5, 6])
# カイ二乗値は 5，自由度は 5，P値は 0.41588
# 正確なP値は 0.467389

gft([10,1,1,1,1,1,2])
# カイ二乗値は 27.8824，自由度は 6，P値は 9.88791e-05
# 正確なP値は 0.000223375

頑張ってます。クリックお願いします。

Julia に翻訳--131　シャーリー・ウィリアムズ法，代表値の多重比較

シャーリー・ウィリアムズの方法による代表値の多重比較
http://aoki2.si.gunma-u.ac.jp/R/Shirley-Williams.html

#==========
Julia の修行をするときに，いろいろなプログラムを書き換えるのは有効な方法だ。
以下のプログラムを Julia に翻訳してみる。

ファイル名: shirleywilliams.jl　　関数名: shirleywilliams

翻訳するときに書いたメモ

# t = (M - sum(r[group .== 1]) / ni[1]) / sqrt(V * (1 / ni[p] + 1 / ni[1]))
の行は多分バグ。（結果に違いはないが，Julia だとエラーになる）

tapply() と rep() を書いてみた。

==========#

using StatsBase, Rmath

function shirleywilliams(data, group; method = "up") # or method = "down"
func = method == "down" ? minimum : maximum
ni = tapply(data, group, length)
a = length(ni)
s = zeros(a - 1)
for p in a:-1:2 # p = 3
select = 1 .<= group .<= p
r = tiedrank(data[select])
g = group[select]
M = func(cumsum(reverse(tapply(r, g, sum))[1:p-1]) ./ cumsum(reverse(ni[2:p])))
N = sum(ni[1:p])
V = (sum(r .^ 2) - N * (N + 1) ^ 2 / 4) / (N - 1)
# t = (M - sum(r[group .== 1]) / ni[1]) / sqrt(V * (1 / ni[p] + 1 / ni[1]))
t = (M - sum(r[g .== 1]) / ni[1]) / sqrt(V * (1 / ni[p] + 1 / ni[1]))
s[p - 1] = method == "down" ? -t : t
end
t = reverse(s)
[println("$(a - i + 1): t = $t") for (i, t) in enumerate(t)]
return t
end

function table(x) # indices が少ないとき
indices = sort(unique(x))
counts = zeros(Int, length(indices))
for i in indexin(x, indices)
counts[i] += 1
end
return indices, counts
end

function tapply(x, g, func)
indices = sort(unique(g))
[func(x[g .== i]) for i in indices]
end

function rep(x, n::Array{Int64,1})
length(x) == length(n) || error("length(x) wasn't length(n)")
vcat([repeat([x[i]], n[i]) for i = 1:length(x)]...)
end

function rep(x; each::Int64)
vcat([repeat([x[i]], each) for i = 1:length(x)]...)
end

function rep(x, n::Int64)
repeat(x, n)
end

data = [
13, 23, 8, 17, 25, 34, 18, 26, 10, 28, 18, 21,
26, 22, 30, 38, 15, 24, 18, 11, 21, 30, 31, 23,
22, 10, 29, 37, 22, 13, 29, 28, 21, 16, 21, 26,
26, 34, 30, 45, 17, 19, 27, 18, 36, 24, 25, 31
];
group = rep(1:4, each=12);

shirleywilliams(data, group, method = "up")
# 4: t = 2.1898989426889655
# 3: t = 1.0917905788994513
# 2: t = 1.2434675147149

頑張ってます。クリックお願いします。

Julia に翻訳--130　スティール・ドゥワス法，代表値の多重比較

スティール・ドゥワスの方法による代表値の多重比較
http://aoki2.si.gunma-u.ac.jp/R/Steel-Dwass.html

#==========
Julia の修行をするときに，いろいろなプログラムを書き換えるのは有効な方法だ。
以下のプログラムを Julia に翻訳してみる。

ファイル名: .jl　　関数名:

翻訳するときに書いたメモ

Rmath の ptukey() にバグがある。
lower_tail = false を指定すると NaN を返す。
回避策としては，
ptukey(q, nmeans, df, false)
を
1 - ptukey(q, nmeans, df)
とする。

==========#

using Combinatorics, StatsBase, Rmath, Printf

function steeldwass(data, group)
index, ni = table(group)
ng = length(ni)
k = binomial(ng, 2)
t = zeros(k)
pair = fill("", k)
for (n, (i, j)) in enumerate(combinations(1:ng, 2))
r = tiedrank(vcat(data[group .== i], data[group .== j]))
R = sum(r[1:ni[i]])
N = ni[i] + ni[j]
E = ni[i] * (N + 1) / 2
V = ni[i] * ni[j] / (N * (N - 1)) * (sum(r .^ 2) - N * (N + 1) ^ 2 / 4)
pair[n] = string(i) * ":" * string(j)
t[n] = abs(R - E) / sqrt(V)
end
p = 1 .- ptukey.(t * sqrt(2), ng, Inf)
@printf(" pair %10s %10s\n", "t value", "p value")
for i in 1:k
@printf("%5s %10.6f %10.6f\n", pair[i], t[i], p[i])
end
Dict(:pair => pair, :t => t, :pvalue => p)
end

function table(x) # indices が少ないとき
indices = sort(unique(x))
counts = zeros(Int, length(indices))
for i in indexin(x, indices)
counts[i] += 1
end
return indices, counts
end

data = [
6.9, 7.5, 8.5, 8.4, 8.1, 8.7, 8.9, 8.2, 7.8, 7.3, 6.8,
9.6, 9.4, 9.5, 8.5, 9.4, 9.9, 8.7, 8.1, 7.8, 8.8,
5.7, 6.4, 6.8, 7.8, 7.6, 7.0, 7.7, 7.5, 6.8, 5.9,
7.6, 8.7, 8.5, 8.5, 9.0, 9.2, 9.3, 8.0, 7.2, 7.9, 7.8
];
group = vcat([repeat([i], n) for (i, n) in enumerate([11, 10, 10, 11])]...);
steeldwass(data, group)
# pair t value p value
# 1:2 2.680234 0.036960
# 1:3 2.539997 0.053981
# 1:4 1.282642 0.574012
# 2:3 3.746076 0.001031
# 2:4 2.046776 0.170966
# 3:4 3.384456 0.003977

頑張ってます。クリックお願いします。

Julia に翻訳--129　スティール法，代表値の多重比較

スティールの方法による代表値の多重比較
http://aoki2.si.gunma-u.ac.jp/R/Steel.html

#==========
Julia の修行をするときに，いろいろなプログラムを書き換えるのは有効な方法だ。
以下のプログラムを Julia に翻訳してみる。

ファイル名: steel.jl　　関数名: steel

翻訳するときに書いたメモ

==========#

using StatsBase, Printf

function steel(data, group)
index, ni = table(group)
a = length(ni)
control = data[group .== 1]
n1 = length(control)
t = zeros(a)
rho = sum(n1 .== ni) == a ? 0.5 : getrho(ni)
for i in 2:a
r = tiedrank(vcat(control, data[group .== i]))
R = sum(r[1:n1])
N = n1 + ni[i]
E = n1 * (N + 1) / 2
V = n1 * ni[i] / N / (N - 1) * (sum(r .^ 2) - N * (N + 1) ^ 2 / 4)
t[i] = abs(R - E) / sqrt(V)
end
@printf(" pair %10s %10s\n", "t", "rho")
for i = 2:a
@printf("%2d:%2d %10.6f %10.6f\n", 1, i, t[i], rho)
end
Dict(:pair => string.(repeat([1], a-1)) .* ":" .* string.(2:a),
:t => t[2:a], :rho => rho)
end

function table(x) # indices が少ないとき
indices = sort(unique(x))
counts = zeros(Int, length(indices))
for i in indexin(x, indices)
counts[i] += 1
end
return indices, counts
end

func(x, y, ni1) = sqrt(x / (x + ni1) * y / (y + ni1))

function getrho(ni)
k = length(ni)
rho = func.(ni, transpose(ni), ni[1])
[rho[i, i] = 0 for i =1:k]
sum(rho[2:k, 2:k]) / (k - 2) / (k - 1)
end

data = [
50, 55, 65, 63, 60, 68, 69, 60, 52, 49,
80, 86, 74, 66, 79, 81, 70, 62, 60, 72,
42, 48, 58, 63, 62, 55, 63, 60, 53, 45
];

group = vcat([repeat([i], 10) for i in 1:3]...);
steel(data, group)
# pair t rho
# 1: 2 2.952566 0.500000
# 1: 3 1.175674 0.500000

頑張ってます。クリックお願いします。

Julia に翻訳--128　ボンフェローニ，ホルム，シェイファー，ホランド・コペンハーバー，平均値の多重比較

ボンフェローニの方法による平均値の多重比較
ホルムの方法による平均値の多重比較
シェイファーの方法による平均値の多重比較
ホランド・コペンハーバーの方法による平均値の多重比較
http://aoki2.si.gunma-u.ac.jp/R/Bonferroni.html

#==========
Julia の修行をするときに，いろいろなプログラムを書き換えるのは有効な方法だ。
以下のプログラムを Julia に翻訳してみる。

ファイル名: bonferroni.jl　　関数名: bonferroni

翻訳するときに書いたメモ

==========#

using Statistics, Rmath, Combinatorics, Printf

function bonferroni(data, group; alpha = 0.05, method = "Bonferroni") # "Holm", "Shaffer", "Holland-Copenhaver"
index, n = table(group)
a = length(n)
k = binomial(a, 2)
m = [mean(data[group .== g]) for g in index]
v = [var(data[group .== g]) for g in index]
phi = length(data) - a
V = sum((n .- 1) .* v) / phi
pair = combinations(1:a, 2)
t = [abs(m[i] - m[j]) / sqrt(V / n[i] + V / n[j]) for (i, j) in pair]
P = pt.(t, phi, false) .* 2
pairs = [i for i in pair]
result2 = hcat(pairs, t, P);
if method == "Bonferroni"
Pthreshold = repeat([alpha / k], k)
judge = (P .<= Pthreshold) .+ 0
else
result2 = result2[sortperm(result2[:, 3]), :]
if method != "Holm" && a > 9
error("Too many groups Use Holm")
method = "Holm"
end
if method == "Holm"
Pthreshold = alpha ./ (k:-1:1)
else
tbl = vcat([3, 1, 1, 6, repeat([3], 3), 2, 1, 10, repeat([6], 4),
4, 4:-1:1, 15, repeat([10], 5), repeat([7], 3),
6, 4, 4:-1:1, 21, repeat([15], 6), repeat([11], 4),
10, 9, 7, 7:-1:1, 28, repeat([21], 7), repeat([16], 5),
15, 13, 13:-1:1, 36, repeat([28], 8), repeat([22], 6),
21, repeat([18], 3), 16, 16, 15, 13, 13:-1:1]...)
h = tbl[(a * (a - 1) * (a - 2) ÷ 6):((a ^ 3 - a) ÷ 6 - 1)]
Pthreshold = method == "Shaffer" ? alpha ./ h : 1 .- (1 - alpha) .^ (1 ./ h)
end
judge = (cumprod(result2[:, 3] .<= Pthreshold) .!= 0) .+ 0
if method == "Holm" || method == "Shaffer" || method == "Holland-Copenhaver"
minpos = argmin(judge)
if judge[minpos] == 0
judge[minpos:k] .= 2
end
end
end
judge2 = [["not significant", "significant", "suspended"][j+1] for j in judge]
result2 = hcat(result2, Pthreshold, judge2)
@printf(" pair %7s %12s %12s %7s\n", "t value", "p value", "p threshold", "judge")
for i = 1:k
@printf("%2d:%2d %7.5f %12.7g %12.7g %7s\n",
result2[i, 1][1], result2[i, 1][2], result2[i, 2],
result2[i, 3], Pthreshold[i], judge2[i])
end
Dict(:alpha => alpha, :noftests => k, :result2 => result2)
end

function table(x) # indices が少ないとき
indices = sort(unique(x))
counts = zeros(Int, length(indices))
for i in indexin(x, indices)
counts[i] += 1
end
return indices, counts
end

data = [
10.7, 9.7, 8.5, 9.4, 8.8, 8.4, 10.6,
8.1, 8.3, 8.7, 6.9, 5.7, 9.5, 6.7,
7.9, 7.5, 7.4, 9.2, 5.7, 8.3, 9.7,
6.2, 7.1, 5.5, 4.7, 6.3, 6.9, 7.5
]
group = vcat([repeat([i], 7) for i = 1:4]...)

bonferroni(data, group) # method = "Bonferroni"
# pair t value p value p threshold judge
# 1: 2 2.83517 0.009148816 0.008333333 not significant
# 1: 3 2.41686 0.02362029 0.008333333 not significant
# 1: 4 5.08935 3.315126e-05 0.008333333 significant
# 2: 3 0.41830 0.6794449 0.008333333 not significant
# 2: 4 2.25419 0.03358738 0.008333333 not significant
# 3: 4 2.67249 0.01331927 0.008333333 not significant

bonferroni(data, group, method = "Holm")
# pair t value p value p threshold judge
# 1: 4 5.08935 3.315126e-05 0.008333333 significant
# 1: 2 2.83517 0.009148816 0.01 significant
# 3: 4 2.67249 0.01331927 0.0125 suspended
# 1: 3 2.41686 0.02362029 0.01666667 suspended
# 2: 4 2.25419 0.03358738 0.025 suspended
# 2: 3 0.41830 0.6794449 0.05 suspended

bonferroni(data, group, method = "Shaffer")
# pair t value p value p threshold judge
# 1: 4 5.08935 3.315126e-05 0.008333333 significant
# 1: 2 2.83517 0.009148816 0.01666667 significant
# 3: 4 2.67249 0.01331927 0.01666667 significant
# 1: 3 2.41686 0.02362029 0.01666667 suspended
# 2: 4 2.25419 0.03358738 0.025 suspended
# 2: 3 0.41830 0.6794449 0.05 suspended

bonferroni(data, group, method = "Holland-Copenhaver")
# pair t value p value p threshold judge
# 1: 4 5.08935 3.315126e-05 0.008512445 significant
# 1: 2 2.83517 0.009148816 0.01695243 significant
# 3: 4 2.67249 0.01331927 0.01695243 significant
# 1: 3 2.41686 0.02362029 0.01695243 suspended
# 2: 4 2.25419 0.03358738 0.02532057 suspended
# 2: 3 0.41830 0.6794449 0.05 suspended

頑張ってます。クリックお願いします。

Julia に翻訳--127　シェッフェ法，平均値の線形比較

シェッフェの方法による平均値の線形比較
http://aoki2.si.gunma-u.ac.jp/R/scheffe.html

#==========
Julia の修行をするときに，いろいろなプログラムを書き換えるのは有効な方法だ。
以下のプログラムを Julia に翻訳してみる。

ファイル名: scheffe.jl　　関数名: scheffe

翻訳するときに書いたメモ

かなりスイスイ書けるようになってきた。

==========#

using Rmath

function scheffe(n, m, u, g1, g2; conflevel = 0.95)
all(length(n) == length(m) == length(u) &&
all(n .> 1) &&
all(u .> 0) &&
all(floor.(n) .== n) &&
all(floor.(g1) .== g1) &&
all(floor.(g2) .== g2)) || error("parameter error")
ng = length(n)
k1 = ng - 1
nc = sum(n)
dfw = nc - ng
Vw = sum(u .* (n .- 1)) / dfw
n1 = length(g1)
n2 = length(g2)
g0 = collect(1:ng)
g0 = [i for i in g0 if !(i in g1)]
g0 = [i for i in g0 if !(i in g2)]
n0 = ng - n1 - n2
weight = vcat([repeat([i], j)
for (i, j) in zip([1 / n1, - 1 / n2, 0], [n1, n2, n0])]...)
weight = weight[sortperm(vcat([g1, g2, g0]...))]
theta = sum(weight .* m)
Vtheta = Vw * sum(weight .^ 2 ./ n)
confint = theta .- [1, - 1] *
sqrt(k1 * qf(1 - conflevel, k1, dfw, false) * Vtheta)
F0 = theta ^ 2 / k1 / Vtheta
p = pf(F0, k1, dfw, false)
println("theta = $theta, V(theta) = $Vtheta")
println("F = $F0, df1 = $k1, df2 = $dfw, p value = $p")
println("$(100conflevel) percent confidence interval = $confint")
Dict(:theta => theta, :Vtheta => Vtheta, :F => F0,
:df1 => k1, :df2 => dfw, :pvalue => p, :confint => confint)
end

n = [8, 11, 22, 6];
m = [135.83, 160.49, 178.35, 188.06];
sd = [19.59, 12.28, 15.01, 9.81];

scheffe(n, m, sd .^ 2, 1:2, 3:4)
# theta = -35.04499999999997, V(theta) = 23.40938365266032
# F = 17.488030202230846, df1 = 3, df2 = 43, p value = 1.435044718014149e-7
# 95.0 percent confidence interval = [-49.1218509474347, -20.968149052565245]

scheffe(n, m, sd .^ 2, 1:3, 4)
# theta = -29.836666666666673, V(theta) = 42.81362201961475
# F = 6.9310236305159085, df1 = 3, df2 = 43, p value = 0.000658278780876043
# 95.0 percent confidence interval = [-48.87379807609931, -10.799535257234034]

頑張ってます。クリックお願いします。

Julia に翻訳--126　テューキー法，平均値の線形比較

#==========
Julia の修行をするときに，いろいろなプログラムを書き換えるのは有効な方法だ。
以下のプログラムを Julia に翻訳してみる。

テューキーの方法による平均値の線形比較
http://aoki2.si.gunma-u.ac.jp/R/tukey2.html

ファイル名: tlc.jl　　関数名: tlc

翻訳するときに書いたメモ

Rmath の ptukey()，qtukey() にバグがある。
lower_tail = false を指定すると NaN を返す。
回避策としては，
ptukey(q, nmeans, df, false)
を
1 - ptukey(q, nmeans, df)
とする。

qtukey(p, nmeans, df, false)
を
qtukey(1-p, nmeans, df)
とする。

==========#

using Rmath

function tlc(n, m, u, g1, g2; conflevel = 0.95)
all(length(n) == length(m) == length(u) &&
all(n .> 1) &&
all(u .> 0) &&
all(floor.(n) .== n) &&
all(n .== n[1]) &&
all(floor.(g1) .== g1) &&
all(floor.(g2) .== g2)) || error("parameter error")
ng = length(n)
nc = sum(n)
dfw = nc - ng
Vw = sum(u .* (n .- 1)) / dfw
n1 = length(g1)
n2 = length(g2)
g0 = collect(1:ng)
g0 = [i for i in g0 if !(i in g1)]
g0 = [i for i in g0 if !(i in g2)]
n0 = ng - n1 - n2
weight = vcat([repeat([i], j)
for (i, j) in zip([1 / n1, - 1 / n2, 0], [n1, n2, n0])]...)
weight = weight[sortperm(vcat([g1, g2, g0]...))]
theta = sum(weight .* m)
sq = sqrt(Vw / n[1])
q = qtukey(conflevel, ng, dfw)
p = 1 - ptukey(abs(theta / sq), ng, dfw)
confint = theta .- [1, - 1] * q * sq
println("theta = $theta, a = $ng, df = $dfw, pvalue = $p")
println("$(100conflevel) percent confidence interval: $confint")
Dict(:theta => theta, :a => ng, :df => dfw,
:pvalue => p, :confint => confint)
end

n = repeat([20], 5)
m = [35.6, 32.7, 29.4, 27.9, 25.7]
u = [5.6, 4.6, 4.9, 3.7, 2.5]

tlc(n, m, u, 1:3, 4:5)
# theta = 5.766666666666664, a = 5, df = dfw, pvalue = 4.640164918967571e-10
# 95.0 percent confidence interval: [3.951633188914924, 7.581700144418404]

tlc(n, m, u, 1:2, 4:5)
# theta = 7.350000000000007, a = 5, df = dfw, pvalue = 4.6348025417586314e-10
# 95.0 percent confidence interval: [5.534966522248267, 9.165033477751747]

頑張ってます。クリックお願いします。

Julia に翻訳--125　ウィリアムズ法，平均値の多重比較

ウィリアムズの方法による平均値の多重比較
http://aoki2.si.gunma-u.ac.jp/R/Williams.html

#==========
Julia の修行をするときに，いろいろなプログラムを書き換えるのは有効な方法だ。
以下のプログラムを Julia に翻訳してみる。

ファイル名: williams.jl　　関数名: williams

翻訳するときに書いたメモ

p 値は求めない

==========#

using Statistics, Rmath

function williams(data, group; method = "up") # or method = "down"
func = method == "down" ? minimum : maximum
index, ni = table(group)
sumi = [sum(data[group .== g]) for g in index]
meani = [mean(data[group .== g]) for g in index]
vi = [var(data[group .== g]) for g in index]
a = length(ni)
phie = sum(ni) - a
ve = sum((ni .- 1) .* vi) / phie
t = [(func(cumsum(reverse(sumi[2:p])) ./
cumsum(reverse(ni[2:p]))) .- meani[1]) /
sqrt(ve * (1 / ni[1] + 1 / ni[p])) for p = a:-1:2]
println("phi.e = $phie")
println("t = $t")
Dict(:phie => phie, :t => method == "down" ? -t : t)
end

function table(x) # indices が少ないとき
indices = sort(unique(x))
counts = zeros(Int, length(indices))
for i in indexin(x, indices)
counts[i] += 1
end
return indices, counts
end

data = [
415, 380, 391, 413, 372, 359, 401,
387, 378, 359, 391, 362, 351, 348,
357, 379, 401, 412, 392, 356, 366,
361, 351, 378, 332, 318, 344, 315,
299, 308, 323, 351, 311, 285, 297
];
group = [repeat([i], 7) for i = 1:5]
group = vcat(group...)

williams(data, group, method = "down")
# phi.e = 30
# t = [-7.076037814702816, -4.217674245029326, -1.4164779467493078, -1.9690949035528496]

頑張ってます。クリックお願いします。

Julia に翻訳--124　ダネット法，平均値の多重比較

ダネットの方法による平均値の多重比較
http://aoki2.si.gunma-u.ac.jp/R/dunnett.html

#==========
Julia の修行をするときに，いろいろなプログラムを書き換えるのは有効な方法だ。
以下のプログラムを Julia に翻訳してみる。

ファイル名: dunnett.jl　　関数名: dunnett

翻訳するときに書いたメモ

mvtnorm の pmvt 関数は Julia にはないので，RCall で R を呼び出して計算させる。

==========#

using Statistics, Rmath, RCall, Printf

function dunnett(data, group)
index, ni = table(group)
a = length(ni)
n = length(data)
mi = [mean(data[group .== g]) for g in index]
vi = [var(data[group .== g]) for g in index]
Vw = sum(vi .* (ni .- 1)) / (n .- a)
rho = getrho(ni)
t = (abs.(mi .- mi[1]) ./ sqrt.(Vw .* (1 ./ ni .+ 1 / ni[1])))[2:a]
p = pdunnett.(t, a, n - a, rho)
p = vcat([float.(p[i]) for i = 1:a-1]...)
@printf(" pair %7s %12s\n", "t value", "p value")
for i = 1:a-1
@printf("%2d:%2d %7.5f %12.7g\n", 1, i+1, t[i], p[i])
end
Dict(:t => t, :pvalue =>p)
end

func(x, y, ni1) = sqrt(x / (x + ni1) * y / (y + ni1))

function getrho(ni)
k = length(ni)
rho = func.(ni, transpose(ni), ni[1])
[rho[i, i] = 0 for i =1:k]
sum(rho[2:k, 2:k]) / (k - 2) / (k - 1)
end

function pdunnett(x, a, df, r)
R"""
library(mvtnorm)
corr = diag($a - 1)
corr[lower.tri(corr)] = $r
1 - pmvt(lower = -$x, upper = $x, delta = numeric($a - 1), df = $df,
corr = corr, abseps = 0.0001)[1]
"""
end

function table(x) # indices が少ないとき
indices = sort(unique(x))
counts = zeros(Int, length(indices))
for i in indexin(x, indices)
counts[i] += 1
end
return indices, counts
end

data = [
14, 15, 14, 16, 15, 17, 17,
17, 16, 17, 16, 15, 18, 19, 15,
18, 19, 20, 19, 17, 17,
20, 21, 19, 20, 19, 22, 20,
19, 20, 19, 17, 17, 17, 18 ]

group = [repeat([i], count) for (i, count) in enumerate([7, 8, 6, 7, 7])]
group = vcat(group...)

dunnett(data, group)

# pair t value p value
# 1: 2 1.85409 0.2156511
# 1: 3 4.18754 0.0008677534
# 1: 4 7.07368 1.02906e-07
# 1: 5 4.07273 0.001131645

頑張ってます。クリックお願いします。

Julia に翻訳--123　テューキー法，平均値の対比較

テューキーの方法による平均値の対比較
http://aoki2.si.gunma-u.ac.jp/R/tukey.html

#==========
Julia の修行をするときに，いろいろなプログラムを書き換えるのは有効な方法だ。
以下のプログラムを Julia に翻訳してみる。

ファイル名: tukey.jl　　関数名: tukey

翻訳するときに書いたメモ

Rmath の ptukey() にもバグがある。
lower_tail = false を指定すると NaN を返す。
回避策としては，
ptukey(q, nmeans, df, false)
を
1 - ptukey(q, nmeans, df)
とする。

二重配列（リスト）を平坦化するには，vcat(A...) を使う。

==========#

using Statistics, Combinatorics, Rmath, Printf

function tukey(data, group; method = "Tukey") # or method = "Games-Howell"
indices, n = table(group)
a = length(n)
npairs = binomial(a, 2)
phie = sum(n) - a
Mean = [mean(data[group .== g]) for g in indices]
Variance = [var(data[group .== g]) for g in indices]
pair = combinations(1:a, 2)
if method == "Tukey"
ve = sum((n .- 1) .* Variance) / phie
t = [abs(Mean[i] - Mean[j]) / sqrt(ve * (1 / n[i] + 1 / n[j])) for (i, j) in pair]
p = 1 .- ptukey.(t .* sqrt(2), a, phie)#, false)
@printf(" pair %7s %7s\n", "t value", "p value")
for (i, (j, k)) in enumerate(pair)
@printf("%2d:%2d %7.5f %7.5f\n", j, k, t[i], p[i])
end
Dict(:t => t, :pvalue => p, :phi => phie, :v => ve)
else
t = [ts0(Mean[i], Variance[i], n[i], Mean[j], Variance[j], n[j]) for (i, j) in pair]
df = [dfs0(Variance[i], n[i], Variance[j], n[j]) for (i, j) in pair]
p = 1 .- ptukey.(t .* sqrt(2), a, df) #, false)
@printf(" pair %7s %9s %7s\n", "t value", "df", "p value")
for (i, (j, k)) in enumerate(pair)
@printf("%2d:%2d %7.5f %9.5f %7.5f\n", j, k, t[i], df[i], p[i])
end
Dict(:t => t, :df => df, :pvalue => p)
end
end

ts0(mi, vi, ni, mj, vj, nj) = abs(mi - mj) / sqrt(vi / ni + vj / nj)

dfs0(vi, ni, vj, nj) = (vi / ni + vj / nj) ^ 2 /
((vi / ni)^2 / (ni - 1) + (vj / nj)^2 / (nj - 1))

function table(x) # indices が少ないとき
indices = sort(unique(x))
counts = zeros(Int, length(indices))
for i in indexin(x, indices)
counts[i] += 1
end
return indices, counts
end

data = [
14., 15, 14, 16, 15, 17, 17,
17, 16, 17, 16, 15, 18, 19, 15,
18, 19, 20, 19, 17, 17,
20, 21, 19, 20, 19, 22, 20,
19, 20, 19, 17, 17, 17, 18
];

group = [repeat([i], count) for (i, count) in enumerate([7, 8, 6, 7, 7])]
group = vcat(group...)

tukey(data, group)
# pair t value p value
# 1: 2 1.85409 0.36302
# 1: 3 4.18754 0.00198
# 1: 4 7.07368 0.00000
# 1: 5 4.07273 0.00269
# 2: 3 2.53703 0.10906
# 2: 4 5.45158 0.00006
# 2: 5 2.35220 0.15676
# 3: 4 2.60863 0.09414
# 3: 5 0.27459 0.99868
# 4: 5 3.00096 0.03978

tukey(data, group, method = "Games-Howell")
# pair t value df p value
# 1: 2 1.72859 12.97639 0.45125
# 1: 3 4.21141 10.84628 0.01043
# 1: 4 7.50517 11.65358 0.00007
# 1: 5 4.08185 11.97449 0.01084
# 2: 3 2.43499 11.69245 0.17285
# 2: 4 5.48706 12.78706 0.00087
# 2: 5 2.24124 12.99984 0.22492
# 3: 4 2.83393 10.14001 0.10023
# 3: 5 0.28228 10.70743 0.99838
# 4: 5 3.26970 11.80872 0.04412

頑張ってます。クリックお願いします。

Julia に翻訳--122　平均値の多重比較，ライアン法，テューキー法

平均値の多重比較（ライアンの方法，テューキーの方法）
http://aoki2.si.gunma-u.ac.jp/R/m_multi_comp.html

#==========
Julia の修行をするときに，いろいろなプログラムを書き換えるのは有効な方法だ。
以下のプログラムを Julia に翻訳してみる。

ファイル名: mmulticomp.jl　　関数名: mmulticomp

翻訳するときに書いたメモ

Rmath の qtukey() にバグがある。

qtukey(q::Number, nmeans::Number, df::Number, nranges::Number=1.0,
lower_tail::Bool=true, log_p::Bool=false) =
ccall((:qtukey ,libRmath), Float64,
(Float64, Float64, Float64, Float64, Int32, Int32),
p, nranges, nmeans, df, lower_tail, log_p)
end

# 最初の引数 'q::Number' は q ではなく，p である。すなわち，'p::Number'
# しかし，それを直しても lower_tail = false を指定すると NaN を返す。
# 回避策としては，
# qtukey(p, nmeans, df, false)
# を
# qtukey(1-p, nmeans, df)
# とする。

==========#

using Rmath, Roots, Printf

function mmulticomp(n, me, s,; alpha = 0.05, method = "ryan") # or method = "tukey"
k = length(n)
all(k == length(me) && k == length(s) && all(n .> 0) && all(floor.(n) .== n) && all(s .> 0)) || error("parameter error")
o = sortperm(me)
sn = n[o]
sm = me[o]
ss = s[o]
nt = sum(sn)
mt = sum(sn .* sm) / nt
dfw = nt - k
vw = sum((sn .- 1) .* ss .^ 2) / dfw
numsignificant = nsn = 0
nss = zeros(Int, k * (k - 1) ÷ 2)
nsb = zeros(Int, k * (k - 1) ÷ 2)
for m = k:-1:2
for small = 1:k - m + 1
big = small + m - 1
if check(small, big, nsn, nss, nsb)
t0 = (sm[big] - sm[small]) / sqrt(vw * (1 / sn[big] + 1 / sn[small]))
if method == "ryan"
P = pt(t0, dfw, false) * 2
nominalalpha = 2 * alpha / (k * (m - 1))
result = P <= nominalalpha
else
t0 *= sqrt(2)
q = (qtukey(0.95, k, dfw) + qtukey(0.95, m, dfw)) / 2
WSD = q * sqrt(vw * (1 / sn[big] + 1 / sn[small]) / 2)
P = find_zero(p -> ((qtukey(1 - p, k, dfw) +
qtukey(1 - p, m, dfw)) / 2 - t0), [0, 1], Bisection())
result = sm[big] - sm[small] >= WSD
end
if result
numsignificant = 1
@printf("mean[ % 2i] = %7.5f vs mean[ % 2i] = %7.5f : diff = % 7.5f, ",
o[small], me[small], o[big], me[big], me[big] - me[small])
if method == "ryan"
@printf("t = %7.5f : P = %7.5f, alpha' = %7.5f\n", t0, P, nominalalpha)
else
@printf("WSD = %7.5f : t = %7.5f : P = %7.5f\n", WSD, t0, P)
end
else
nsn += 1
nss[nsn] = small
nsb[nsn] = big
end
end
end
end
if numsignificant == 0
print("Not significant at all")
end
end

function check(s, b, nsn, nss, nsb)
if nsn > 1
for i in 1:nsn
if (nss[i] <= s <= nsb[i]) && (nss[i] <= b <= nsb[i])
return false
end
end
end
return true
end

mmulticomp([8, 11, 22, 6], [135.83, 160.49, 178.35, 188.06], [19.59, 12.28, 15.01, 9.81], method = "ryan")
# mean[ 1] = 135.83000 vs mean[ 4] = 188.06000 : diff = 52.23000, t = 6.53866 : P = 0.00000, alpha' = 0.00833
# mean[ 1] = 135.83000 vs mean[ 3] = 178.35000 : diff = 42.52000, t = 6.96308 : P = 0.00000, alpha' = 0.01250
# mean[ 2] = 160.49000 vs mean[ 4] = 188.06000 : diff = 27.57000, t = 3.67279 : P = 0.00066, alpha' = 0.01250
# mean[ 1] = 135.83000 vs mean[ 2] = 160.49000 : diff = 24.66000, t = 3.58814 : P = 0.00085, alpha' = 0.02500
# mean[ 2] = 160.49000 vs mean[ 3] = 178.35000 : diff = 17.86000, t = 3.26998 : P = 0.00212, alpha' = 0.02500

mmulticomp([8, 11, 22, 6], [135.83, 160.49, 178.35, 188.06], [19.59, 12.28, 15.01, 9.81], method = "tukey")
# mean[ 1] = 135.83000 vs mean[ 4] = 188.06000 : diff = 52.23000, WSD = 21.34697 : t = 9.24706 : P = 0.00000
# mean[ 1] = 135.83000 vs mean[ 3] = 178.35000 : diff = 42.52000, WSD = 15.57114 : t = 9.84728 : P = 0.00000
# mean[ 2] = 160.49000 vs mean[ 4] = 188.06000 : diff = 27.57000, WSD = 19.14118 : t = 5.19411 : P = 0.00258
# mean[ 1] = 135.83000 vs mean[ 2] = 160.49000 : diff = 24.66000, WSD = 16.11328 : t = 5.07440 : P = 0.00195
# mean[ 2] = 160.49000 vs mean[ 3] = 178.35000 : diff = 17.86000, WSD = 12.80554 : t = 4.62444 : P = 0.00482

頑張ってます。クリックお願いします。

Julia に翻訳--121　13日の金曜日

次の 13 日の金曜日はいつか？
https://blog.goo.ne.jp/r-de-r/e/f806d8152abc1740ad261335aa4618cc

python だと

def Zeller(y, m, d):
w = ['Sat', 'Sun', 'Mon', 'Tue', 'Wed', 'Thu', 'Fri']
if m == 1 or m ==2:
m += 12
y -= 1
C = y // 100
Y = y % 100
return w[(d + 26*(m+1)//10 + Y + Y//4 -2*C + C // 4) % 7]

for y in range(2021, 2030):
for m in range(1, 13):
if Zeller(y, m, 13) == 'Fri':
print(y, m)

であった。

Julia でも，移植は簡単だろうと，安易に以下のようにすると，間違った答えが出てしまう。
Python の // は Julia では ÷ なのは間違いないのだが。

function zeller(y, m, d)
w = ["Sat", "Sun", "Mon", "Tue", "Wed", "Thu", "Fri"]
if m == 1 | m ==2
m += 12
y -= 1
end
C = floor(Int, y / 100)
Y = y % 100
return w[((d + 26*(m+1) ÷ 10 + Y + Y ÷ 4 -2*C + C ÷ 4) % 7) + 1]
end

for y = 2021:2029
for m = 1:12
if zeller(y, m, 13) == "Fri"
println("$y, $m")
end
end
end

出力結果

2021, 8
2022, 5
2023, 2
2023, 10
2024, 1
2024, 9
2024, 12
2025, 6
2026, 3
2026, 11
2027, 8
2028, 2
2028, 10
2029, 4
2029, 7

デバッグ宜しく（答えを見るなら，スクロールしてね）

バグは
if m == 1 | m ==2
のところ。

ここは，ビット演算子ではなく論理演算子でなくてはならない。
つまり，
if m == 1 || m ==2

これを修正して，以下のようにすると，正しい答えが得られる。

function zeller(y, m, d)
w = ["Sat", "Sun", "Mon", "Tue", "Wed", "Thu", "Fri"]
if m == 1 || m ==2
m += 12
y -= 1
end
C = y ÷ 100
Y = y % 100
return w[((d + 26*(m+1) ÷ 10 + Y + Y ÷ 4 -2*C + C ÷ 4) % 7) + 1]
end

for y = 2021:2029
for m = 1:12
if zeller(y, m, 13) == "Fri"
println("$y, $m")
end
end
end

出力結果

2021, 8
2022, 5
2023, 1
2023, 10
2024, 9
2024, 12
2025, 6
2026, 2
2026, 3
2026, 11
2027, 8
2028, 10
2029, 4
2029, 7

頑張ってます。クリックお願いします。

Julia に翻訳--120　比率の多重比較，ライアン法，テューキー法

比率の多重比較（ライアンの方法，テューキーの方法）
http://aoki2.si.gunma-u.ac.jp/R/p_multi_comp.html

#==========
Julia の修行をするときに，いろいろなプログラムを書き換えるのは有効な方法だ。
以下のプログラムを Julia に翻訳してみる。

ファイル名: pmulticomp.jl　　関数名: pmulticomp

翻訳するときに書いたメモ

Rmath の qtukey() にバグがある。

qtukey(q::Number, nmeans::Number, df::Number, nranges::Number=1.0,
lower_tail::Bool=true, log_p::Bool=false) =
ccall((:qtukey ,libRmath), Float64,
(Float64, Float64, Float64, Float64, Int32, Int32),
p, nranges, nmeans, df, lower_tail, log_p)
end

# 最初の引数 'q::Number' は q ではなく，p である。すなわち，'p::Number'
# しかし，それを直しても lower_tail = false を指定すると NaN を返す。
# 回避策としては，
# qtukey(p, nmeans, df, false)
# を
# qtukey(1-p, nmeans, df)
# とする。

==========#

using Rmath, Printf

function pmulticomp(n, r; alpha = 0.05, method = "ryan") # or method = "tukey"
k = length(n)
all(k == length(r) && all(n .> 0) && all(r .>= 0) && all(r .<= n) &&
all(floor.(n) .== n) && all(floor.(r) .== r)) || error("parameter error")
o = sortperm(r ./ n)
sr = r[o]
sn = n[o]
srsn = sr ./ sn
numsignificant = nsn = 0
nss = zeros(Int, k * (k - 1) ÷ 2)
nsb = zeros(Int, k * (k - 1) ÷ 2)
# m = k; small = 1
q = NaN # dummy これがないと動かない
for m = k:-1:2
for small = 1:k - m + 1
big = small + m - 1
if check(small, big, nsn, nss, nsb)
prop = sum(sr[small:big]) / sum(sn[small:big])
se = sqrt(prop * (1 - prop) * (1 / sn[small] + 1 / sn[big]))
diff = srsn[big] - srsn[small]
if method == "ryan"
nominalalpha = 2 * alpha / (k * (m - 1))
rd = se * qnorm(nominalalpha / 2, false)
z = se == 0 ? 0 : diff / se
p = pnorm(z, false) * 2
result = p <= nominalalpha
else
if m == k
q = qtukey(1-alpha, k, Inf)
qq = q
else
qq = (q + qtukey(1-alpha, m, Inf)) / 2
end
WSD = se * qq / sqrt(2)
result = diff != 0 && diff >= WSD
end
if result
numsignificant = 1
@printf("p[%2i] = %7.5f vs p[%2i] = %7.5f : diff = %7.5f, ",
o[small], srsn[small], o[big], srsn[big], diff)
if method == "ryan"
@printf("RD = %7.5f : P = %7.5f, alpha' = %7.5f\n", rd, p, nominalalpha)
else
@printf("WSD = %7.5f\n", WSD)
end
else
nsn += 1
nss[nsn] = small
nsb[nsn] = big
end
end
end
end
if numsignificant == 0
println("Not significant at all")
end
end

function check(s, b, nsn, nss, nsb)
if nsn > 1
for i in 1:nsn
if (nss[i] <= s <= nsb[i]) && (nss[i] <= b <= nsb[i])
return false
end
end
end
return true
end

n = [30, 35, 47, 21, 45];
r = [2, 4, 14, 13, 39];

pmulticomp(n, r)
# p[ 1] = 0.06667 vs p[ 5] = 0.86667 : diff = 0.80000, RD = 0.32472 : P = 0.00000, alpha' = 0.00500
# p[ 1] = 0.06667 vs p[ 4] = 0.61905 : diff = 0.55238, RD = 0.33341 : P = 0.00001, alpha' = 0.00667
# p[ 2] = 0.11429 vs p[ 5] = 0.86667 : diff = 0.75238, RD = 0.30528 : P = 0.00000, alpha' = 0.00667
# p[ 1] = 0.06667 vs p[ 3] = 0.29787 : diff = 0.23121, RD = 0.23054 : P = 0.00979, alpha' = 0.01000
# p[ 2] = 0.11429 vs p[ 4] = 0.61905 : diff = 0.50476, RD = 0.32612 : P = 0.00007, alpha' = 0.01000
# p[ 3] = 0.29787 vs p[ 5] = 0.86667 : diff = 0.56879, RD = 0.26479 : P = 0.00000, alpha' = 0.01000
# p[ 3] = 0.29787 vs p[ 4] = 0.61905 : diff = 0.32118, RD = 0.29877 : P = 0.01239, alpha' = 0.02000

pmulticomp(n, r, method="tukey")
# p[ 1] = 0.06667 vs p[ 5] = 0.86667 : diff = 0.80000, WSD = 0.31555
# p[ 1] = 0.06667 vs p[ 4] = 0.61905 : diff = 0.55238, WSD = 0.32546
# p[ 2] = 0.11429 vs p[ 5] = 0.86667 : diff = 0.75238, WSD = 0.29800
# p[ 1] = 0.06667 vs p[ 3] = 0.29787 : diff = 0.23121, WSD = 0.22695
# p[ 2] = 0.11429 vs p[ 4] = 0.61905 : diff = 0.50476, WSD = 0.32104
# p[ 3] = 0.29787 vs p[ 5] = 0.86667 : diff = 0.56879, WSD = 0.26067
# p[ 3] = 0.29787 vs p[ 4] = 0.61905 : diff = 0.32118, WSD = 0.30102

頑張ってます。クリックお願いします。

Julia に翻訳--119　一対比較データの双対尺度法，西里