#==========
Julia の修行をするときに,いろいろなプログラムを書き換えるのは有効な方法だ。
以下のプログラムを Julia に翻訳してみる。
マン・ホイットニーの U 検定(exact test)
http://aoki2.si.gunma-u.ac.jp/R/exact-mw.html
ファイル名: exactmw.jl 関数名: exactmw
翻訳するときに書いたメモ
global は不要だった。
==========#
using Rmath, Printf
function exactmw(x, y = NaN; exact = true)
function found()
hh = abs(temp2 - sum(um[1, :] .* r))
if hh >= stat_val || abs(hh - stat_val) <= EPSILON
nprod = sum(perm_table[rt .+ 1]) - sum(perm_table[um .+ 1])
nntrue += exp(nprod - nntrue2 * log_expmax)
while nntrue >= EXPMAX
nntrue /= EXPMAX
nntrue2 += 1
end
end
ntables += 1
end
function search(x, y)
if y == 1
found()
elseif x == 1
t = um[1, 1] - um[y, 1]
if t >= 0
um[1, 1] = t
search(nc, y - 1)
um[1, 1] += um[y, 1]
end
else
search(x - 1, y)
while um[y, 1] != 0 && um[1, x] != 0
um[y, x] += 1
um[y, 1] -= 1
um[1, x] -= 1
search(x - 1, y)
end
um[y, 1] += um[y, x]
um[1, x] += um[y, x]
um[y, x] = 0
end
end
function exacttest()
denom2 = 0
denom = perm_table[n + 1] - sum(perm_table[ct .+ 1])
while denom > log_expmax
denom -= log_expmax
denom2 += 1
end
denom = exp(denom)
um[:, 1] = rt
um[1, :] = ct
search(nc, nr)
@printf("正確な P 値 = % .10g\n", nntrue / denom * EXPMAX ^ (nntrue2 - denom2))
@printf("査察した分割表の個数は % s 個\n", ntables)
end
function asymptotic()
z = stat_val / sqrt(n1n2 / (n * (n - 1)) * (n ^ 3 - n - sum(ct .^ 3 .- ct)) / 12)
@printf("U = % g, Z = % g, P 値 = % g\n", n1n2 / 2 - stat_val, z, pnorm(z, false) * 2)
end
if ndims(x) == 2
t = x
else
nx = length(x)
ny = length(y)
indexx, indexy, t = table(rep(1:2, [nx, ny]), vcat(x, y))
end
EPSILON = 1e-10
EXPMAX = 1e100
log_expmax = log(EXPMAX)
nr, nc = size(t)
nr == 2 || error("nrow(table) wasn't 2")
um = zeros(Int, nr, nc)
rt = sum(t, dims=2)
ct = transpose(sum(t, dims=1))
n1 = rt[1]
n2 = rt[2]
n1n2 = n1 * n2
n = n1 + n2
r = cumsum(vcat(0, ct[1:nc-1])) .+ (ct .+ 1) ./ 2
temp = n1n2 + n1 * (n1 + 1) / 2
temp2 = temp - n1n2 / 2
stat_val = abs(temp2 - sum(t[1, :] .* r))
asymptotic()
if exact
perm_table = cumsum(vcat(0, log.(1:n .+ 1)))
ntables = nntrue = nntrue2 = 0
exacttest()
end
end
function rep(x, n::Array{Int64,1})
length(x) == length(n) || error("length(x) wasn't length(n)")
vcat([repeat([x[i]], n[i]) for i = 1:length(x)]...)
end
function table(x, y) # 二次元
indicesx = sort(unique(x))
indicesy = sort(unique(y))
counts = zeros(Int, length(indicesx), length(indicesy))
for (i, j) in zip(indexin(x, indicesx), indexin(y, indicesy))
counts[i, j] += 1
end
return indicesx, indicesy, counts
end
x = [5 3 2 1
2 3 1 2];
exactmw(x)
# U = 33.5, Z = 0.907299, P 値 = 0.364248
# 正確な P 値 = 0.3837421608
# 査察した分割表の個数は 91 個
2 つのデータベクトルが与えられる場合
x = [78, 64, 75, 45, 82];
y = [110, 70, 53, 53];
exactmw(x, y)
[1 0 1 0 1 1 1 0; 0 2 0 1 0 0 0 1]
# U = 9, Z = 0.245976, P 値 = 0.805701
# 正確な P 値 = 0.8492063492
# 査察した分割表の個数は 91 個