using HypothesisTests
★ 一標本 t 検定
# OneSampleTTest(v::AbstractVector{T<:Real}, μ0::Real = 0)
# using Random; Random.seed!(777)
# x = round.(10randn(20), digits=1);
x = [-2.3, -2.2, -12.9, -9.0, -10.5, -19.6, 0.1, 0.9, 5.4, -2.4, -6.0, 5.6, 0.5, -3.3, -14.8, -7.1, 10.1, -11.6, -16.1, -20.0];
using Statistics
obj1 = OneSampleTTest(x)
#=
One sample t-test
-----------------
Population details:
parameter of interest: Mean
value under h_0: 0
point estimate: -5.76
95% confidence interval: (-9.731, -1.789)
Test summary:
outcome with 95% confidence: reject h_0
two-sided p-value: 0.0068
Details:
number of observations: 20
t-statistic: -3.035933027748186
degrees of freedom: 19
empirical standard error: 1.89727505427625
=#
obj1.n # 20
obj1.xbar # -5.76
obj1.df # 19
obj1.stderr # 1.89727505427625
obj1.t # -3.035933027748186
obj1.μ0 # 0
pvalue(obj1) # 0.006798032384913854
confint(obj1) # (-9.731042326429021, -1.788957673570978)
☆ 二次データから検定
# OneSampleTTest(xbar::Real, stddev::Real, n::Int, μ0::Real = 0)
原データがあれば必要ないが,サンプルサイズと平均値と標準偏差しかわからないようなときに使う。
OneSampleTTest(mean(x), std(x), length(x))
#=
One sample t-test
-----------------
Population details:
parameter of interest: Mean
value under h_0: 0
point estimate: -5.76
95% confidence interval: (-9.731, -1.789)
Test summary:
outcome with 95% confidence: reject h_0
two-sided p-value: 0.0068
Details:
number of observations: 20
t-statistic: -3.035933027748186
degrees of freedom: 19
empirical standard error: 1.89727505427625
=#
★ 対応のある標本の t 検定
# OneSampleTTest(x::AbstractVector{T<:Real}, y::AbstractVector{T<:Real}, μ0::Real = 0)
内部で,2 つのベクトルの差をとって,OneSampleTTest(v::AbstractVector{T<:Real}, μ0::Real = 0) を呼ぶ。
# using Random; Random.seed!(777)
# x = round.(10randn(20), digits=1);
# y = round.(10randn(20), digits=1);
x = [-2.3, -2.2, -12.9, -9.0, -10.5, -19.6, 0.1, 0.9, 5.4, -2.4, -6.0, 5.6, 0.5, -3.3, -14.8, -7.1, 10.1, -11.6, -16.1, -20.0];
y = [0.2, 2.9, 2.4, 14.8, -15.1, -6.4, 5.3, 9.8, 0.3, 17.1, -4.5, 4.6, -0.8, 16.5, -8.0, -0.7, 0.9, -3.1, 16.6, -6.7];
obj2 = OneSampleTTest(x, y)
#=
One sample t-test
-----------------
Population details:
parameter of interest: Mean
value under h_0: 0
point estimate: -8.065
95% confidence interval: (-13.0169, -3.1131)
Test summary:
outcome with 95% confidence: reject h_0
two-sided p-value: 0.0029
Details:
number of observations: 20
t-statistic: -3.4088330999414715
degrees of freedom: 19
empirical standard error: 2.3659122531221826
=#
obj2.n # 20
obj2.xbar # -8.065000000000001
obj2.df # 19
obj2.stderr # 2.3659122531221826
obj2.t # -3.4088330999414715
obj2.μ0 # 0
pvalue(obj2) # 0.002945026367929675
confint(obj2) # (-13.01691125640409, -3.113088743595913)
★ 独立二標本の t 検定(等分散を仮定する)
# EqualVarianceTTest(x::AbstractVector{T<:Real}, y::AbstractVector{T<:Real})
# using Random; Random.seed!(888)
# x = round.(10randn(20), digits=1);
# y = round.(10randn(25), digits=1);
x = [-8.7, -10.3, -3.9, 10.5, -0.2, 25.7, -1.9, -6.9, 0.5, -7.0, -2.0, 4.3, 7.8, -5.1, 9.9, -12.1, -8.8, 7.7, 10.6, -4.2];
y = [18.3, 12.8, -6.5, 17.9, 12.3, 1.0, -0.1, 2.9, -2.2, -10.5, 3.9, 6.7, 4.1, 5.5, -11.6, -9.2, -4.1, 13.8, -1.3, -2.6, -7.3, -0.7, 1.3, -1.4, -15.5];
obj3 = EqualVarianceTTest(x, y)
#=
Two sample t-test (equal variance)
----------------------------------
Population details:
parameter of interest: Mean difference
value under h_0: 0
point estimate: -0.805
95% confidence interval: (-6.3666, 4.7566)
Test summary:
outcome with 95% confidence: fail to reject h_0
two-sided p-value: 0.7718
Details:
number of observations: [20,25]
t-statistic: -0.2919013756195737
degrees of freedom: 43
empirical standard error: 2.757780768560449
=#
obj3.n_x # 20
obj3.n_y # 25
obj3.xbar # -0.8050000000000002
obj3.df # 43
obj3.stderr # 2.757780768560449
obj3.t # -0.2919013756195737
obj3.μ0 # 0
pvalue(obj3) # 0.7717657306481219
confint(obj3) # (-6.36659496313637, 4.75659496313637)
★ 独立二標本の t 検定(等分散を仮定しない)
# UnequalVarianceTTest(x::AbstractVector{T<:Real}, y::AbstractVector{T<:Real})
obj4 = UnequalVarianceTTest(x, y)
#=
Two sample t-test (unequal variance)
------------------------------------
Population details:
parameter of interest: Mean difference
value under h_0: 0
point estimate: -0.805
95% confidence interval: (-6.4074, 4.7974)
Test summary:
outcome with 95% confidence: fail to reject h_0
two-sided p-value: 0.7730
Details:
number of observations: [20,25]
t-statistic: -0.2904053873841886
degrees of freedom: 40.00425561749165
empirical standard error: 2.771987142700746
=#
obj4.n_x # 20
obj4.n_y # 25
obj4.xbar # -0.8050000000000002
obj4.df # 40.00425561749165
obj4.stderr # 2.771987142700746
obj4.t # -0.2904053873841886
obj4.μ0 # 0
pvalue(obj4) # 0.7730062899251595
confint(obj4) # (-6.407376431491471, 4.79737643149147)
★ 二次データからの検定
一標本検定の場合には二次データからの検定関数が用意されているが,それ以外の場合にはない。
そこで,以下のような関数を定義しておく。
なお,検定結果の精度は分析に用いる二次データの精度に依存する。
output(obj) = println("t = $(obj.t), df = $(obj.df), p value = $(pvalue(obj))")
☆ 二次データを用いて対応のある標本の t 検定
それぞれの標本の平均値と標準偏差,サンプルサイズと標本間の相関係数が必要である。
function OneSampleTTest(mx::Real, sdx::Real, my::Real, sdy::Real, n::Int, r::Real, μ0::Real = 0)
stderr = sqrt((sdx^2 + sdy^2 - 2r*sqrt(sdx^2 * sdy^2)) / n)
xbar = mx - my
t = (xbar - μ0) / stderr
df = n - 1
OneSampleTTest(n, xbar, df, stderr, t, μ0)
end
Random.seed!(124)
x = randn(30);
y = randn(30);
a = OneSampleTTest(mean(x), std(x), mean(y), std(y), length(x), cor(x, y));
output(a) # t = -1.4421321433740857, df = 29, p value = 0.159978702161144
b = OneSampleTTest(x, y);
output(b) # t = -1.4421321433740855, df = 29, p value = 0.15997870216114402
☆ 二次データを用いて等分散を仮定する独立二標本の t 検定
各群の平均値,標準偏差,サンプルサイズが必要である。
function EqualVarianceTTest(mx::Real, sdx::Real, nx::Int, my::Real, sdy::Real, ny::Int, μ0::Real = 0)
xbar = mx - my
df = nx + ny - 2
stddev = sqrt(((nx - 1)*sdx^2 + (ny - 1)*sdy^2) / df)
stderr = stddev * sqrt(1/nx + 1/ny)
t = (xbar - μ0) / stderr
EqualVarianceTTest(nx, ny, xbar, df, stderr, t, μ0)
end
mx, sdx, nx, my, sdy, ny = 10.0, 2.0, 15, 12.0, 3.0, 30
c = EqualVarianceTTest(mx, sdx, nx, my, sdy, ny);
output(c) # t = -2.329349159719606, df = 43, p value = 0.024606293004729628
これが正しいことを示すには,以下の関数で mx, sdx, nx および my, sdy, ny となるデータ x, y を生成し,EqualVarianceTTest(x::AbstractVector{T<:Real}, y::AbstractVector{T<:Real}) を呼んでみればよい。
function gendat(mx, sdx, nx)
x = rand(nx) # どんな分布でもよい。極端な場合は x = 1:nx でもよい。
((x .- mean(x)) / std(x)) .* sdx .+ mx
end
d = EqualVarianceTTest(gendat(mx, sdx, nx), gendat(my, sdy, ny));
output(d) # t = -2.3293491597196025, df = 43, p value = 0.024606293004729822
☆ 二次データを用いて等分散を仮定しない独立二標本の t 検定
各群の平均値,標準偏差,サンプルサイズが必要である。
function UnequalVarianceTTest(mx::Real, sdx::Real, nx::Int, my::Real, sdy::Real, ny::Int, μ0::Real = 0)
xbar = mx - my
varx, vary = sdx^2, sdy^2
stderr = sqrt(varx/nx + vary/ny)
t = (xbar - μ0) / stderr
df = (varx/nx + vary/ny)^2 / ((varx/nx)^2 / (nx - 1) + (vary/ny)^2 / (ny - 1))
UnequalVarianceTTest(nx, ny, xbar, df, stderr, t, μ0)
end
mx, sdx, nx, my, sdy, ny = 10.0, 2.0, 15, 12.0, 3.0, 30
e = UnequalVarianceTTest(mx, sdx, nx, my, sdy, ny);
output(e) # t = -2.6568446566202857, df = 39.24214046822742, p value = 0.01134748068840573
f = UnequalVarianceTTest(gendat(mx, sdx, nx), gendat(my, sdy, ny));
output(f) # t = -2.6568446566202883, df = 39.242140468227426, p value = 0.011347480688405645
※コメント投稿者のブログIDはブログ作成者のみに通知されます