#==========
Julia の修行をするときに,いろいろなプログラムを書き換えるのは有効な方法だ。
以下のプログラムを Julia に翻訳してみる。
度数分布表から基礎統計量
http://aoki2.si.gunma-u.ac.jp/R/basic-stat.html
ファイル名: basicstat.jl 関数名: basicstat
翻訳するときに書いたメモ
g1, g2 は不偏ではない方なので,G1, G2 の変数名に直した。
==========#
function basicstat(x, f; verbose=true)
w = x[2] - x[1]
x .+= (w / 2)
n = sum(f)
m = sum(f .* x) / n
v = sum(f .* (x .- m) .^2) / (n-1)
SD = sqrt(v)
CV = SD / m * 100
G1 = sum(f .* (x .- m) .^ 3) * (n/(n-1)/(n-2)/SD^3)
G2 = sum(f .* (x .- m) .^ 4) * (n*(n+1)/(n-1)/(n-2)/(n-3)/SD^4) - 3*(n-1)^2/(n-2)/(n-3)
println("標本の大きさ = $n")
println("算術平均値 = $m")
println("不偏分散 = $v")
println("標準偏差 = $SD")
println("歪度 = $G1")
println("尖度 = $G2")
println("変動係数 = $CV")
Dict(:n => n, :mean => m, :variance => v, :sd => SD, :G1 => G1, :G2 => G2, :CV => CV)
end
x = collect(59.5:8:171.5);
f = [2, 6, 39, 385, 888, 1729, 2240, 2007, 1233, 641, 201, 74, 14, 5, 1];
basicstat(x, f)
# 標本の大きさ = 9465
# 算術平均値 = 113.89957739038563
# 不偏分散 = 184.8010141916274
# 標準偏差 = 13.594153676916683
# 歪度 = 0.18935593848947097
# 尖度 = 0.08913261364986402
# 変動係数 = 11.935209935260199
# Dict{Symbol,Real} with 7 entries:
# :G2 => 0.0891326
# :CV => 11.9352
# :mean => 113.9
# :n => 9465
# :G1 => 0.189356
# :variance => 184.801
# :sd => 13.5942
function rep(x, y)
length(y) != length(x) && (y = repeat([y], length(x)))
b = repeat([x[1]], y[1])
for i = 2:length(x)
append!(b, repeat([x[i]], y[i]))
end
b
end
z = rep(x, f);
length(z)
using Statistics
mean(z) # 113.89957739038563
var(z) # 184.8010141916273
std(z) # 13.594153676916681
scale(x; corrected=true) = (x .- mean(x)) ./ std(x; corrected)
function kurt(x; corrected = true)
n = length(x)
return corrected ?
n*(n+1)*sum(scale(x) .^ 4)/(n-1)/(n-2)/(n-3)-3*(n-1)^2/(n-2)/(n-3) :
sum(scale(x; corrected).^4)/n-3
end
function skew(x; corrected = true)
n = length(x)
return corrected ?
n*sum(scale(x) .^ 3)/(n-1)/(n-2) :
sum(scale(x; corrected) .^3) / n
end
skew(z) # 0.18935593848947108; G1
skew(z, corrected=false) # 0.18932592830327385; g1
kurt(z) # 0.08913261364986402; G2
kurt(z, corrected=false) # 0.08845168778550017; g2
using StatsBase
skewness(z) # 0.1893259283033084 biased
kurtosis(z) # 0.08845168778591805 biased
using RCall
R"""
options(digits=16)
library(e1071)
cat("skewness, type1 g1 ", skewness($z, type=1), "\n")
cat("skewness, type2 G1 ", skewness($z, type=2), "\n")
cat("skewness, type3 b1 ", skewness($z, type=3), "\n")
cat("kurtosis, type1 g2 ", kurtosis($z, type=1), "\n")
cat("kurtosis, type2 G2 ", kurtosis($z, type=2), "\n")
cat("kurtosis, type3 b2 ", kurtosis($z, type=3), "\n")
"""
# skewness, type1 g1 0.1893259283032742
# skewness, type2 G1 0.189355938489471
# skewness, type3 b1 0.1892959249867083
# kurtosis, type1 g2 0.08845168778549573
# kurtosis, type2 G2 0.08913261364986394
# kurtosis, type3 b2 0.0877991175717372