#==========
Julia の修行をするときに,いろいろなプログラムを書き換えるのは有効な方法だ。
以下のプログラムを Julia に翻訳してみる。
総当たり法による重回帰分析
http://aoki2.si.gunma-u.ac.jp/R/All_possible_subset_selection.html
ファイル名: allpossiblesubsetselection.jl
関数名: allpossiblesubsetselection(dat; vname=[], limit = 10)
printallpossiblesubsetselection(obj; models = 20, sortby = "adj")
翻訳するときに書いたメモ
分析結果を並べ替えて表示する関数を別立てにしている。
==========#
using Statistics, Combinatorics, Printf
function allpossiblesubsetselection(dat; vname=[], limit = 10)
nv = size(dat, 2)
nv <= limit || error("独立変数が $limit 個以上である(多すぎる)。\nlimit 引数で変更できる。")
length(vname) != 0 || (vname = vcat(["x" * string(i) for i = 1:nv-1], "y"))
nv -= 1
n = 2 ^ nv
depname = vname[end]
indepname = vname[1:end-1]
indepdat = copy(dat[:, 1:nv])
formula = fill("", n);
results = zeros(n, 4);
for (i, j) in enumerate(Iterators.product([[0, 1] for i = 1:nv]...))
suf = [k == 1 for k in j]
str = indepname[suf]
dat1 = hcat(indepdat[:, suf], dat[:, end])
formula[i] = depname * " ~ " * join(str, " + ")
results[i, :] .= mreg(dat1)
end
res = Dict(:results => (results[2:end, :], formula[2:end]))
printallpossiblesubsetselection(res; models = 20, sortby = "rsq")
res
end
function mreg(dat) # 回帰分析を行い R2, R2a, AIC, Loglik を返す
n, nc = size(dat)
nv = nc - 1
r = cor(dat)
betas = r[1:end-1, 1:end-1] \ r[1:end-1, end]
Sr = sum(r[1:nv, nc] .* betas)
Se = 1 - Sr
Vy = var(dat[:, nc]) * (n - 1)
R2 = Sr
R2a = 1 - Se / (n - nv - 1) * (n - 1)
LogLik = -0.5n * (log(2π) + 1 - log(n) + log(Se * Vy))
AIC = 2nc + 2 - 2LogLik
[R2, R2a, AIC, LogLik]
end
function printallpossiblesubsetselection(obj; models = 20, sortby = "adj") # "rsq", "AIC", "loglik"
formula = obj[:results][2]
results = obj[:results][1]
key = indexin([sortby],["adj", "rsq", "AIC", "loglik"])[1]
o = sortperm(results[:, key], rev=(sortby!="AIC"))
results = results[o, :]
formula = formula[o]
@printf("\n R square Adjusted R square AIC log-likelihood Formula\n")
for i in 1:min(models, size(results, 1))
@printf(" %8.5f %13.5f %13.5f %13.5f %s\n",
results[i, 1], results[i, 2], results[i, 3], results[i, 4], formula[i])
end
end
using RDatasets
iris = dataset("datasets", "iris");
vname=["SepalLength", "SepalWidth", "PetalLength", "PetalWidth"];
dat = Array(iris[:, 1:4]);
res = allpossiblesubsetselection(dat, vname=vname)
#=
R square Adjusted R square AIC log-likelihood Formula
0.93785 0.93657 -63.50218 36.75109 PetalWidth ~ SepalLength + SepalWidth + PetalLength
0.92975 0.92879 -47.11940 27.55970 PetalWidth ~ SepalWidth + PetalLength
0.92902 0.92806 -45.58470 26.79235 PetalWidth ~ SepalLength + PetalLength
0.92711 0.92662 -43.59109 24.79555 PetalWidth ~ PetalLength
0.74293 0.73943 147.46929 -69.73464 PetalWidth ~ SepalLength + SepalWidth
0.66903 0.66679 183.37107 -88.68553 PetalWidth ~ SepalLength
0.13405 0.12820 327.64025 -160.82012 PetalWidth ~ SepalWidth
Dict{Symbol,Tuple{Array{Float64,2},Array{String,1}}} with 1 entry:
:results => ([0.669028 0.666791 183.371 -88.6855; 0.134048 0.128197 327.64 -1…
=#
printallpossiblesubsetselection(res, sortby="AIC")
#=
R square Adjusted R square AIC log-likelihood Formula
0.93785 0.93657 -63.50218 36.75109 PetalWidth ~ SepalLength + SepalWidth + PetalLength
0.92975 0.92879 -47.11940 27.55970 PetalWidth ~ SepalWidth + PetalLength
0.92902 0.92806 -45.58470 26.79235 PetalWidth ~ SepalLength + PetalLength
0.92711 0.92662 -43.59109 24.79555 PetalWidth ~ PetalLength
0.74293 0.73943 147.46929 -69.73464 PetalWidth ~ SepalLength + SepalWidth
0.66903 0.66679 183.37107 -88.68553 PetalWidth ~ SepalLength
0.13405 0.12820 327.64025 -160.82012 PetalWidth ~ SepalWidth
=#
printallpossiblesubsetselection(res, sortby="loglik")
#=
R square Adjusted R square AIC log-likelihood Formula
0.93785 0.93657 -63.50218 36.75109 PetalWidth ~ SepalLength + SepalWidth + PetalLength
0.92975 0.92879 -47.11940 27.55970 PetalWidth ~ SepalWidth + PetalLength
0.92902 0.92806 -45.58470 26.79235 PetalWidth ~ SepalLength + PetalLength
0.92711 0.92662 -43.59109 24.79555 PetalWidth ~ PetalLength
0.74293 0.73943 147.46929 -69.73464 PetalWidth ~ SepalLength + SepalWidth
0.66903 0.66679 183.37107 -88.68553 PetalWidth ~ SepalLength
0.13405 0.12820 327.64025 -160.82012 PetalWidth ~ SepalWidth
=#
※コメント投稿者のブログIDはブログ作成者のみに通知されます