裏 RjpWiki

Julia ときどき R, Python によるコンピュータプログラム,コンピュータ・サイエンス,統計学

Julia に翻訳--204 数量化 II 類

2021年04月29日 | ブログラミング
#==========
Julia の修行をするときに,いろいろなプログラムを書き換えるのは有効な方法だ。
以下のプログラムを Julia に翻訳してみる。

数量化 II 類
http://aoki2.si.gunma-u.ac.jp/R/qt2.html

ファイル名: qt2.jl  関数名: qt2, printqt2, plotqt2

翻訳するときに書いたメモ

カテゴリーデータのまま入力できるようにしようかとも思ったが,止めた。

==========#

using StatsBase, LinearAlgebra, Rmath, NamedArrays, Plots, StatsPlots

function qt2(dat, group; vname=[], cname=[], gname=[])
    na = NamedArray
    function geneig(a, b)
        if size(a, 1) == 1
            return a / b, 1
        else
            values, vectors = eigen(b, sortby=x-> -x)
            g = Diagonal(1 ./ sqrt.(values))
            v = vectors
            values, vectors = eigen(g * v' * a * v * g, sortby=x-> -x)
            vectors = v * g * vectors
            return values, vectors
        end
    end

    nc, item = size(dat)
    index, n = table(group)
    ng = length(n)
    dat = hcat(dat, group)
    cat = vec(maximum(dat, dims=1))
    length(vname) > 0 || (vname = ["x$i" for i = 1:item])
    length(gname) > 0 || (gname = ["g$i" for i = 1:ng])
    if length(cname) == 0
        cname = []
        for i = 1:item
            append!(cname, ["$(vname[i]).$j" for j = 1:cat[i]])
        end
    end
    junjo = vcat(0, cumsum(cat)[1:end-1])
    nobe2 = sum(cat)
    nobe = nobe2 - ng
    dat2 = zeros(Int, nc, nobe2)
    for i = 1:nc
        x0 = []
        for j = 1:item + 1
            zeros0 = zeros(cat[j])
            zeros0[dat[i, j]] = 1
            append!(x0, zeros0)
        end
        dat2[i, :] = x0
    end
    a2 = zeros(Int, nobe2, nobe2, nc)
    for j = 1:nc
        a2[:, :, j] = dat2[j, :] .* dat2[j, :]'
    end
    x = sum(a2, dims=3)
    pcros = x[1:nobe, 1:nobe]
    gcros = x[(nobe + 1):nobe2, 1:nobe]
    w = (n * n') ./ nc
    [w[i, i] = 0 for i = 1:ng]
    grgr = Diagonal(n .- n .^ 2 / nc) - w
    w = diag(pcros)
    grpat = gcros - (n .* w') / nc
    pat = pcros - (w .* w') / nc
    select = (junjo .+ 1)[1:item]
    suf = trues(nobe)
    suf[select] .= false
    pat = pat[suf, suf]
    grpat = grpat[2:end, suf]
    r = grgr[2:end, 2:end]
    ndim = ng - 1
    axisname = ["Axis-$i" for i = 1:ndim]
    m = size(pat, 2)
    c = grpat * (pat \ grpat')
    values, vectors = geneig(c, r)
    w = sqrt.(values)' .\ (pat \ grpat' * vectors)
    a = zeros(nobe, ndim)
    ie = 0
    for j in 1:item
        is = ie + 1
        ie = is + cat[j] - 2
        offset = junjo[j] + 2
        a[offset:(offset + ie - is), :] = w[is:ie, :]
    end
    w = diag(pcros) .* a
    for j in 1:item
        is = junjo[j] + 1
        ie = is + cat[j] - 1
        s = sum(w[is:ie, :], dims=1) / nc
        a[is:ie, :] = a[is:ie, :] .- s
    end
    a = sqrt.(diag(a' * pcros * a) ./ nc)' .\ a
    samplescore = dat2[:, 1:nobe] * a
    centroid = n .* vcat(0, vectors)
    centroid = vcat(0, vectors) .- sum(centroid, dims=1) / nc
    centroid = sqrt.(vec(sum(centroid .^ 2 .* n, dims=1)) ./ nc ./ values)' .\ centroid
    item1 = item + 1
    partialcorr = zeros(item, ndim)
    for l = 1:ndim
        pat = zeros(item1, item1)
        pat[1, 1] = sum(n .* centroid[:, l] .^ 2)
        temp = sum(a[:, l] .* transpose(gcros .* centroid[:, l]), dims=2)
        is = junjo[1:end-1]
        ie = junjo[1:end-1] + cat[1:end-1]
        pat[1, 2:(item + 1)] = [sum(temp[is[k]+1:ie[k]]) for k = 1:length(is)]
        temp = (a[:, l] * a[:, l]') .* pcros
        for i = 1:item
            is = junjo[i] + 1
            ie = is + cat[i] - 1
            for j = 1:i
                pat[j + 1, i + 1] = sum(temp[is:ie, (junjo[j] + 1):(junjo[j] + cat[j])])
            end
        end
        d = diag(pat)
        pat = pat ./ sqrt.(d * d')
        pat = pat + transpose(pat)
        [pat[i, i] = 1 for i = 1:item1]
        pat = inv(pat)
        partialcorr[:, l] = -pat[2:item1, 1] ./ sqrt.(pat[1, 1] * diag(pat)[2:item1])
    end
    Dict(:nc => nc, :ndim => ndim, :group => group, :ng => ng, :categoryscore => a,
         :partialcorr => partialcorr, :centroid => centroid, :eta => values,
         :samplescore => samplescore, :vname => vname, :gname => gname,
         :cname => cname, :axisname => axisname)
end

function table(x) # indices が少ないとき
    indices = sort(unique(x))
    counts = zeros(Int, length(indices))
    for i in indexin(x, indices)
        counts[i] += 1
    end
    return indices, counts
end

function table(x, y) # 二次元
    indicesx = sort(unique(x))
    indicesy = sort(unique(y))
    counts = zeros(Int, length(indicesx), length(indicesy))
    for (i, j) in zip(indexin(x, indicesx), indexin(y, indicesy))
        counts[i, j] += 1
    end
    return indicesx, indicesy, counts
end

function printqt2(obj)
    axisname = obj[:axisname]
    println("\ncategory score\n", NamedArray(obj[:categoryscore], (obj[:cname], axisname)))
    println("\npartial correlation coefficient\n", NamedArray(obj[:partialcorr], (obj[:vname], axisname)))
    println("\nsample score\n", NamedArray(obj[:samplescore], (1:obj[:nc], axisname)))
    println("\ncentroids\n", NamedArray(obj[:centroid], (obj[:gname], axisname)))
    println("\neta\n", NamedArray(reshape(obj[:eta], 1, :), (["eta"], axisname)))
end

function plotqt2(obj; i = 1, j = 2, color=:blue, nclass = 20, which = "boxplot") # or "barplot" or "categoryscore"
    pyplot()
    if which == "categoryscore"
        categoryscore = obj[:categoryscore][end:-1:1, i]
        cname = [" $s " for s in obj[:cname][end:-1:1]]
        align = [c > 0 ? :right : :left for c in categoryscore]
        plt = bar(categoryscore, orientation=:h, grid=false,
                  yshowaxis=false, yticks=false,
                  xlabel=which, label="")
        annotate!.(0, 1:length(cname), text.(cname, align, 8))
        vline!([0], color=:black, label="")
    else
        group = obj[:group]
        if obj[:ndim] > 1
            xlabel, ylabel = obj[:axisname][[i, j]]
            grouplevels = obj[:gname]
            samplescore = obj[:samplescore]
            plt = scatter(samplescore[:, i], samplescore[:, j], grid=false,
                          tick_direction = :out, xlabel = xlabel,
                          ylabel = ylabel, color = color, markerstrokecolor=color,
                          label="")
        else
            if which == "boxplot"
                plt = boxplot(obj[:gname], obj[:samplescore][:, 1], grid=false,
                              tick_direction = :out, xlabel = "group",
                              ylabel = "sample score", label="")
            elseif which == "barplot"
                samplescore = obj[:samplescore][:, 1];
                minx, maxx = extrema(samplescore)
                w = (maxx - minx) / (nclass - 1)
                samplescore2 = floor.(Int, (samplescore .- minx) ./ w)
                index1, index2, res = table(samplescore2, group)
                plt = groupedbar(res, xlabel = "sample score", label="")
            end
        end
    end
    display(plt)
end

dat = [
    3  2
    3  1
    3  2
    2  1
    2  1
    3  1
    2  1
    1  2]

group = [1, 2, 2, 1, 2, 2, 1, 2]

obj = qt2(dat, group)
printqt2(obj)
#=====

category score
5×1 Named Matrix{Float64}
A ╲ B │   Axis-1
──────┼─────────
x1.1  │  2.31046
x1.2  │ -1.61032
x1.3  │ 0.630126
x2.1  │ 0.630126
x2.2  │ -1.05021

partial correlation coefficient
2×1 Named Matrix{Float64}
A ╲ B │   Axis-1
──────┼─────────
x1    │ 0.612372
x2    │ 0.421464

sample score
8×1 Named Matrix{Float64}
A ╲ B │    Axis-1
──────┼──────────
1     │ -0.420084
2     │   1.26025
3     │ -0.420084
4     │ -0.980196
5     │ -0.980196
6     │   1.26025
7     │ -0.980196
8     │   1.26025

centroids
2×1 Named Matrix{Float64}
A ╲ B │    Axis-1
──────┼──────────
g1    │ -0.793492
g2    │  0.476095

eta
1×1 Named Matrix{Float64}
A ╲ B │   Axis-1
──────┼─────────
eta   │ 0.377778
=====#


x1 = [3, 3, 3, 2, 2, 3, 2, 1];
x2 = [2, 1, 2, 1, 1, 1, 1, 2];
group = [1, 2, 2, 1, 2, 3, 3, 3];
dat = hcat(x1, x2);

obj = qt2(dat, group)
printqt2(obj)
#=====
category score
5×2 Named Matrix{Float64}
A ╲ B │     Axis-1      Axis-2
──────┼───────────────────────
x1.1  │    3.14099    0.267697
x1.2  │  -0.917694     1.19527
x1.3  │ -0.0969778   -0.963377
x2.1  │   0.712515   -0.655608
x2.2  │   -1.18752     1.09268

partial correlation coefficient
2×2 Named Matrix{Float64}
A ╲ B │   Axis-1    Axis-2
──────┼───────────────────
x1    │ 0.630249  0.239844
x2    │ 0.513893  0.203824

sample score
8×2 Named Matrix{Float64}
A ╲ B │    Axis-1     Axis-2
──────┼─────────────────────
1     │   -1.2845   0.129304
2     │  0.615537   -1.61899
3     │   -1.2845   0.129304
4     │ -0.205179   0.539662
5     │ -0.205179   0.539662
6     │  0.615537   -1.61899
7     │ -0.205179   0.539662
8     │   1.95347    1.36038

centroids
3×2 Named Matrix{Float64}
A ╲ B │    Axis-1     Axis-2
──────┼─────────────────────
g1    │ -0.744841   0.334483
g2    │ -0.291382  -0.316673
g3    │  0.787942  0.0936848

eta
1×2 Named Matrix{Float64}
A ╲ B │    Axis-1     Axis-2
──────┼─────────────────────
eta   │  0.403355  0.0688667
=====#
plotqt2(obj)
plotqt2(obj, which="barplot")
plotqt2(obj, which="categoryscore") # savefig("fig1.png")

using RDatasets
iris = dataset("datasets", "iris");
data = Matrix(iris[:, 1:4]);
group = vcat(repeat([1], 50), repeat([2], 50), repeat([3], 50)...);

for i = 1:4
    q = quantile(data[:, i])
    for k = 1:150
        x = data[k, i]
        for j = 2:5
            if x <= q[j]
                data[k, i] = j-1
                break
            end
        end
    end
end
obj = qt2(Int.(data), group)
color = vcat(repeat([:blue], 50), repeat([:black], 50), repeat([:red], 50)...);
plotqt2(obj, color=color) # savefig("fig2.png")
コメント    この記事についてブログを書く
  • X
  • Facebookでシェアする
  • はてなブックマークに追加する
  • LINEでシェアする
« Julia の小ネタ--022 5 ^ 10... | トップ | Julia に翻訳--205 量化 III 類 »
最新の画像もっと見る

コメントを投稿

ブログラミング」カテゴリの最新記事