半角チルダ

ExcelVBA、その他。
覚え書きや、補足資料などのスクラップブック。
end-u(1037781)

■VBA c.oshiete.goo.ne.jp/profile/answer/history

2010-12-07 22:00:00 | scrap
昨日のネタフリでできたUserFormに以下。

'UserForm Module
Option Explicit

Private Const READYSTATE_COMPLETE As Long = 4
Private Const MX As Long = 10 '使用するWebBrowser数
Private Const cx As Long = 8 'データ取得用配列の列サイズ
'エラーor読み込みor位置 判断keyword
Private Const keyW1 = "非公開に設定されています"
Private Const keyW2 = "404 Not Found"
Private Const keyW3 = "<DIV class=ok_lq_qa_list_r>"
Private Const keyW4 = "<DIV id=paginater_history_answers>"
'RegExp用pattern
Private Const PTNd = "<SPAN id=answer_count>(\d+)</SPAN>"
Private Const PTN0 = "質問者:| -日付:| -回答数:|カテゴリ:|" _
          & "<STRONG>画像</STRONG>|<STRONG>絵</STRONG>"
Private Const PTN1 = ">([^<]+)<"
Private Const PTN2 = "<P class=qat><A href=""([^<]+)"">"

Private escFlg As Boolean 'Cancel用フラグ
Private reg  As Object 'VBScript.RegExp
Private buf() As String 'data取得用
'-------------------------------------------------
Private Sub main()
  Dim sUrL(0 To 5)    As String 'URL文字列
  Dim v(0 To 50, 0 To cx) As String '書き出し用配列
  Dim wFlg(1 To MX)    As Boolean 'WebBrowser稼動判定
  Dim dFlg  As Boolean 'LoopExit判定用
  Dim x    As Object 'HTMLelement
  Dim mc   As Object 'RegExp.Match
  Dim ret   As String 'innerHTMLチェック用
  Dim dat()  As String 'data取得用
  Dim i    As Long  'カウンタ
  Dim j    As Long  'カウンタ
  Dim k    As Long  'カウンタ
  Dim p    As Long  'InStr結果。文字検出位置
  Dim cnt   As Long  'countup用
  Dim pg   As Long  'Page数
  Dim ac   As Long  '回答数
  Dim wx   As Long  'Loop設定用
  Dim timeout As Single 'タイムアウト用
  Dim uid, page
  
  On Error GoTo exitLine
  
  timeout = Timer + 100 'タイムアウト設定
  'If MsgBox("Activesheetに書き出します。OK?", vbOKCancel) = vbNo Then Exit Sub
  '回答者ユーザー№
  uid = 510433 'Application.InputBox("user№", , xxxxxxx, type:=1)
  If VarType(uid) = vbBoolean Then Exit Sub

  'sUrL(0) = "http://bekkoame.okwave.jp"
  sUrL(0) = "http://c.oshiete.goo.ne.jp"
  sUrL(1) = "/profile/answer/history/u"
  sUrL(2) = CStr(uid)
  sUrL(3) = ".html?page="
  sUrL(4) = 1
  sUrL(5) = "#tabs"

  Set reg = CreateObject("VBScript.RegExp")
  reg.Global = True
  reg.Pattern = PTNd

  'WebBrowser1を使って回答数などの基本情報取得
  With Me.WebBrowser1
    .Navigate Join(sUrL, "")
    '待ち
    While .Busy Or (.ReadyState <> READYSTATE_COMPLETE)
      DoEvents
    Wend
    With .Document
      '待ち と回答数取得
      While (Not dFlg)
        DoEvents
        ret = .body.innerhtml
        If InStr(ret, keyW1) > 0 Then GoTo exitLine
        If InStr(ret, keyW2) > 0 Then GoTo exitLine
        Set mc = reg.Execute(ret)
        dFlg = (mc.Count > 0)
      Wend
      'ユーザー名
      For Each x In .getElementsByTagName("h3")
        v(cnt, 0) = x.innertext
        Exit For
      Next
      'UserForm1.Tagに文字"a"をセットした時だけ有効
      If Me.Tag = "a" Then
        '登録日やプロフィールなど取得
        cnt = 1
        For Each x In .getElementsByTagName("p")
          If cnt = 2 Then Exit For
          If x.innertext Like "登録日:*" Then
            v(cnt, 0) = x.innertext
            cnt = cnt + 1
          End If
        Next
        v(cnt, 0) = x.innertext
        For Each x In .getElementsByTagName("table")
          If x.classname = "ok_mypage_userdata" Or _
            x.classname = "ok_mypage_userprofile" Then
            For i = 0 To x.Rows.length - 1
              cnt = cnt + 1
              For j = 0 To x.Rows(i).Cells.length - 1
                v(cnt, j) = x.Rows(i).Cells(j).innertext
              Next
            Next
          End If
        Next
      End If
    End With
  End With

  '読み込みページ数をセット
  ac = CLng(mc(0).submatches(0))
  pg = (ac - 1) \ 20 + 1
  If Me.Tag = "a" Then
    page = Application.InputBox("page", , pg, type:=1)
    If (VarType(page) = vbBoolean) Or (page < 1) Then GoTo exitLine
    If pg > page Then
      pg = page
      ac = page * 20
    End If
  End If

  '配列サイズとLoop数調整
  ReDim dat(1 To pg) As String
  If pg > MX Then
    wx = MX
  Else
    wx = pg
  End If
  k = 0
  'WebBrowserLoopして初回読み込み開始
  For i = 1 To wx
    'Navigate
    With Me.Controls("WebBrowser" & i)
      k = k + 1
      sUrL(4) = CStr(k)
      .Tag = sUrL(4)
      .Navigate2 Join(sUrL, "")
      wFlg(i) = True
    End With
  Next
  '読み込み終わったWebBrowserから順次、次のページ読み込み
  Do
    dFlg = False
    For i = 1 To wx
      DoEvents
      If escFlg Then GoTo exitLine
      If timeout < Timer Then GoTo exitLine
      'innerHTML取得
      With Me.Controls("WebBrowser" & i)
        If wFlg(i) Then
          If (Not .Busy) And _
            (.ReadyState = READYSTATE_COMPLETE) Then
            ret = .Document.body.innerhtml
            If InStr(ret, keyW4) > 0 Then
              p = InStr(ret, keyW3)
              If p > 0 Then
                ret = Mid$(ret, p + Len(keyW3))
                p = InStr(ret, keyW4)
                dat(CLng(.Tag)) = Left$(ret, p - 1)
              End If
              If k = pg Then
                wFlg(i) = False
              Else
                k = k + 1
                sUrL(4) = CStr(k)
                .Tag = sUrL(4)
                .Navigate2 Join(sUrL, "")
              End If
            End If
          End If
        End If
      End With
    Next
    '終了チェック
    For i = 1 To wx
      If wFlg(i) Then
        dFlg = True
        Exit For
      End If
    Next
    If Not dFlg Then Exit Do
  Loop

  ReDim buf(1 To ac, 1 To cx) As String
  'RegExpでのデータ分割へ
  Call matchToV(Join(dat, ""), ac)

  With Sheets.Add 'ActiveSheet
    .UsedRange.ClearContents
    .Cells(1).Resize(cnt + 1, cx).Value = v
    .Cells(cnt + 2, 1).Resize(ac, cx).Value = buf
  End With

exitLine:
  Erase dat
  Erase buf
  Set mc = Nothing
  Set reg = Nothing
  Unload Me
  With Err()
    If .Number <> 0 Then
      MsgBox .Number & "::" & .Description
    End If
  End With
  Debug.Print Timer - (timeout - 100)
End Sub
'-------------------------------------------------
Private Sub matchToV(ByVal ret As String, ByVal ac As Long)
  Dim dic As Object 'Scriping.Dictionary
  Dim mc1 As Object 'RegExp.Match
  Dim mc2 As Object 'RegExp.Match
  Dim flg As Boolean '重複フラグ
  Dim key As String '重複チェックkey
  Dim x  As Long
  Dim y  As Long
  Dim i  As Long
  Dim j  As Long
  Dim k  As Long
  Dim idx As Long
  Dim cnt As Long

  reg.Pattern = "\r\n"
  ret = reg.Replace(ret, "")
  reg.Pattern = PTN0
  ret = reg.Replace(ret, "<>")
  reg.Pattern = PTN1
  Set mc1 = reg.Execute(ret)
  reg.Pattern = PTN2
  Set mc2 = reg.Execute(ret)
  Set dic = CreateObject("Scripting.Dictionary")
  x = 0
  y = 0
  k = ac
  For i = 1 To k
    flg = False
    key = mc2(y).submatches(0)
    y = y + 1
    If dic.exists(key) Then
      idx = dic(key)
      flg = mc1(x).submatches(0) <> "ベストアンサー"
    Else
      cnt = cnt + 1
      idx = cnt
      dic(key) = idx
      buf(idx, 1) = idx
      buf(idx, 2) = key
    End If

    If flg Then
      x = x + 6
    Else
      For j = 3 To cx
        buf(idx, j) = mc1(x).submatches(0)
        x = x + 1
      Next
    End If
  Next

  Set mc1 = Nothing
  Set mc2 = Nothing
  Set dic = Nothing
End Sub
'-------------------------------------------------
Private Sub CommandButton1_Click()
  escFlg = True
End Sub
'-------------------------------------------------
Private Sub UserForm_Activate()
  Call main
End Sub


With UserForm1
  .Tag = ""
  .Show
End With

With UserForm1
  .Tag = "a"
  .Show
End With
などとやって呼び出し。

使えなくなった■教えて!gooユーザープロフィール取得コードの代替。
1コのWebBrowserControlを回すよりちょっと速い。
もうちょっとコード整理したいと思いつつもまぁ..動いてるからいいか、なんて :(
Comment
  • X
  • Facebookでシェアする
  • はてなブックマークに追加する
  • LINEでシェアする