緩い解析でいいので、minidomで手を抜くことを考えます。
とりあえず、xmlファイルを読んでタグ名(要素名)と属性名を印字するプログラムを習作。
-----------------------------
import xml.dom.minidom as minidom
def dom_walk(node, enter_func, exit_func, recurse_predicate):
    """Walk a DOM tree depth-first, calling hooks around each visited node.

    For every visited node, ``enter_func(node)`` is called first, then each
    child accepted by ``recurse_predicate`` is walked recursively, and
    finally ``exit_func(node)`` is called.

    Args:
        node: Root node of the walk. It is always visited; the predicate is
            only applied to child nodes.
        enter_func: Callable invoked with a node before its children.
        exit_func: Callable invoked with a node after its children.
        recurse_predicate: Callable taking a child node; children for which
            it returns a false value are skipped along with their subtrees.
    """
    def dwi(node):
        enter_func(node)
        for childNode in filter(recurse_predicate, node.childNodes):
            dwi(childNode)
        exit_func(node)

    dwi(node)
if __name__ == '__main__':
    # Script entry point: parse the XML file named on the command line and
    # print an opening/closing tag line for every node that is walked.
    import sys

    if len(sys.argv) == 1:
        # No file argument was given: show the usage line and stop.
        print("usage walkxml <somexmlfile.xml>")
        sys.exit(0)

    xmlFileName = sys.argv[1]
    dom = minidom.parse(xmlFileName)

    def enter_func(node):
        """Print the opening tag, listing attribute names when present."""
        if node.attributes:
            print("<%s %s>" % (node.nodeName, " ".join(node.attributes.keys())))
        else:
            print("<%s>" % node.nodeName)

    def exit_func(node):
        """Print the closing tag for the node."""
        print("</%s>" % node.nodeName)

    def recurse_predicate(node):
        """Skip text nodes; every other child node is walked."""
        return node.nodeName != "#text"

    dom_walk(dom, enter_func, exit_func, recurse_predicate)
-----------------------------
とりあえず、xmlファイルを読んでタグ名(要素名)と属性名を印字するプログラムを習作。
-----------------------------
import xml.dom.minidom as minidom
def dom_walk(node, enter_func, exit_func, recurse_predicate):
    """Walk a DOM tree depth-first, calling hooks around each visited node.

    For every visited node, ``enter_func(node)`` is called first, then each
    child accepted by ``recurse_predicate`` is walked recursively, and
    finally ``exit_func(node)`` is called.

    Args:
        node: Root node of the walk. It is always visited; the predicate is
            only applied to child nodes.
        enter_func: Callable invoked with a node before its children.
        exit_func: Callable invoked with a node after its children.
        recurse_predicate: Callable taking a child node; children for which
            it returns a false value are skipped along with their subtrees.
    """
    def dwi(node):
        enter_func(node)
        for childNode in filter(recurse_predicate, node.childNodes):
            dwi(childNode)
        exit_func(node)

    dwi(node)
if __name__ == '__main__':
    # Script entry point: parse the XML file named on the command line and
    # print an opening/closing tag line for every node that is walked.
    import sys

    if len(sys.argv) == 1:
        # No file argument was given: show the usage line and stop.
        print("usage walkxml <somexmlfile.xml>")
        sys.exit(0)

    xmlFileName = sys.argv[1]
    dom = minidom.parse(xmlFileName)

    def enter_func(node):
        """Print the opening tag, listing attribute names when present."""
        if node.attributes:
            print("<%s %s>" % (node.nodeName, " ".join(node.attributes.keys())))
        else:
            print("<%s>" % node.nodeName)

    def exit_func(node):
        """Print the closing tag for the node."""
        print("</%s>" % node.nodeName)

    def recurse_predicate(node):
        """Skip text nodes; every other child node is walked."""
        return node.nodeName != "#text"

    dom_walk(dom, enter_func, exit_func, recurse_predicate)
-----------------------------