XPath test
「amachang の XPath テストコードを HTML::TreeBuilder::XPath で試してみる。 - TokuLog 改め だまってコードを書けよハゲ」を参考にコードを書いてみたけれど、よくわからなかったのでベタ書きにした。
XPath Test 上の Firebug コンソールで data を Perl の配列に整形して貼り付け、テストを二つ追加した。
関数が苦手っぽい。processing instruction, commentは使わないからこけてても別にいい。
*ng .//blockquote/comment()
    expected: c:blockquoteComment
    result :
*ng .//blockquote/processing-instruction()
    expected: p:pi
    result :
*ng .//blockquote/processing-instruction("pi")
    expected: p:pi
    result :
*ng .//blockquote/node()
    expected: c:blockquoteComment t:blockquoteText1: br t:blockquoteText2 p p:pi font
    result : t:blockquoteText1: br t:blockquoteText2 p font
*ng .//blockquote/descendant::*[position() < 4]
    expected: br p del
    result : br del ins
*xpath evaluation failed. id(.//font/@face)
Can't locate object method "look_down" via package "HTML::TreeBuilder::XPath::Attribute" at /Library/Perl/5.8.6/HTML/TreeBuilder/XPath.pm line 34.
*ng .//blockquote/descendant::*[4]
    expected: ins
    result : p
*ng .//blockquote/descendant-or-self::*[4]
    expected: del
    result : p
*ng .//blockquote/preceding::*[4]
    expected: strong
    result : h1
*xpath evaluation failed. .//*[substring(.,2,1) = "u"]
substr outside of string at /Library/Perl/5.8.6/XML/XPathEngine/Function.pm line 246.
*xpath evaluation failed. .//*[substring(.,2) = "up"]
substr outside of string at /Library/Perl/5.8.6/XML/XPathEngine/Function.pm line 249.
*xpath evaluation failed. .//*[lang("it")]
axis axis_attribute not implemented [Can't locate object method "getNamespace" via package "HTML::TreeBuilder::XPath::Attribute" at /Library/Perl/5.8.6/XML/XPathEngine.pm line 177.
]
*ng substring-after(id("n13")/@id,"n")
    expected: 13
    result :
*ng count(.//*)
    expected: 13
    result :
package t::Amachang;

# Runs a table of XPath expressions against a fixed HTML fixture using
# HTML::TreeBuilder::XPath and reports every expression whose result does
# not match the expected value.
#
# Each result node is turned into a label: text nodes become "t:<text>",
# everything else is labelled with its tag name.  The labels are joined with
# single spaces and compared with the test's "expected" string.  Passing
# tests are silent; mismatches are printed as "*ng ..." and XPath evaluation
# errors as "*xpath evaluation failed. ...".
#
# NOTE: some expectations use "c:" (comment) and "p:" (processing
# instruction) prefixes, but the labelling code below only ever produces the
# "t:" prefix or a tag name, so those entries cannot match as written.

use strict;
use warnings;
use HTML::TreeBuilder::XPath;
use List::MoreUtils qw/any/;
use FindBin;
use File::Spec;
use Test::More;
use Data::Dumper;

# Disabled: would re-root the tree at the test's context node.  Every test
# below currently has an empty context, so this is not needed.
# if ($context ne '/') {
#     my ($newtree,) = $tree->findnodes($context);
#     ok $newtree;
#     $tree = $newtree;
# }

# Test table.  context: unused (always empty); xpath: expression evaluated
# relative to <body>; expected: space-separated labels of the result nodes.
my @tests = (
    {context=>'',xpath=>q{.//blockquote/*},expected=>q{br p font}},
    {context=>'',xpath=>q{.//blockquote/child::*},expected=>q{br p font}},
    {context=>'',xpath=>q{.//blockquote/parent::*},expected=>q{center}},
    {context=>'',xpath=>q{.//blockquote/descendant::*},expected=>q{br p del ins font}},
    {context=>'',xpath=>q{.//blockquote/descendant-or-self::*},expected=>q{blockquote br p del ins font}},
    {context=>'',xpath=>q{.//blockquote/ancestor::*},expected=>q{html body div center}},
    {context=>'',xpath=>q{.//blockquote/ancestor-or-self::*},expected=>q{html body div center blockquote}},
    {context=>'',xpath=>q{.//blockquote/following-sibling::*},expected=>q{h3 h4}},
    {context=>'',xpath=>q{.//blockquote/preceding-sibling::*},expected=>q{h1 h2}},
    {context=>'',xpath=>q{.//blockquote/following::*},expected=>q{h3 dfn a h4 sub sup span abbr q}},
    {context=>'',xpath=>q{.//blockquote/preceding::*},expected=>q{head title script dl dt dd h1 em strong h2 b s}},
    {context=>'',xpath=>q{.//blockquote/self::*},expected=>q{blockquote}},
    {context=>'',xpath=>q{.//blockquote/attribute::id/parent::*},expected=>q{blockquote}},
    {context=>'',xpath=>q{.//blockquote/@id/parent::*},expected=>q{blockquote}},
    {context=>'',xpath=>q{.//*[blockquote]},expected=>q{center}},
    {context=>'',xpath=>q{.//*[child::blockquote]},expected=>q{center}},
    {context=>'',xpath=>q{.//*[parent::blockquote]},expected=>q{br p font}},
    {context=>'',xpath=>q{.//*[descendant::blockquote]},expected=>q{div center}},
    {context=>'',xpath=>q{.//*[descendant-or-self::blockquote]},expected=>q{div center blockquote}},
    {context=>'',xpath=>q{.//*[ancestor::blockquote]},expected=>q{br p del ins font}},
    {context=>'',xpath=>q{.//*[ancestor-or-self::blockquote]},expected=>q{blockquote br p del ins font}},
    {context=>'',xpath=>q{.//*[following-sibling::blockquote]},expected=>q{h1 h2}},
    {context=>'',xpath=>q{.//*[preceding-sibling::blockquote]},expected=>q{h3 h4}},
    {context=>'',xpath=>q{.//*[following::blockquote]},expected=>q{dl dt dd h1 em strong h2 b s}},
    {context=>'',xpath=>q{.//*[preceding::blockquote]},expected=>q{h3 dfn a h4 sub sup span abbr q}},
    {context=>'',xpath=>q{.//*[self::blockquote]},expected=>q{blockquote}},
    {context=>'',xpath=>q{.//*[@id]},expected=>q{div dl dt dd center h1 em strong h2 b s blockquote br p del ins font h3 dfn a h4 sub sup span abbr q}},
    {context=>'',xpath=>q{.//*[attribute::id]},expected=>q{div dl dt dd center h1 em strong h2 b s blockquote br p del ins font h3 dfn a h4 sub sup span abbr q}},
    {context=>'',xpath=>q{.//blockquote/text()},expected=>q{t:blockquoteText1: t:blockquoteText2}},
    {context=>'',xpath=>q{.//blockquote/comment()},expected=>q{c:blockquoteComment}},
    {context=>'',xpath=>q{.//blockquote/processing-instruction()},expected=>q{p:pi}},
    {context=>'',xpath=>q{.//blockquote/processing-instruction("pi")},expected=>q{p:pi}},
    {context=>'',xpath=>q{.//blockquote/node()},expected=>q{c:blockquoteComment t:blockquoteText1: br t:blockquoteText2 p p:pi font}},
    {context=>'',xpath=>q{.//blockquote/p},expected=>q{p}},
    {context=>'',xpath=>q{.//blockquote/*},expected=>q{br p font}},
    {context=>'',xpath=>q{.//*[child::* and preceding::font]},expected=>q{h3 h4 span}},
    {context=>'',xpath=>q{.//*[not(child::*) and preceding::font]},expected=>q{dfn a sub sup abbr q}},
    {context=>'',xpath=>q{.//*[preceding::blockquote or following::blockquote]},expected=>q{dl dt dd h1 em strong h2 b s h3 dfn a h4 sub sup span abbr q}},
    {context=>'',xpath=>q{.//blockquote/ancestor::* | .//blockquote/descendant::*},expected=>q{html body div center br p del ins font}},
    {context=>'',xpath=>q{.//*[.="sub"]},expected=>q{sub}},
    {context=>'',xpath=>q{.//*[@title > 12 and @class < 15]},expected=>q{br p del ins font}},
    {context=>'',xpath=>q{.//*[@title != @class]},expected=>q{div dl dt dd center em strong b s blockquote br p del ins font dfn a sub sup span abbr q}},
    {context=>'',xpath=>q{.//*[((@class * @class + @title * @title) div (@class + @title)) > ((@class - @title) * (@class - @title))]},expected=>q{dl h1 h2 s blockquote br p font h3 dfn a h4 sub sup span abbr q}},
    {context=>'',xpath=>q{.//*[@title mod 2 = 0]},expected=>q{dl dd h1 strong b blockquote p ins h3 a sub span q}},
    {context=>'',xpath=>q{.//blockquote/child::*[last()]},expected=>q{font}},
    {context=>'',xpath=>q{.//blockquote/descendant::*[position() < 4]},expected=>q{br p del}},
    {context=>'',xpath=>q{id(.//font/@face)},expected=>q{strong q}},
    {context=>'',xpath=>q{.//*[name(.) = "sub"]},expected=>q{sub}},
    {context=>'',xpath=>q{.//*[name() = "sub"]},expected=>q{sub}},
    {context=>'',xpath=>q{.//blockquote/child::*[2]},expected=>q{p}},
    {context=>'',xpath=>q{.//blockquote/descendant::*[4]},expected=>q{ins}},
    {context=>'',xpath=>q{.//blockquote/descendant-or-self::*[4]},expected=>q{del}},
    {context=>'',xpath=>q{.//blockquote/ancestor::*[2]},expected=>q{div}},
    {context=>'',xpath=>q{.//blockquote/ancestor-or-self::*[2]},expected=>q{center}},
    {context=>'',xpath=>q{.//blockquote/following-sibling::*[1]},expected=>q{h3}},
    {context=>'',xpath=>q{.//blockquote/preceding-sibling::*[1]},expected=>q{h2}},
    {context=>'',xpath=>q{.//blockquote/following::*[4]},expected=>q{h4}},
    {context=>'',xpath=>q{.//blockquote/preceding::*[4]},expected=>q{strong}},
    {context=>'',xpath=>q{.//*[starts-with(.,"s")]},expected=>q{strong s h4 sub sup}},
    {context=>'',xpath=>q{.//*[string(@title - 1) = "0"]},expected=>q{div}},
    {context=>'',xpath=>q{.//*[string() = "sub"]},expected=>q{sub}},
    {context=>'',xpath=>q{.//*[string(.) = "sub"]},expected=>q{sub}},
    {context=>'',xpath=>q{.//*[concat(.,..) = "subsubsup"]},expected=>q{sub}},
    {context=>'',xpath=>q{.//node()[concat(.,..,../..) = "bbbs"]},expected=>q{t:b}},
    {context=>'',xpath=>q{.//*[starts-with(.,"s")]},expected=>q{strong s h4 sub sup}},
    {context=>'',xpath=>q{.//*[substring-before(.,"u") = "s"]},expected=>q{h4 sub sup}},
    {context=>'',xpath=>q{.//*[substring-after(.,"on") = "t"]},expected=>q{blockquote font}},
    {context=>'',xpath=>q{.//*[substring(.,2,1) = "u"]},expected=>q{h4 sub sup}},
    {context=>'',xpath=>q{.//*[substring(.,2) = "up"]},expected=>q{sup}},
    {context=>'',xpath=>q{.//*[contains(.,"b")]},expected=>q{div center h2 b blockquote h4 sub span abbr}},
    {context=>'',xpath=>q{.//*[string-length() = 3]},expected=>q{del ins dfn sub sup}},
    {context=>'',xpath=>q{.//*[string-length(.) = 3]},expected=>q{del ins dfn sub sup}},
    {context=>'',xpath=>q{.//*[.=translate(normalize-space(" s u b ")," ","")]},expected=>q{sub}},
    {context=>'',xpath=>q{.//*[normalize-space()="q"]},expected=>q{q}},
    {context=>'',xpath=>q{.//*[boolean(@title - 1) = false()]},expected=>q{div}},
    {context=>'',xpath=>q{.//*[not(@title - 1) = true()]},expected=>q{div}},
    {context=>'',xpath=>q{.//*[lang("it")]},expected=>q{q}},
    {context=>'',xpath=>q{.//*[number(@title) < number(@class)]},expected=>q{div dl center blockquote span}},
    {context=>'',xpath=>q{.//*[sum(ancestor::*/@title) < sum(descendant::*/@title)]},expected=>q{div dl center h1 h2 blockquote p h3 h4 span}},
    {context=>'',xpath=>q{.//*[floor(@title div @class) = 1]},expected=>q{h1 em strong h2 b s br p del ins font h3 dfn a h4 sub sup abbr q}},
    {context=>'',xpath=>q{.//*[ceiling(@title div @class) = 1]},expected=>q{div dl center h1 h2 blockquote h3 h4 span}},
    {context=>'',xpath=>q{.//*[round(@title div @class) = 1]},expected=>q{dl h1 h2 b s blockquote br p del ins font h3 dfn a h4 sub sup span abbr q}},
    {context=>'',xpath=>q{.//*[blockquote]},expected=>q{center}},
    {context=>'',xpath=>q{substring-after(id("n13")/@id,"n")},expected=>q{13}},
    {context=>'',xpath=>q{count(.//*)},expected=>q{13}},
);

# Fixture document.  Line breaks sit inside the tags rather than between
# them, presumably so that no whitespace-only text nodes appear between
# sibling elements.
my $html = <<'__HTML__';
<html
><head>
<title>XPath Test</title>
<script></script>
</head
><body
><div id="n1" title="1" class="26" xml:lang="en"
><dl id="n2" title="2" class="3"
><dt id="n3" title="3" class="1">dt</dt
><dd id="n4" title="4" class="2">dd</dd
></dl
><center id="n5" title="5" class="22"
><h1 id="n6" title="6" class="6"
><em id="n7" title="7" class="4">em</em
><strong id="n8" title="8" class="5">strong</strong
></h1
><h2 id="n9" title="9" class="9"
><b id="n10" title="10" class="7">b</b><s id="n11" title="11" class="8">s</s
></h2
><blockquote id="n12" title="12" class="15"
><!--blockquoteComment-->blockquoteText1:<br id="n13" title="13" class="10" />blockquoteText2<p id="n14" title="14" class="13"
><del id="n15" title="15" class="11">del</del
><ins id="n16" title="16" class="12">ins</ins
></p><?pi name="value"?><font id="n17" title="17" class="14" face="n8 n26"
>font</font
></blockquote
><h3 id="n18" title="18" class="18"
><dfn id="n19" title="19" class="16">dfn</dfn><a id="n20" title="20" class="17">a</a></h3
><h4 id="n21" title="21" class="21"
><sub id="n22" title="22" class="19">sub</sub><sup id="n23" title="23" class="20">sup</sup></h4
></center
><span id="n24" title="24" class="25"
><abbr id="n25" title="25" class="23"
>abbr</abbr
><q id="n26" title="26" class="24" cite="n8 n17" xml:lang="it"
>q</q
></span
></div
></body
></html>
__HTML__

my $tree = HTML::TreeBuilder::XPath->new;
$tree->parse($html);
# parse() only feeds a chunk; eof() tells the builder the document is
# complete so that it can finalize the tree.
$tree->eof;

# All test expressions are evaluated relative to the <body> element.
my ($body) = $tree->findnodes('//body');
defined $body
    or die "no <body> element found in the fixture document\n";

foreach my $t (@tests) {
    my $x = $t->{xpath};

    # Evaluate the expression; an engine error is reported and the test is
    # skipped rather than aborting the whole run.
    my @found;
    if ( !eval { @found = $body->findnodes( $x, $body ); 1 } ) {
        print "*xpath evaluation failed. $x\n";
        print "$@\n";
        next;
    }

    # Build the space-separated label string.  $last_node remembers the node
    # being labelled so it can be dumped if labelling itself dies.
    my $last_node;
    my $literal;
    my $labelled = eval {
        $literal = join " ", map {
            $last_node = $_;
            $_->isa("HTML::TreeBuilder::XPath::TextNode")
                ? "t:" . $_->getValue
                : $_->tag;
        } @found;
        1;
    };
    if ( !$labelled ) {
        # A failure here is a bug in this harness (or an unexpected node
        # type), so dump the diagnostics and stop.
        print "*code failed.\n";
        print "xpath $x\n";
        print "$@\n";
        print $t->{expected};
        print Dumper $last_node;
        print "\n";
        exit;
    }

    # Passing tests are silent.
    next if $literal eq $t->{expected};

    print "*ng $x\n";
    print "\texpected: $t->{expected}\n";
    print "\tresult : $literal\n";
}

# Release the tree explicitly once all tests have run.
$tree->delete;