XPath test

「amachang の XPath テストコードを HTML::TreeBuilder::XPath で試してみる。 - TokuLog 改め だまってコードを書けよハゲ」を参考にコードを書いてみたけど、わからなくてベタ書き。

「XPath Test」の data を Firebug コンソールで Perl の配列に整形して貼って、テストを二つ追加した。

関数が苦手っぽい。processing instruction, commentは使わないからこけてても別にいい。

*ng .//blockquote/comment()
        expected: c:blockquoteComment
        result  :
*ng .//blockquote/processing-instruction()
        expected: p:pi
        result  :
*ng .//blockquote/processing-instruction("pi")
        expected: p:pi
        result  :
*ng .//blockquote/node()
        expected: c:blockquoteComment t:blockquoteText1: br t:blockquoteText2 p p:pi font
        result  : t:blockquoteText1: br t:blockquoteText2 p font
*ng .//blockquote/descendant::*[position() < 4]
        expected: br p del
        result  : br del ins
*xpath evaluation failed. id(.//font/@face)
Can't locate object method "look_down" via package "HTML::TreeBuilder::XPath::Attribute" at /Library/Perl/5.8.6/HTML/TreeBuilder/XPath.pm line 34.

*ng .//blockquote/descendant::*[4]
        expected: ins
        result  : p
*ng .//blockquote/descendant-or-self::*[4]
        expected: del
        result  : p
*ng .//blockquote/preceding::*[4]
        expected: strong
        result  : h1
*xpath evaluation failed. .//*[substring(.,2,1) = "u"]
substr outside of string at /Library/Perl/5.8.6/XML/XPathEngine/Function.pm line 246.

*xpath evaluation failed. .//*[substring(.,2) = "up"]
substr outside of string at /Library/Perl/5.8.6/XML/XPathEngine/Function.pm line 249.

*xpath evaluation failed. .//*[lang("it")]
axis axis_attribute not implemented [Can't locate object method "getNamespace" via package "HTML::TreeBuilder::XPath::Attribute" at /Library/Perl/5.8.6/XML/XPathEngine.pm line 177.
]

*ng substring-after(id("n13")/@id,"n")
        expected: 13
        result  :
*ng count(.//*)
        expected: 13
        result  :
package t::Amachang;
use strict;
use warnings;
use HTML::TreeBuilder::XPath;
use List::MoreUtils qw/any/;
use FindBin;
use File::Spec;
use Test::More;

#    if ($context ne '/') {
#        my ($newtree,) = $tree->findnodes($context);
#        ok $newtree;
#        $tree = $newtree;
#    }

# Test table: one hash per XPath expression to evaluate.
#   context  - always '' in this table; the runner loop in this file reads
#              only the xpath and expected keys.
#   xpath    - the expression passed to findnodes().
#   expected - space-separated labels of the nodes expected, in order.
#              An element is written as its tag name and a text node as
#              "t:" followed by its text.
# NOTE(review): the "c:" (comment) and "p:" (processing instruction) labels
# appear only in this table; the runner loop emits just "t:" labels and tag
# names, so those rows look unable to pass as written -- confirm.
# A few rows are repeated verbatim (e.g. `.//blockquote/*`, `.//*[blockquote]`,
# `.//*[starts-with(.,"s")]`).
my @tests = (
{context=>'',xpath=>q{.//blockquote/*},expected=>q{br p font}},
{context=>'',xpath=>q{.//blockquote/child::*},expected=>q{br p font}},
{context=>'',xpath=>q{.//blockquote/parent::*},expected=>q{center}},
{context=>'',xpath=>q{.//blockquote/descendant::*},expected=>q{br p del ins font}},
{context=>'',xpath=>q{.//blockquote/descendant-or-self::*},expected=>q{blockquote br p del ins font}},
{context=>'',xpath=>q{.//blockquote/ancestor::*},expected=>q{html body div center}},
{context=>'',xpath=>q{.//blockquote/ancestor-or-self::*},expected=>q{html body div center blockquote}},
{context=>'',xpath=>q{.//blockquote/following-sibling::*},expected=>q{h3 h4}},
{context=>'',xpath=>q{.//blockquote/preceding-sibling::*},expected=>q{h1 h2}},
{context=>'',xpath=>q{.//blockquote/following::*},expected=>q{h3 dfn a h4 sub sup span abbr q}},
{context=>'',xpath=>q{.//blockquote/preceding::*},expected=>q{head title script dl dt dd h1 em strong h2 b s}},
{context=>'',xpath=>q{.//blockquote/self::*},expected=>q{blockquote}},
{context=>'',xpath=>q{.//blockquote/attribute::id/parent::*},expected=>q{blockquote}},
{context=>'',xpath=>q{.//blockquote/@id/parent::*},expected=>q{blockquote}},
{context=>'',xpath=>q{.//*[blockquote]},expected=>q{center}},
{context=>'',xpath=>q{.//*[child::blockquote]},expected=>q{center}},
{context=>'',xpath=>q{.//*[parent::blockquote]},expected=>q{br p font}},
{context=>'',xpath=>q{.//*[descendant::blockquote]},expected=>q{div center}},
{context=>'',xpath=>q{.//*[descendant-or-self::blockquote]},expected=>q{div center blockquote}},
{context=>'',xpath=>q{.//*[ancestor::blockquote]},expected=>q{br p del ins font}},
{context=>'',xpath=>q{.//*[ancestor-or-self::blockquote]},expected=>q{blockquote br p del ins font}},
{context=>'',xpath=>q{.//*[following-sibling::blockquote]},expected=>q{h1 h2}},
{context=>'',xpath=>q{.//*[preceding-sibling::blockquote]},expected=>q{h3 h4}},
{context=>'',xpath=>q{.//*[following::blockquote]},expected=>q{dl dt dd h1 em strong h2 b s}},
{context=>'',xpath=>q{.//*[preceding::blockquote]},expected=>q{h3 dfn a h4 sub sup span abbr q}},
{context=>'',xpath=>q{.//*[self::blockquote]},expected=>q{blockquote}},
{context=>'',xpath=>q{.//*[@id]},expected=>q{div dl dt dd center h1 em strong h2 b s blockquote br p del ins font h3 dfn a h4 sub sup span abbr q}},
{context=>'',xpath=>q{.//*[attribute::id]},expected=>q{div dl dt dd center h1 em strong h2 b s blockquote br p del ins font h3 dfn a h4 sub sup span abbr q}},
{context=>'',xpath=>q{.//blockquote/text()},expected=>q{t:blockquoteText1: t:blockquoteText2}},
{context=>'',xpath=>q{.//blockquote/comment()},expected=>q{c:blockquoteComment}},
{context=>'',xpath=>q{.//blockquote/processing-instruction()},expected=>q{p:pi}},
{context=>'',xpath=>q{.//blockquote/processing-instruction("pi")},expected=>q{p:pi}},
{context=>'',xpath=>q{.//blockquote/node()},expected=>q{c:blockquoteComment t:blockquoteText1: br t:blockquoteText2 p p:pi font}},
{context=>'',xpath=>q{.//blockquote/p},expected=>q{p}},
{context=>'',xpath=>q{.//blockquote/*},expected=>q{br p font}},
{context=>'',xpath=>q{.//*[child::* and preceding::font]},expected=>q{h3 h4 span}},
{context=>'',xpath=>q{.//*[not(child::*) and preceding::font]},expected=>q{dfn a sub sup abbr q}},
{context=>'',xpath=>q{.//*[preceding::blockquote or following::blockquote]},expected=>q{dl dt dd h1 em strong h2 b s h3 dfn a h4 sub sup span abbr q}},
{context=>'',xpath=>q{.//blockquote/ancestor::* | .//blockquote/descendant::*},expected=>q{html body div center br p del ins font}},
{context=>'',xpath=>q{.//*[.="sub"]},expected=>q{sub}},
{context=>'',xpath=>q{.//*[@title > 12 and @class < 15]},expected=>q{br p del ins font}},
{context=>'',xpath=>q{.//*[@title != @class]},expected=>q{div dl dt dd center em strong b s blockquote br p del ins font dfn a sub sup span abbr q}},
{context=>'',xpath=>q{.//*[((@class * @class + @title * @title) div (@class + @title)) > ((@class - @title) * (@class - @title))]},expected=>q{dl h1 h2 s blockquote br p font h3 dfn a h4 sub sup span abbr q}},
{context=>'',xpath=>q{.//*[@title mod 2 = 0]},expected=>q{dl dd h1 strong b blockquote p ins h3 a sub span q}},
{context=>'',xpath=>q{.//blockquote/child::*[last()]},expected=>q{font}},
{context=>'',xpath=>q{.//blockquote/descendant::*[position() < 4]},expected=>q{br p del}},
{context=>'',xpath=>q{id(.//font/@face)},expected=>q{strong q}},
{context=>'',xpath=>q{.//*[name(.) = "sub"]},expected=>q{sub}},
{context=>'',xpath=>q{.//*[name() = "sub"]},expected=>q{sub}},
{context=>'',xpath=>q{.//blockquote/child::*[2]},expected=>q{p}},
{context=>'',xpath=>q{.//blockquote/descendant::*[4]},expected=>q{ins}},
{context=>'',xpath=>q{.//blockquote/descendant-or-self::*[4]},expected=>q{del}},
{context=>'',xpath=>q{.//blockquote/ancestor::*[2]},expected=>q{div}},
{context=>'',xpath=>q{.//blockquote/ancestor-or-self::*[2]},expected=>q{center}},
{context=>'',xpath=>q{.//blockquote/following-sibling::*[1]},expected=>q{h3}},
{context=>'',xpath=>q{.//blockquote/preceding-sibling::*[1]},expected=>q{h2}},
{context=>'',xpath=>q{.//blockquote/following::*[4]},expected=>q{h4}},
{context=>'',xpath=>q{.//blockquote/preceding::*[4]},expected=>q{strong}},
{context=>'',xpath=>q{.//*[starts-with(.,"s")]},expected=>q{strong s h4 sub sup}},
{context=>'',xpath=>q{.//*[string(@title - 1) = "0"]},expected=>q{div}},
{context=>'',xpath=>q{.//*[string() = "sub"]},expected=>q{sub}},
{context=>'',xpath=>q{.//*[string(.) = "sub"]},expected=>q{sub}},
{context=>'',xpath=>q{.//*[concat(.,..) = "subsubsup"]},expected=>q{sub}},
{context=>'',xpath=>q{.//node()[concat(.,..,../..) = "bbbs"]},expected=>q{t:b}},
{context=>'',xpath=>q{.//*[starts-with(.,"s")]},expected=>q{strong s h4 sub sup}},
{context=>'',xpath=>q{.//*[substring-before(.,"u") = "s"]},expected=>q{h4 sub sup}},
{context=>'',xpath=>q{.//*[substring-after(.,"on") = "t"]},expected=>q{blockquote font}},
{context=>'',xpath=>q{.//*[substring(.,2,1) = "u"]},expected=>q{h4 sub sup}},
{context=>'',xpath=>q{.//*[substring(.,2) = "up"]},expected=>q{sup}},
{context=>'',xpath=>q{.//*[contains(.,"b")]},expected=>q{div center h2 b blockquote h4 sub span abbr}},
{context=>'',xpath=>q{.//*[string-length() = 3]},expected=>q{del ins dfn sub sup}},
{context=>'',xpath=>q{.//*[string-length(.) = 3]},expected=>q{del ins dfn sub sup}},
{context=>'',xpath=>q{.//*[.=translate(normalize-space("  s  u  b  ")," ","")]},expected=>q{sub}},
{context=>'',xpath=>q{.//*[normalize-space()="q"]},expected=>q{q}},
{context=>'',xpath=>q{.//*[boolean(@title - 1) = false()]},expected=>q{div}},
{context=>'',xpath=>q{.//*[not(@title - 1) = true()]},expected=>q{div}},
{context=>'',xpath=>q{.//*[lang("it")]},expected=>q{q}},
{context=>'',xpath=>q{.//*[number(@title) < number(@class)]},expected=>q{div dl center blockquote span}},
{context=>'',xpath=>q{.//*[sum(ancestor::*/@title) < sum(descendant::*/@title)]},expected=>q{div dl center h1 h2 blockquote p h3 h4 span}},
{context=>'',xpath=>q{.//*[floor(@title div @class) = 1]},expected=>q{h1 em strong h2 b s br p del ins font h3 dfn a h4 sub sup abbr q}},
{context=>'',xpath=>q{.//*[ceiling(@title div @class) = 1]},expected=>q{div dl center h1 h2 blockquote h3 h4 span}},
{context=>'',xpath=>q{.//*[round(@title div @class) = 1]},expected=>q{dl h1 h2 b s blockquote br p del ins font h3 dfn a h4 sub sup span abbr q}},
{context=>'',xpath=>q{.//*[blockquote]},expected=>q{center}},
# The two rows below are XPath expressions whose value is a string / number
# rather than a node set.
# NOTE(review): the HTML fixture in this file has 26 elements under <body>
# (ids n1..n26), so an expected count of 13 looks suspicious -- confirm which
# context node was intended.
{context=>'',xpath=>q{substring-after(id("n13")/@id,"n")},expected=>q{13}},
{context=>'',xpath=>q{count(.//*)},expected=>q{13}},

);

# HTML fixture the XPath expressions are evaluated against.
# The heredoc is single-quoted, so nothing inside it is interpolated.
# Every element inside <body> carries an id (n1..n26) plus numeric title and
# class attributes that the arithmetic/comparison tests rely on.  Inside
# <body> the line breaks sit inside the tags themselves (before the closing
# ">"), so the markup adds no whitespace-only text between elements.
# The <blockquote> also holds a comment, two text runs and a processing
# instruction for the node-type tests.
my $html = <<'__HTML__';
<html
><head>
  <title>XPath Test</title>
  <script></script>
</head
><body
        ><div id="n1" title="1" class="26" xml:lang="en"
                ><dl id="n2" title="2" class="3"
                        ><dt id="n3" title="3" class="1">dt</dt
                        ><dd id="n4" title="4" class="2">dd</dd
                ></dl
                ><center id="n5" title="5" class="22"
                        ><h1 id="n6" title="6" class="6"
                                ><em id="n7" title="7" class="4">em</em
                                ><strong id="n8" title="8" class="5">strong</strong
                        ></h1
                        ><h2 id="n9" title="9" class="9"
                                ><b id="n10" title="10" class="7">b</b><s id="n11" title="11" class="8">s</s
                        ></h2
                        ><blockquote id="n12" title="12" class="15"
                                ><!--blockquoteComment-->blockquoteText1:<br id="n13" title="13" class="10"
                                />blockquoteText2<p id="n14" title="14" class="13"
                                        ><del id="n15" title="15" class="11">del</del
                                        ><ins id="n16" title="16" class="12">ins</ins
                                ></p><?pi name="value"?><font id="n17" title="17" class="14" face="n8 n26"
                                        >font</font
                                ></blockquote
                        ><h3 id="n18" title="18" class="18"
                                ><dfn id="n19" title="19" class="16">dfn</dfn><a id="n20" title="20" class="17">a</a></h3
                        ><h4 id="n21" title="21" class="21"
                                ><sub id="n22" title="22" class="19">sub</sub><sup id="n23" title="23" class="20">sup</sup></h4
                        ></center
                ><span id="n24" title="24" class="25"
                        ><abbr id="n25" title="25" class="23"
                                >abbr</abbr
                        ><q id="n26" title="26" class="24" cite="n8 n17" xml:lang="it"
                                >q</q
                        ></span
                ></div
        ></body
></html>
__HTML__

use Data::Dumper ;
# Parse the fixture into an XPath-capable tree and select <body> as the
# context node used by every test.
my $tree = HTML::TreeBuilder::XPath->new;
$tree->parse($html);
# parse() only feeds a chunk of text to the parser; eof() signals that the
# document is complete so HTML::TreeBuilder can flush buffered text and run
# its end-of-document cleanup.  Without it the tree may be left unfinished.
$tree->eof;

# findnodes() is called in list context; the first match is the context node.
my @nodes = $tree->findnodes( '//body' );
my $body = shift @nodes;
# Fail here with a clear message instead of a method call on undef later.
defined $body
    or die "fixture has no <body> element; cannot run the XPath tests\n";

# Run every XPath expression against <body> and report only the failures.
#
# For each test the matched nodes are rendered as one space-separated label
# string ("t:" plus the text for text nodes, the tag name otherwise) and that
# string is compared with the test's expected string.  Passing tests print
# nothing; a mismatch prints an "*ng" record with both strings.
foreach my $t (@tests) {
    my $x = $t->{xpath};

    # Evaluate the expression.  Success is detected through the eval's own
    # return value (the trailing 1) rather than by the truth of $@, because
    # $@ is a global that can be cleared or clobbered before it is inspected.
    my @found;
    my $evaluated = eval {
        @found = $body->findnodes($x, $body);
        1;
    };
    if ( !$evaluated ) {
        my $error = $@;
        print "*xpath evaluation failed. $x\n";
        print "$error\n";
        next;
    }

    # Render the nodes.  $lastOne remembers the node being rendered so that
    # it can be dumped if rendering dies (for example on a node object that
    # has no tag() method).
    my $lastOne;
    my $literal;
    my $rendered = eval {
        $literal = join " ", map {
            $lastOne = $_;
            $_->isa("HTML::TreeBuilder::XPath::TextNode")
                ? "t:" . $_->getValue
                : $_->tag;
        } @found;
        1;
    };
    if ( !$rendered ) {
        my $error = $@;
        print "*code failed.\n";
        print "xpath $x\n";
        print "$error\n";
        print $t->{expected};
        print Dumper $lastOne;
        print "\n";
        # Abort with a non-zero status: this is a failure of the harness
        # itself, not an ordinary test mismatch.
        exit 1;
    }

    # Matching results are silent.
    next if $literal eq $t->{expected};

    print "*ng $x\n";
    print "\texpected: $t->{expected}\n";
    print "\tresult  : $literal\n";
}