############################################################################
# Copyright (c) 1998 Enno Derksen
# All rights reserved.
# This program is free software; you can redistribute it and/or modify it
# under the same terms as Perl itself.
############################################################################
#
# Extra functionality that is not part of the XQL spec
#

package XML::XQL;
use strict;

# This file must only be pulled in by XML::XQL (or XML::XQL::Strict), which
# is expected to set $XML::XQL::Included first.
BEGIN {
    die "don't use/require XML::XQL::Plus, either use/require XML::XQL or XML::XQL::Strict"
        unless $XML::XQL::Included;
};

# Register the additional comparison operators implemented below.
defineComparisonOperators (
    "=~"       => \&XML::XQL::match_oper,
    "!~"       => \&XML::XQL::no_match_oper,
    "match"    => \&XML::XQL::match_oper,
    "no_match" => \&XML::XQL::no_match_oper,
    "isa"      => \&XML::XQL::isa_oper,
    "can"      => \&XML::XQL::can_oper,
);

# Shared implementation of match_oper and no_match_oper.
#
#   $node - left-hand side; its xql_toString value is the string to test
#   $expr - right-hand side expression; solved against [$node], its string
#           value must be a Perl match expression starting with 'm' or '/'
#   $op   - Perl binding operator to apply: "=~" or "!~"
#
# Returns the result of the binding operator, or [] when either side is an
# empty list. Croaks when the pattern is malformed.
#
# NOTE: the pattern text is spliced into a string eval, i.e. it is executed
# as Perl code. Only use with queries from a trusted source.
sub _regex_oper {
    my ($node, $expr, $op) = @_;

    return [] if isEmptyList ($node);   #?? can this happen?
    my $str = $node->xql_toString;

    $expr = prepareRvalue ($expr->solve ([$node]));
    return [] if isEmptyList ($expr);   #?? can this happen?
    $expr = $expr->xql_toString;

    croak "bad search pattern '$expr' for $op"
        unless $expr =~ m!^\s*[m/]!;

    my $res = eval "\$str $op $expr";
    croak "bad search pattern '$expr' for $op operator: $@" if ($@);
    $res;
}

# Implements the "=~" and "match" comparison operators.
sub match_oper {
    my ($node, $expr) = @_;
    _regex_oper ($node, $expr, "=~");
}

# Implements the "!~" and "no_match" comparison operators.
sub no_match_oper {
    my ($node, $expr) = @_;
    _regex_oper ($node, $expr, "!~");
}

# Implements the "isa" comparison operator: true when $node is a reference
# and $node->isa(TYPE) holds, where TYPE is the string value of $expr after
# expandType. Returns [] when either side is an empty list.
sub isa_oper {
    my ($node, $expr) = @_;

    return [] if isEmptyList ($node);   #?? can this happen?

    $expr = prepareRvalue ($expr->solve ([$node]));
    return [] if isEmptyList ($expr);   #?? can this happen?
    $expr = $expr->xql_toString;

    # Expand "number" to "XML::XQL::Number" etc.
    $expr = expandType ($expr);

#?? I don't think empty lists are possible here. If so, add "[]" as expr

    ref($node) and $node->isa ($expr);
}

#
# Not sure how useful this is, unless it supports XQL functions/methods...
#
# Implements the "can" comparison operator: true when $node is a reference
# and $node->can(NAME) holds, where NAME is the string value of $expr.
# Returns [] when either side is an empty list.
sub can_oper {
    my ($node, $expr) = @_;

    return [] if isEmptyList ($node);   #?? can this happen?

    $expr = prepareRvalue ($expr->solve ([$node]));
    return [] if isEmptyList ($expr);   #?? can this happen?
    $expr = $expr->xql_toString;

    ref ($node) and $node->can ($expr);
}

# XQL function once(EXPR): simply solves EXPR in the current context.
sub once {
    my ($context, $list, $expr) = @_;
    $expr->solve ($context, $list);
}

# XQL function eval(QUERY [, TYPE]).
#
# Solves QUERY, evaluates the string value of every result as Perl code and
# wraps each defined evaluation result in a new object of class TYPE
# (after expandType; "XML::XQL::Text" when TYPE is omitted).
# Returns a reference to the list of created objects, or [] when QUERY
# yields nothing.
#
# NOTE: this runs query-supplied text through string eval. Only use with
# queries from a trusted source.
sub xql_eval {
    my ($context, $list, $query, $type) = @_;

#    return [] if @$list == 0;

    $query = toList ($query->solve ($context, $list));
    return [] unless @$query;

    if (defined $type) {
        $type = prepareRvalue ($type->solve ($context, $list));
        $type = isEmptyList ($type) ? "Text" : $type->xql_toString;

        # Expand "number" to "XML::XQL::Number" etc.
        $type = expandType ($type);
    }
    else {
        $type = "XML::XQL::Text";
    }

    my @result = ();
    for my $item (@$query) {
        # Work on copies: the loop variable aliases the list element, so
        # assigning to it would overwrite the entries of @$query.
        my $code = $item->xql_toString;
        my $val  = eval $code;
#print "eval result=$val\n";
#?? check result?
        push @result, eval "new $type (\$val)" if defined $val;
    }
    \@result;
}

# XQL function subst(QUERY, EXPR, REPL [, MOD [, MODE]]) (alias: s).
#
# Applies s/EXPR/REPL/MOD to every node found by QUERY and returns a
# reference to the list of nodes in which a substitution took place.
#
#   MOD  - substitution modifiers (default: none)
#   MODE - 0 (default): for Element nodes, substitute inside each block
#          returned by xql_rawTextBlocks; any other value: substitute in the
#          node's xql_toString value and store it with xql_setValue
#
# Returns [] when the input list, EXPR or REPL is empty. Croaks on a bad
# substitution expression.
#
# NOTE: MOD is spliced into a string eval; only use with trusted queries.
sub subst {
    my ($context, $list, $query, $expr, $repl, $mod, $mode) = @_;

#?? not sure?
    return [] if @$list == 0;

    $expr = prepareRvalue ($expr->solve ($context, $list));
    return [] if isEmptyList ($expr);
    $expr = $expr->xql_toString;

    $repl = prepareRvalue ($repl->solve ($context, $list));
    return [] if isEmptyList ($repl);
    $repl = $repl->xql_toString;

    if (defined $mod) {
        $mod = prepareRvalue ($mod->solve ($context, $list));
        $mod = isEmptyList ($mod) ? "" : $mod->xql_toString;
    }
    else {
        # No modifiers given: avoid interpolating undef into the eval string
        $mod = "";
    }

    if (defined $mode) {
        $mode = prepareRvalue ($mode->solve ($context, $list));
        $mode = isEmptyList ($mode) ? 0 : $mode->xql_toString;
    }
    else {
        $mode = 0;      # default mode: use textBlocks for Elements
    }

    my @result = ();
    my $nodes = toList ($query->solve ($context, $list));

    for my $node (@$nodes) {
        if ($mode == 0 && $node->xql_nodeType == 1) {   # 1: Element node
            # For Element nodes, replace text in consecutive text blocks
            # Note that xql_rawtextBlocks, returns the blocks in reverse order,
            # so that the indices of nodes within previous blocks don't need
            # to be adjusted when a replacement occurs.
            my $block_matched = 0;
            BLOCK: for my $block ($node->xql_rawTextBlocks) {
                my $str = $block->[2];
                my $result = eval "\$str =~ s/\$expr/\$repl/$mod";
                croak "bad subst expression s/$expr/$repl/$mod: $@" if ($@);
                next BLOCK unless $result;

                $block_matched++;
                $node->xql_replaceBlockWithText ($block->[0], $block->[1], $str);
            }
            # Return the input parameter only if a substitution occurred
            push @result, $node if $block_matched;
        }
        else {
            my $str = $node->xql_toString;
            next unless defined $str;

            my $result = eval "\$str =~ s/\$expr/\$repl/$mod";
            croak "bad subst expression s/$expr/$repl/$mod: $@" if ($@);
            next unless $result;
#print "result=$result for str[$str] =~ s/$expr/$repl/$mod\n";

            # Return the input parameter only if a substitution occurred
            $node->xql_setValue ($str);
            push @result, $node;
        }
        # xql_setValue will actually change the value of the node for an Attr,
        # Text, CDataSection, EntityRef or Element
    }
    \@result;
}

#?? redo match - what should it return?
#
# XQL function match(PATTERN [, MOD]) (alias: m).
#
# Matches the string value of the first node of the input list against
# /PATTERN/MOD and returns a reference to the list of match results, each
# defined result wrapped in an XML::XQL::Text (undefined ones become []).
# Returns [] when the input list or PATTERN is empty. Croaks on a bad
# match expression.
#
# NOTE: MOD is spliced into a string eval; only use with trusted queries.
sub match {
    my ($context, $list, $query, $repl, $mod) = @_;

    return [] if @$list == 0;

    # "match" and "m" are registered with at most two arguments, so the
    # modifiers arrive in the slot named $repl and the fifth parameter is
    # never supplied by the query engine. Fall back to $repl so that the
    # modifiers are actually honoured.
    $mod = $repl unless defined $mod;

    $query = prepareRvalue ($query->solve ($context, $list));
    return [] if isEmptyList ($query);
    $query = $query->xql_toString;

    if (defined $mod) {
        $mod = prepareRvalue ($mod->solve ($context, $list));
        $mod = isEmptyList ($mod) ? "" : $mod->xql_toString;
    }
    else {
        # No modifiers given: avoid interpolating undef into the eval string
        $mod = "";
    }

    my $str = $list->[0]->xql_toString;
    return [] unless defined $str;

    my (@matches) = ();
    eval "\@matches = (\$str =~ /\$query/$mod)";
    croak "bad match expression m/$query/$mod: $@" if ($@);

#?? or should I map undef to XML::XQL::Text("") ?
    @matches = map { defined($_) ? new XML::XQL::Text ($_) : [] } @matches;
    \@matches;
}

# XQL function map(QUERY, CODE).
#
# Runs the Perl CODE (via "map { CODE } ($str)", so $_ is aliased to $str)
# on the string value of every node found by QUERY. When the first value
# returned by CODE equals $str afterwards, $str is stored in the node with
# xql_setValue and the node is added to the result list.
# Returns [] when the input list or CODE is empty. Croaks on bad CODE.
#
# NOTE: CODE is executed through string eval; only use with trusted queries.
sub xql_map {
    my ($context, $list, $query, $code) = @_;

#?? not sure?
    return [] if @$list == 0;

    $code = prepareRvalue ($code->solve ($context, $list));
    return [] if isEmptyList ($code);
    $code = $code->xql_toString;

    my @result = ();
    my $nodes = toList ($query->solve ($context, $list));

    for my $node (@$nodes) {
        my $str = $node->xql_toString;
        next unless defined $str;

        my (@mapresult) = ($str);

#?? NOTE: the $code should
        eval "\@mapresult = map { $code } (\$str)";

        croak "bad map expression '$code' ($@)" if ($@);

        # Return the input parameter only if a change occurred
        # NOTE(review): the test below skips the node when CODE's result
        # differs from $str, which only matches the comment above if CODE
        # modifies $_ in place and returns it - confirm the intent.
        next unless $mapresult[0] eq $str;

        # xql_setValue will actually change the value of the node for an Attr,
        # Text, CDataSection, EntityRef or Element
        $node->xql_setValue ($str);
        push @result, $node;
    }
    \@result;
}

# Implements new(TYPE, ARGS...): constructs an object of class TYPE (after
# expandType) from @arg. Returns [] when the constructor throws.
sub xql_new {
    my ($type, @arg) = @_;

    # Expand "number" to "XML::XQL::Number" etc.
    $type = expandType ($type);

    my $obj = eval "new $type (\@arg)";
    $@ ? [] : $obj;     # return empty list on exception
}

my $DOM_PARSER; # used by xql_document (below)

# Sets the parser object that xql_document uses to read documents.
sub setDocParser {
    $DOM_PARSER = shift;
}

# Implements document(NAME): parses the XML file NAME with the parser set
# by setDocParser (a new XML::DOM::Parser is created and remembered when
# none was set). Returns the parsed document, or [] after emitting a
# warning when parsing fails.
sub xql_document {
    my ($docname) = @_;
    my $parser = $DOM_PARSER ||= new XML::DOM::Parser;
    my $doc;
    eval {
        $doc = $parser->parsefile ($docname);
    };
    if ($@) {
        warn "xql_document: could not read XML file [$docname]: $@";
    }
    return defined $doc ? $doc : [];
}

#----------- XQL+ methods --------------------------------------------

# XQL method DOM_nodeType(): returns the xql_DOM_nodeType of the first node
# of the input list as an XML::XQL::Number, or [] for an empty list.
sub DOM_nodeType {
    my ($context, $list) = @_;

    return [] if @$list == 0;

    new XML::XQL::Number ($list->[0]->xql_DOM_nodeType, $list->[0]);
}

#----------- Perl Builtin Functions ----------------------------------

# Note that certain functions (like mkdir) are not considered "constant"
# because we don't want their invocation values cached. (We want the
# function to be called every time the Invocation is solved/evaluated.)
# Table of Perl builtin functions that can be exposed as XQL functions.
# generatePerlWrapper (below) looks a function up here by name.
my %PerlFunc =
(
 # Format:
 #  "funcName", => [ARGCOUNT, RETURN_TYPE [, CONSTANT = 0, [QUERY_ARG = 0]]]

 #-------- Arithmetic Functions

 "abs"       => [1, "Number", 1],
 "atan2"     => [2, "Number", 1, -1],
 "cos"       => [1, "Number", 1],
 "exp"       => [1, "Number", 1],
 "int"       => [1, "Number", 1],
 "log"       => [1, "Number", 1],
 "rand"      => [[0, 1], "Number", 0, -1],
 "sin"       => [1, "Number", 1],
 "sqrt"      => [1, "Number", 1],
 "srand"     => [[0, 1], "Number", 0, -1],
 "time"      => [0, "Number", 0, -1],

 #-------- Conversion Functions

 "chr"       => [1, "Text", 1],
# "gmtime"   => [1, "List of Number", 1],
 "hex"       => [1, "Number", 1],
# "localtime" => [1, "List of Number", 1],
 "oct"       => [1, "Number", 1],
 # ord() yields the numeric value of a character, so it is a Number
 "ord"       => [1, "Number", 1],
 "vec"       => [3, "Number", 1],
 "pack"      => [[1, -1], "Text", 1, -1],
#?? how should this work??
# "unpack"   => [2, "List of ?", 1],

 #-------- String Functions

 "chomp"     => [1, "Text", 1],
 "chop"      => [1, "Text", 1],
 "crypt"     => [2, "Text", 1],
 "lindex"    => [[2, 3], "Number", 1], # "index" is already taken by XQL
 "length"    => [1, "Number", 1],
 "lc"        => [1, "Text", 1],
 "lcfirst"   => [1, "Text", 1],
 "quotemeta" => [1, "Text", 1],
 "rindex"    => [[2, 3], "Number", 1],
 "substr"    => [[2, 3], "Text", 1],
 "uc"        => [1, "Text", 1],
 "ucfirst"   => [1, "Text", 1],
 "reverse"   => [1, "Text", 1],
 "sprintf"   => [[1, -1], "Text", 1, -1],

 #-------- Array Functions

 "join"      => [[1, -1], "Text", 1],
# "split"    => [[2, 3], "List of Text", 1],

 #-------- File Functions

 "chmod"     => [2, "Boolean", 0, 1],
 "chown"     => [3, "Boolean", 0, 2],
 "link"      => [2, "Number", 0, -1], #?? no return value
# "lstat"    => [1, "List of Number"],
 "mkdir"     => [2, "Boolean"], #?? or is 1 arg also allowed?
 "readlink"  => [1, "Text"],
 "rename"    => [2, "Boolean", 0, -1],
 "rmdir"     => [1, "Boolean"],
# "stat"     => [1, "List of Number"],
 "symlink"   => [2, "Boolean", 0, -1],
 "unlink"    => [1, "Boolean"],
 "utime"     => [3, "Boolean", 0, 2],
 "truncate"  => [2, "Number"], #?? no return value

 #-------- System Interaction

 "exit"      => [[0, 1], "Number"],
# "glob"     => [1, "List of Text"],
 "system"    => [[1, -1], "Number", 0, -1],
# "times"    => [0, "List of Number"],

 #-------- Miscellaneous

 "defined"   => [1, "Boolean"], # is this useful??
 "dump"      => [[0, 1], "Number", 0, -1],
 "ref"       => [1, "Text"],
);
#?? die, warn, croak (etc.),
#?? file test (-X), tr// (same as y//)
#?? array functions, sort

# Generate wrapper for Perl builtin function on the fly
#
#   $name - XQL-visible function name; must be a key of %PerlFunc
#
# Passes the table entry for $name to generateFunction and returns
# $Func{$name} afterwards. Returns undef when $name is not in %PerlFunc.
sub generatePerlWrapper
{
    my ($name) = @_;

    my $args = $PerlFunc{$name};
    return undef unless defined $args;  # not found

    my ($argCount, $returnType, $const, $queryArg) = @$args;

    # The XQL name "lindex" maps onto Perl's index()
    my $funcName = $name;
    if ($name eq "lindex")      # "index" is already taken
    {
        $funcName = "index";
    }
    generateFunction ($name, $funcName, $returnType, $argCount,
                      0, $const, $queryArg);
    $Func{$name};
}

#?? Inline functions, do they make sense? E.g. 'elem!sub("code", "arg1")'
#?? Normally, user should use defineFunction, but if most of them have
#?? a lot of common code, I could provide the pre- and post-code.
#?? After processing the user-supplied code block, how should I convert the
#?? user's result back to an Invocation result. E.g. do I get a single value
#?? or a list back?
# Register the XQL+ functions and methods implemented in this file.
defineFunction ("eval",  \&XML::XQL::xql_eval, [1, 2]);
defineFunction ("subst", \&XML::XQL::subst,    [3, 5], 1);
defineFunction ("s",     \&XML::XQL::subst,    [3, 5], 1);
defineFunction ("match", \&XML::XQL::match,    [1, 2]);
defineFunction ("m",     \&XML::XQL::match,    [1, 2]);
defineFunction ("map",   \&XML::XQL::xql_map,  2, 1);
defineFunction ("once",  \&XML::XQL::once,     1, 1);

defineMethod ("DOM_nodeType", \&XML::XQL::DOM_nodeType, 0, 0);

generateFunction ("new", "XML::XQL::xql_new", "*", [1, -1], 1, 0, 1);
generateFunction ("document", "XML::XQL::xql_document", "*", 1, 1, 0, 0);

# doc() is an alias for document()
# NOTE(review): xql_wrap_document is not defined in this file; presumably it
# is the wrapper created by the generateFunction ("document", ...) call
# above - confirm.
defineFunction ("doc", \&XML::XQL::xql_wrap_document, 1, 1);

#------------------------------------------------------------------------------
# The following functions were found in the XPath spec.

# Found in XPath but not (yet) implemented in XML::XQL:
# - type casting (string, number, boolean) - Not sure if needed...
#   Note that string() converts booleans to 'true' and 'false', but our
#   internal type casting converts it to perl values '0' and '1'...
# - math (+,-,*,mod,div) - Use eval() for now
# - last(), position() - Similar to end() and index() except they're 1-based
# - local-name(node-set?), namespace-uri(node-set?)
# - name(node-set?) - Can we pass a node-set in XQL?
# - lang(string)

# concat(S1, S2, ...): all arguments joined into one string
sub xpath_concat      { join ("", @_) }

# starts-with(S, T): true when S begins with the literal text T
sub xpath_starts_with { $_[0] =~ /^\Q$_[1]\E/ }

# ends-with is not part of XPath
sub xpath_ends_with   { $_[0] =~ /\Q$_[1]\E$/ }

# contains(S, T): true when the literal text T occurs in S
sub xpath_contains    { $_[0] =~ /\Q$_[1]\E/ }

# The following methods don't know about NaN, +/-Infinity or -0.
use POSIX;
sub xpath_floor       { POSIX::floor ($_[0]) }
sub xpath_ceiling     { POSIX::ceil ($_[0]) }
sub xpath_round       { POSIX::floor ($_[0] + 0.5) }

# Note that the start-index is 1-based in XPath
# substring(S, START [, LENGTH])
sub xpath_substring
{
    defined $_[2] ? substr ($_[0], $_[1] - 1, $_[2])
                  : substr ($_[0], $_[1] - 1)
}

# substring-before(S, T): the part of S before the first occurrence of T,
# or undef when T does not occur in S
sub xpath_substring_before
{
    my $i = index ($_[0], $_[1]);
    $i == -1 ? undef : substr ($_[0], 0, $i)
}

# substring-after(S, T): the part of S after the first occurrence of T,
# or undef when T does not occur in S
sub xpath_substring_after
{
    my $i = index ($_[0], $_[1]);
    $i == -1 ? undef : substr ($_[0], $i + length($_[1]))
}

# Note that d,c,s are tr/// modifiers. Also can't use open delimiters i.e. {[(<
my @TR_DELIMITERS = split //, "/!%^&*)-_=+|~]}'\";:,.>/?abefghijklmnopqrtuvwxyz";

# translate(STR, FROM, TO): returns a copy of STR transliterated with
# Perl's tr/FROM/TO/d.
#
# The tr expression is built as a string and evaluated, using the first
# character of @TR_DELIMITERS that occurs in neither FROM nor TO as the
# delimiter. Dies when no such delimiter exists or when the resulting tr
# expression does not compile.
sub xpath_translate
{
    my ($str, $from, $to) = @_;

    my $delim;
    for my $d (@TR_DELIMITERS)
    {
        if (index ($from, $d) == -1 && index ($to, $d) == -1)
        {
            $delim = $d;
            last;
        }
    }
    die "(xpath_)translate: can't find suitable 'tr' delimiter"
        unless defined $delim;

    # XPath defines that if length($from) > length($to), characters in $from
    # for which there is no match in $to, should be deleted.
    # (So we must use the 'd' modifier.)
    #
    # The space after 'tr' is required: with a word character as delimiter
    # (e.g. '_' or a letter) "tr" and the delimiter would otherwise be read
    # as a single identifier.
    my $tr = "tr $delim$from$delim$to${delim}d";
    eval "\$str =~ $tr";
    die "(xpath_)translate: bad 'tr' expression [$tr]: $@" if $@;

    $str;
}

# XQL method string-length([TEXT]).
#
# With TEXT: returns an XML::XQL::Number holding the length of TEXT's string
# value, or [] when TEXT solves to nothing.
# Without TEXT: returns a reference to a list with one XML::XQL::Number per
# node of the input list (the length of that node's string value), or []
# for an empty input list.
sub xpath_string_length
{
    my ($context, $list, $text) = @_;

    if (defined $text)
    {
        $text = XML::XQL::prepareRvalue ($text->solve ($context, $list));
        # Guard against both forms of "no value"; the rest of this file
        # tests the prepareRvalue result with isEmptyList.
        return [] if !defined $text || XML::XQL::isEmptyList ($text);

        return new XML::XQL::Number (length $text->xql_toString,
                                     $text->xql_sourceNode);
    }
    else
    {
        return [] if @$list == 0;

        my @result;
        for my $node (@$list)
        {
            push @result, new XML::XQL::Number (length $node->xql_toString,
                                                $node);
        }
        return \@result;
    }
}

# Returns a copy of the given string with every run of whitespace collapsed
# to a single space and leading/trailing whitespace removed.
sub _normalize
{
    my ($str) = @_;     # copy, so the caller's value is left untouched

    $str =~ s/\s+/ /g;
    $str =~ s/^\s+//;
    $str =~ s/\s+$//;
    $str;
}

# XQL method normalize-space([TEXT]).
#
# With TEXT: returns an XML::XQL::Text holding the normalized string value
# of TEXT, or [] when TEXT solves to nothing.
# Without TEXT: returns a reference to a list with one XML::XQL::Text per
# node of the input list. Returns [] for an empty input list.
sub xpath_normalize_space
{
    my ($context, $list, $text) = @_;

    return [] if @$list == 0;

    if (defined $text)
    {
        $text = XML::XQL::prepareRvalue ($text->solve ($context, $list));
        # Guard against both forms of "no value"; the rest of this file
        # tests the prepareRvalue result with isEmptyList.
        return [] if !defined $text || XML::XQL::isEmptyList ($text);

        return new XML::XQL::Text (_normalize ($text->xql_toString),
                                   $text->xql_sourceNode);
    }
    else
    {
        my @result;
        for my $node (@$list)
        {
            push @result, new XML::XQL::Text (_normalize ($node->xql_toString),
                                              $node);
        }
        return \@result;
    }
}

# XQL function sum(EXPR): returns an XML::XQL::Number holding the sum of the
# string values of all results of EXPR, or [] for an empty input list.
sub xpath_sum
{
    my ($context, $list, $expr) = @_;

    return [] if @$list == 0;
#?? or return Number(0) ?

    my $sum = 0;
    $expr = XML::XQL::toList ($expr->solve ($context, $list));
    for my $r (@{ $expr })
    {
        $sum += $r->xql_toString;
    }
    return new XML::XQL::Number ($sum, undef);
}

# Register the XPath-derived functions and methods.
generateFunction ("round",   "XML::XQL::xpath_round",   "Number", 1, 1);
generateFunction ("floor",   "XML::XQL::xpath_floor",   "Number", 1, 1);
generateFunction ("ceiling", "XML::XQL::xpath_ceiling", "Number", 1, 1);

generateFunction ("concat", "XML::XQL::xpath_concat", "Text", [2, -1], 1);
generateFunction ("starts-with", "XML::XQL::xpath_starts_with", "Boolean", 2, 1);
generateFunction ("ends-with", "XML::XQL::xpath_ends_with", "Boolean", 2, 1);
generateFunction ("contains", "XML::XQL::xpath_contains", "Boolean", 2, 1);
generateFunction ("substring-before", "XML::XQL::xpath_substring_before", "Text", 2, 1);
generateFunction ("substring-after", "XML::XQL::xpath_substring_after", "Text", 2, 1);
# Same as Perl substr() except index is 1-based
generateFunction ("substring", "XML::XQL::xpath_substring", "Text", [2, 3], 1);
generateFunction ("translate", "XML::XQL::xpath_translate", "Text", 3, 1);

defineMethod ("string-length", \&XML::XQL::xpath_string_length, [0, 1], 1);
defineMethod ("normalize-space", \&XML::XQL::xpath_normalize_space, [0, 1], 1);

defineFunction ("sum", \&XML::XQL::xpath_sum, 1, 1);

1; # module return code