Lines 1-91
Link Here
|
1 |
# $Id: OrigTree.pm,v 1.1 2003/11/26 15:18:28 kenneth Exp $ |
|
|
2 |
|
3 |
package XML::Parser::Style::OrigTree;

use strict;
use warnings;

# Flag OrigTree in XML::Parser's table of built-in styles so that
# Style => 'OrigTree' (see SYNOPSIS) can be given by its short name.
{
    # The hash belongs to XML::Parser, which may not have been loaded yet
    # when this file is compiled on its own; silence the "used only once"
    # diagnostic for this single foreign-package write.
    no warnings 'once';
    $XML::Parser::Built_In_Styles{OrigTree} = 1;
}
5 |
|
6 |
# Init handler: prepare the parser object for building a tree.
#   Lists    - stack of enclosing content lists (empty at start)
#   OrigTree - the top-level list that Final will hand back
#   Curlist  - the list currently being filled; starts as the same
#              array reference as OrigTree
sub Init {
    my ($expat) = @_;

    my $root = [];
    $expat->{Lists}    = [];
    $expat->{OrigTree} = $root;
    return $expat->{Curlist} = $root;
}
11 |
|
12 |
# Start handler: open a new element.
# Arguments: the parser object, the tag name, then the attribute
# name/value pairs as a flat list.
# A fresh content list whose first item is the attribute hash is created,
# the enclosing list is remembered on the Lists stack, the (tag, content)
# pair is appended to the enclosing list, and the new content list becomes
# the current one.
sub Start {
    my ($expat, $tag, @attrs) = @_;

    my $content = [ { @attrs } ];

    push @{ $expat->{Lists} },   $expat->{Curlist};
    push @{ $expat->{Curlist} }, $tag, $content;

    return $expat->{Curlist} = $content;
}
20 |
|
21 |
# End handler: close the current element by making the most recently
# saved enclosing list (top of the Lists stack) current again.
# The tag name passed as the second argument is not needed.
sub End {
    my ($expat) = @_;

    return $expat->{Curlist} = pop @{ $expat->{Lists} };
}
26 |
|
27 |
# Char handler: record character data in the current content list.
# The text is taken from $expat->original_string() instead of the text
# argument supplied to the handler, so the argument is ignored.
# If the last pair in the current list is already a text node (tag "0"),
# the new string is appended to it; otherwise a new 0 => string pair is
# pushed.
sub Char {
    my ($expat) = @_;

    my $current = $expat->{Curlist};
    my $last    = $#{$current};

    # Extend the trailing text node when there is one.
    return $current->[$last] .= $expat->original_string()
        if $last > 0 and $current->[ $last - 1 ] eq '0';

    return push @{$current}, 0, $expat->original_string();
}
39 |
|
40 |
# Final handler: remove the bookkeeping entries (Curlist and Lists) from
# the parser object and return the finished tree stored under OrigTree.
sub Final {
    my ($expat) = @_;

    delete @{$expat}{qw(Curlist Lists)};

    return $expat->{OrigTree};
}
46 |
|
47 |
1; |
48 |
__END__ |
49 |
|
50 |
=head1 NAME |
51 |
|
52 |
XML::Parser::Style::OrigTree - Tree style variant that keeps the original, unconverted text |
53 |
|
54 |
=head1 SYNOPSIS |
55 |
|
56 |
use XML::Parser; |
57 |
my $p = XML::Parser->new(Style => 'OrigTree'); |
58 |
my $tree = $p->parsefile('foo.xml'); |
59 |
|
60 |
=head1 DESCRIPTION |
61 |
|
62 |
This module is a variant of XML::Parser's Tree style. It builds text |
63 |
nodes from original_string, so that entities are not converted. |
64 |
|
65 |
When parsing a document, C<parse()> will return a parse tree for the |
66 |
document. Each node in the tree |
67 |
takes the form of a tag, content pair. Text nodes are represented with |
68 |
a pseudo-tag of "0" and the string that is their content. For elements, |
69 |
the content is an array reference. The first item in the array is a |
70 |
(possibly empty) hash reference containing attributes. The remainder of |
71 |
the array is a sequence of tag-content pairs representing the content |
72 |
of the element. |
73 |
|
74 |
So for example the result of parsing: |
75 |
|
76 |
<foo><head id="a">Hello <em>there</em></head><bar>Howdy<ref/></bar>do</foo> |
77 |
|
78 |
would be: |
79 |
Tag Content |
80 |
================================================================== |
81 |
[foo, [{}, head, [{id => "a"}, 0, "Hello ", em, [{}, 0, "there"]], |
82 |
bar, [ {}, 0, "Howdy", ref, [{}]], |
83 |
0, "do" |
84 |
] |
85 |
] |
86 |
|
87 |
The root element "foo" has 3 children: a "head" element, a "bar" |
88 |
element and the text "do". After the empty attribute hash, these are |
89 |
represented in its contents by 3 tag-content pairs. |
90 |
|
91 |
=cut |