Commit | Line | Data |
---|---|---|
8b5a642f CD |
1 | ############################################################################## |
2 | # $URL$ | |
3 | # $Date$ | |
4 | # $Author$ | |
5 | # $Revision$ | |
6 | ############################################################################## | |
7 | ||
8 | package Perl::Critic::Utils::PPIRegexp; | |
9 | ||
10 | use strict; | |
11 | use warnings; | |
29e94807 | 12 | |
90c0067c | 13 | use English qw(-no_match_vars); |
29e94807 | 14 | use Readonly; |
ec024463 | 15 | use Carp qw(croak); |
8b5a642f | 16 | |
29e94807 ES |
17 | use PPI::Node; |
18 | ||
8b5a642f CD |
19 | use base 'Exporter'; |
20 | ||
add38467 | 21 | our $VERSION = '1.082'; |
8b5a642f CD |
22 | |
23 | #----------------------------------------------------------------------------- | |
24 | ||
25 | our @EXPORT_OK = qw( | |
ece80297 ES |
26 | parse_regexp |
27 | get_match_string | |
28 | get_substitute_string | |
29 | get_modifiers | |
30 | get_delimiters | |
31 | ppiify | |
8b5a642f CD |
32 | ); |
33 | ||
34 | our %EXPORT_TAGS = ( | |
35 | all => \@EXPORT_OK, | |
36 | ); | |
37 | ||
38 | #----------------------------------------------------------------------------- | |
39 | ||
90c0067c CD |
40 | sub parse_regexp { |
41 | my ($elem) = @_; | |
42 | ||
43 | eval { require Regexp::Parser; }; | |
44 | return if $EVAL_ERROR; | |
45 | ||
46 | my $re = get_match_string($elem); | |
47 | return if !defined $re; | |
48 | ||
49 | # Are there any external regexp modifiers? If so, embed the ones | |
50 | # that matter before parsing. | |
51 | my %modifiers = get_modifiers($elem); | |
52 | my $mods = join q{}, map {$modifiers{$_} ? $_ : q{}} qw(i m x s); | |
53 | if ($mods) { | |
54 | $re = "(?$mods:$re)"; | |
55 | } | |
56 | ||
57 | my $parser = Regexp::Parser->new; | |
58 | # If we can't parse the regexp, don't return a parse tree | |
59 | { | |
60 | local $SIG{__WARN__} = sub {}; # blissful silence... | |
61 | return if ! $parser->regex($re); | |
62 | } | |
63 | ||
64 | return $parser; | |
65 | } | |
66 | ||
67 | #----------------------------------------------------------------------------- | |
68 | ||
8b5a642f CD |
69 | sub get_match_string { |
70 | my ($elem) = @_; | |
71 | return if !$elem->{sections}; | |
72 | my $section = $elem->{sections}->[0]; | |
73 | return if !$section; | |
74 | return substr $elem->content, $section->{position}, $section->{size}; | |
75 | } | |
76 | ||
77 | #----------------------------------------------------------------------------- | |
78 | ||
79 | sub get_substitute_string { | |
80 | my ($elem) = @_; | |
81 | return if !$elem->{sections}; | |
82 | my $section = $elem->{sections}->[1]; | |
83 | return if !$section; | |
84 | return substr $elem->content, $section->{position}, $section->{size}; | |
85 | } | |
86 | ||
87 | #----------------------------------------------------------------------------- | |
88 | ||
89 | sub get_modifiers { | |
90 | my ($elem) = @_; | |
91 | return if !$elem->{modifiers}; | |
92 | return %{ $elem->{modifiers} }; | |
93 | } | |
94 | ||
6a4d1045 CD |
95 | #----------------------------------------------------------------------------- |
96 | ||
97 | sub get_delimiters { | |
98 | my ($elem) = @_; | |
99 | return if !$elem->{sections}; | |
2fe94ff8 CD |
100 | my @delimiters; |
101 | if (!$elem->{sections}->[0]->{type}) { | |
102 | # PPI v1.118 workaround: the delimiters were not recorded in some cases | |
103 | # hack: pull them out ourselves | |
104 | # limitation: this regexp fails on s{foo}<bar> | |
105 | my $operator = defined $elem->{operator} ? $elem->{operator} : q{}; | |
106 | @delimiters = join q{}, $elem =~ m/\A $operator (.).*?(.) (?:[xmsocgie]*) \z/mx; | |
107 | } else { | |
108 | @delimiters = ($elem->{sections}->[0]->{type}); | |
109 | if ($elem->{sections}->[1]) { | |
110 | push @delimiters, $elem->{sections}->[1]->{type} || $delimiters[0]; | |
111 | } | |
6a4d1045 CD |
112 | } |
113 | return @delimiters; | |
114 | } | |
115 | ||
ec024463 CD |
116 | #----------------------------------------------------------------------------- |
117 | ||
118 | { | |
119 | ## This nastiness is to auto-vivify PPI packages from Regexp::Parser classes | |
120 | ||
121 | # Track which ones are already created | |
122 | my %seen = ('Regexp::Parser::__object__' => 1); | |
123 | ||
124 | sub _get_ppi_package { | |
125 | my ($src_class, $re_node) = @_; | |
2fe94ff8 | 126 | (my $dest_class = $src_class) =~ s/\A Regexp::Parser::/Perl::Critic::PPIRegexp::/mx; |
ec024463 CD |
127 | if (!$seen{$src_class}) { |
128 | $seen{$src_class} = 1; | |
129 | croak 'Regexp node which is not in the Regexp::Parser namespace' | |
130 | if $dest_class eq $src_class; | |
131 | my $src_isa_name = $src_class . '::ISA'; | |
132 | my $dest_isa_name = $dest_class . '::ISA'; | |
133 | my @isa; | |
134 | for my $isa (eval "\@$src_isa_name") { ##no critic(Eval) | |
135 | my $dest_isa = _get_ppi_package($isa, $re_node); | |
136 | push @isa, $dest_isa; | |
137 | } | |
138 | eval "\@$dest_isa_name = qw(@isa)"; ##no critic(Eval) | |
139 | croak $EVAL_ERROR if $EVAL_ERROR; | |
140 | } | |
141 | return $dest_class; | |
142 | } | |
143 | } | |
144 | ||
29e94807 ES |
145 | Readonly::Scalar my $NO_DEPTH_USED => -1; |
146 | ||
ec024463 CD |
147 | sub ppiify { |
148 | my ($re) = @_; | |
149 | return if !$re; | |
150 | ||
151 | # walk the Regexp::Parser tree, converting to PPI nodes as we go | |
152 | ||
153 | my $ppire = PPI::Node->new; | |
154 | my @stack = ($ppire); | |
155 | my $iter = $re->walker; | |
29e94807 | 156 | my $last_depth = $NO_DEPTH_USED; |
ec024463 CD |
157 | while (my ($node, $depth) = $iter->()) { |
158 | if ($last_depth > $depth) { # -> parent | |
159 | # walker() creates pseudo-closing nodes for reasons I don't understand | |
160 | while ($last_depth-- > $depth) { | |
161 | pop @stack; | |
162 | } | |
163 | } else { | |
164 | my $src_class = ref $node; | |
165 | my $ppipkg = _get_ppi_package($src_class, $node); | |
166 | my $ppinode = $ppipkg->new($node); | |
167 | if ($last_depth == $depth) { # -> sibling | |
168 | $stack[-1] = $ppinode; | |
169 | } else { # -> child | |
170 | push @stack, $ppinode; | |
171 | } | |
172 | $stack[-2]->add_element($ppinode); | |
173 | } | |
174 | $last_depth = $depth; | |
175 | } | |
176 | return $ppire; | |
177 | } | |
178 | ||
179 | { | |
180 | package ##no critic (Package) # hide from PAUSE | |
181 | Perl::Critic::PPIRegexp::__object__; | |
182 | use base 'PPI::Node'; | |
183 | ||
184 | # Base wrapper class for PPI versions of Regexp::Parser classes | |
185 | ||
186 | # This is a hack because we call everything PPI::Node instances instead of | |
187 | # PPI::Token instances. One downside is that PPI::Dumper doesn't work on | |
188 | # regexps. | |
189 | ||
190 | sub new { | |
191 | my ($class, $re_node) = @_; | |
192 | my $self = $class->SUPER::new(); | |
193 | $self->{_re} = $re_node; | |
194 | return $self; | |
195 | } | |
196 | sub content { | |
197 | my ($self) = @_; | |
198 | return $self->{_re}->visual; | |
199 | } | |
cf330359 CD |
200 | sub re { |
201 | my ($self) = @_; | |
202 | return $self->{_re}; | |
203 | } | |
ec024463 CD |
204 | } |
205 | ||
8b5a642f CD |
206 | 1; |
207 | ||
208 | __END__ | |
209 | ||
210 | #----------------------------------------------------------------------------- | |
211 | ||
212 | =pod | |
213 | ||
214 | =for stopwords | |
215 | ||
216 | =head1 NAME | |
217 | ||
218 | Perl::Critic::Utils::PPIRegexp - Utility functions for dealing with PPI regexp tokens. | |
219 | ||
220 | =head1 SYNOPSIS | |
221 | ||
222 | use Perl::Critic::Utils::PPIRegexp qw(:all); | |
223 | use PPI::Document; | |
224 | my $doc = PPI::Document->new(\'m/foo/'); | |
225 | my $elem = $doc->find('PPI::Token::Regexp::Match')->[0]; | |
226 | print get_match_string($elem); # yields 'foo' | |
227 | ||
228 | =head1 DESCRIPTION | |
229 | ||
230 | As of PPI v1.1xx, the PPI regexp token classes | |
231 | (L<PPI::Token::Regexp::Match>, L<PPI::Token::Regexp::Substitute> and | |
232 | L<PPI::Token::QuoteLike::Regexp>) has a very weak interface, so it is | |
233 | necessary to dig into internals to learn anything useful. This | |
234 | package contains subroutines to encapsulate that excess intimacy. If | |
235 | future versions of PPI gain better accessors, this package will start | |
236 | using those. | |
237 | ||
238 | =head1 IMPORTABLE SUBS | |
239 | ||
240 | =over | |
241 | ||
90c0067c CD |
242 | =item C<parse_regexp( $token )> |
243 | ||
244 | Parse the regexp token with L<Regexp::Parser>. If that module is not | |
245 | available or if there is a parse error, returns undef. If a parse success, | |
246 | returns a Regexp::Parser instance that can be used to walk the regexp object | |
247 | model. | |
248 | ||
249 | CAVEAT: This method pays special attention to the C<x> modifier to the regexp. | |
250 | If present, we wrap the regexp string in C<(?x:...)> to ensure a proper parse. | |
251 | This does change the object model though. | |
252 | ||
ec024463 CD |
253 | Someday if PPI gets native Regexp support, this method may become deprecated. |
254 | ||
255 | =item C<ppiify( $regexp )> | |
256 | ||
257 | Given a L<Regexp::Parser> instance (perhaps as returned from C<parse_regexp>) | |
258 | convert it to a tree of L<PPI::Node> instances. This is useful because PPI | |
259 | has a more familiar and powerful programming model than the Regexp::Parser | |
260 | object tree. | |
261 | ||
262 | Someday if PPI gets native Regexp support, this method may become a no-op. | |
263 | ||
8b5a642f CD |
264 | =item C<get_match_string( $token )> |
265 | ||
266 | Returns the match portion of the regexp or undef if the specified | |
267 | token is not a regexp. Examples: | |
268 | ||
269 | m/foo/; # yields 'foo' | |
270 | s/foo/bar/; # yields 'foo' | |
271 | / \A a \z /xms; # yields ' \\A a \\z ' | |
272 | qr{baz}; # yields 'baz' | |
273 | ||
274 | =item C<get_substitute_string( $token )> | |
275 | ||
276 | Returns the substitution portion of a search-and-replace regexp or | |
277 | undef if the specified token is not a valid regexp. Examples: | |
278 | ||
279 | m/foo/; # yields undef | |
280 | s/foo/bar/; # yields 'bar' | |
281 | ||
282 | =item C<get_modifiers( $token )> | |
283 | ||
284 | Returns a hash containing booleans for the modifiers of the regexp, or | |
285 | undef if the token is not a regexp. | |
286 | ||
287 | /foo/xms; # yields (m => 1, s => 1, x => 1) | |
288 | s/foo//; # yields () | |
289 | qr/foo/i; # yields (i => 1) | |
290 | ||
6a4d1045 CD |
291 | =item C<get_delimiters( $token )> |
292 | ||
293 | Returns one (or two for a substitution regexp) two-character strings | |
294 | indicating the delimiters of the regexp, or an empty list if the token is not | |
295 | a regular expression token. For example: | |
296 | ||
297 | m/foo/; # yields ('//') | |
298 | m#foo#; # yields ('##') | |
299 | m<foo>; # yields ('<>') | |
300 | s/foo/bar/; # yields ('//', '//') | |
301 | s{foo}{bar}; # yields ('{}', '{}') | |
302 | s{foo}/bar/; # yields ('{}', '//') valid, but yuck! | |
303 | qr/foo/; # yields ('//') | |
304 | ||
8b5a642f CD |
305 | =back |
306 | ||
307 | =head1 AUTHOR | |
308 | ||
309 | Chris Dolan <cdolan@cpan.org> | |
310 | ||
311 | =head1 COPYRIGHT | |
312 | ||
20dfddeb | 313 | Copyright (c) 2007-2008 Chris Dolan. Many rights reserved. |
8b5a642f CD |
314 | |
315 | This program is free software; you can redistribute it and/or modify | |
316 | it under the same terms as Perl itself. The full text of this license | |
317 | can be found in the LICENSE file included with this module. | |
318 | ||
319 | =cut | |
320 | ||
321 | # Local Variables: | |
322 | # mode: cperl | |
323 | # cperl-indent-level: 4 | |
324 | # fill-column: 78 | |
325 | # indent-tabs-mode: nil | |
326 | # c-indentation-style: bsd | |
327 | # End: | |
328 | # ex: set ts=8 sts=4 sw=4 tw=78 ft=perl expandtab : |