Login
Clean up some self-compliance issues now that MagicNumbers
[gknop/Perl-Critic.git] / lib / Perl / Critic / Utils / PPIRegexp.pm
CommitLineData
8b5a642f
CD
1##############################################################################
2# $URL$
3# $Date$
4# $Author$
5# $Revision$
6##############################################################################
7
8package Perl::Critic::Utils::PPIRegexp;
9
10use strict;
11use warnings;
29e94807 12
90c0067c 13use English qw(-no_match_vars);
29e94807 14use Readonly;
ec024463 15use Carp qw(croak);
8b5a642f 16
29e94807
ES
17use PPI::Node;
18
8b5a642f
CD
19use base 'Exporter';
20
add38467 21our $VERSION = '1.082';
8b5a642f
CD
22
23#-----------------------------------------------------------------------------
24
25our @EXPORT_OK = qw(
ece80297
ES
26 parse_regexp
27 get_match_string
28 get_substitute_string
29 get_modifiers
30 get_delimiters
31 ppiify
8b5a642f
CD
32);
33
34our %EXPORT_TAGS = (
35 all => \@EXPORT_OK,
36);
37
38#-----------------------------------------------------------------------------
39
90c0067c
CD
40sub parse_regexp {
41 my ($elem) = @_;
42
43 eval { require Regexp::Parser; };
44 return if $EVAL_ERROR;
45
46 my $re = get_match_string($elem);
47 return if !defined $re;
48
49 # Are there any external regexp modifiers? If so, embed the ones
50 # that matter before parsing.
51 my %modifiers = get_modifiers($elem);
52 my $mods = join q{}, map {$modifiers{$_} ? $_ : q{}} qw(i m x s);
53 if ($mods) {
54 $re = "(?$mods:$re)";
55 }
56
57 my $parser = Regexp::Parser->new;
58 # If we can't parse the regexp, don't return a parse tree
59 {
60 local $SIG{__WARN__} = sub {}; # blissful silence...
61 return if ! $parser->regex($re);
62 }
63
64 return $parser;
65}
66
67#-----------------------------------------------------------------------------
68
8b5a642f
CD
69sub get_match_string {
70 my ($elem) = @_;
71 return if !$elem->{sections};
72 my $section = $elem->{sections}->[0];
73 return if !$section;
74 return substr $elem->content, $section->{position}, $section->{size};
75}
76
77#-----------------------------------------------------------------------------
78
79sub get_substitute_string {
80 my ($elem) = @_;
81 return if !$elem->{sections};
82 my $section = $elem->{sections}->[1];
83 return if !$section;
84 return substr $elem->content, $section->{position}, $section->{size};
85}
86
87#-----------------------------------------------------------------------------
88
89sub get_modifiers {
90 my ($elem) = @_;
91 return if !$elem->{modifiers};
92 return %{ $elem->{modifiers} };
93}
94
6a4d1045
CD
95#-----------------------------------------------------------------------------
96
97sub get_delimiters {
98 my ($elem) = @_;
99 return if !$elem->{sections};
2fe94ff8
CD
100 my @delimiters;
101 if (!$elem->{sections}->[0]->{type}) {
102 # PPI v1.118 workaround: the delimiters were not recorded in some cases
103 # hack: pull them out ourselves
104 # limitation: this regexp fails on s{foo}<bar>
105 my $operator = defined $elem->{operator} ? $elem->{operator} : q{};
106 @delimiters = join q{}, $elem =~ m/\A $operator (.).*?(.) (?:[xmsocgie]*) \z/mx;
107 } else {
108 @delimiters = ($elem->{sections}->[0]->{type});
109 if ($elem->{sections}->[1]) {
110 push @delimiters, $elem->{sections}->[1]->{type} || $delimiters[0];
111 }
6a4d1045
CD
112 }
113 return @delimiters;
114}
115
ec024463
CD
116#-----------------------------------------------------------------------------
117
118{
119 ## This nastiness is to auto-vivify PPI packages from Regexp::Parser classes
120
121 # Track which ones are already created
122 my %seen = ('Regexp::Parser::__object__' => 1);
123
124 sub _get_ppi_package {
125 my ($src_class, $re_node) = @_;
2fe94ff8 126 (my $dest_class = $src_class) =~ s/\A Regexp::Parser::/Perl::Critic::PPIRegexp::/mx;
ec024463
CD
127 if (!$seen{$src_class}) {
128 $seen{$src_class} = 1;
129 croak 'Regexp node which is not in the Regexp::Parser namespace'
130 if $dest_class eq $src_class;
131 my $src_isa_name = $src_class . '::ISA';
132 my $dest_isa_name = $dest_class . '::ISA';
133 my @isa;
134 for my $isa (eval "\@$src_isa_name") { ##no critic(Eval)
135 my $dest_isa = _get_ppi_package($isa, $re_node);
136 push @isa, $dest_isa;
137 }
138 eval "\@$dest_isa_name = qw(@isa)"; ##no critic(Eval)
139 croak $EVAL_ERROR if $EVAL_ERROR;
140 }
141 return $dest_class;
142 }
143}
144
29e94807
ES
145Readonly::Scalar my $NO_DEPTH_USED => -1;
146
ec024463
CD
147sub ppiify {
148 my ($re) = @_;
149 return if !$re;
150
151 # walk the Regexp::Parser tree, converting to PPI nodes as we go
152
153 my $ppire = PPI::Node->new;
154 my @stack = ($ppire);
155 my $iter = $re->walker;
29e94807 156 my $last_depth = $NO_DEPTH_USED;
ec024463
CD
157 while (my ($node, $depth) = $iter->()) {
158 if ($last_depth > $depth) { # -> parent
159 # walker() creates pseudo-closing nodes for reasons I don't understand
160 while ($last_depth-- > $depth) {
161 pop @stack;
162 }
163 } else {
164 my $src_class = ref $node;
165 my $ppipkg = _get_ppi_package($src_class, $node);
166 my $ppinode = $ppipkg->new($node);
167 if ($last_depth == $depth) { # -> sibling
168 $stack[-1] = $ppinode;
169 } else { # -> child
170 push @stack, $ppinode;
171 }
172 $stack[-2]->add_element($ppinode);
173 }
174 $last_depth = $depth;
175 }
176 return $ppire;
177}
178
179{
180 package ##no critic (Package) # hide from PAUSE
181 Perl::Critic::PPIRegexp::__object__;
182 use base 'PPI::Node';
183
184 # Base wrapper class for PPI versions of Regexp::Parser classes
185
186 # This is a hack because we call everything PPI::Node instances instead of
187 # PPI::Token instances. One downside is that PPI::Dumper doesn't work on
188 # regexps.
189
190 sub new {
191 my ($class, $re_node) = @_;
192 my $self = $class->SUPER::new();
193 $self->{_re} = $re_node;
194 return $self;
195 }
196 sub content {
197 my ($self) = @_;
198 return $self->{_re}->visual;
199 }
cf330359
CD
200 sub re {
201 my ($self) = @_;
202 return $self->{_re};
203 }
ec024463
CD
204}
205
8b5a642f
CD
2061;
207
208__END__
209
210#-----------------------------------------------------------------------------
211
212=pod
213
214=for stopwords
215
216=head1 NAME
217
218Perl::Critic::Utils::PPIRegexp - Utility functions for dealing with PPI regexp tokens.
219
220=head1 SYNOPSIS
221
222 use Perl::Critic::Utils::PPIRegexp qw(:all);
223 use PPI::Document;
224 my $doc = PPI::Document->new(\'m/foo/');
225 my $elem = $doc->find('PPI::Token::Regexp::Match')->[0];
226 print get_match_string($elem); # yields 'foo'
227
228=head1 DESCRIPTION
229
230As of PPI v1.1xx, the PPI regexp token classes
231(L<PPI::Token::Regexp::Match>, L<PPI::Token::Regexp::Substitute> and
232L<PPI::Token::QuoteLike::Regexp>) has a very weak interface, so it is
233necessary to dig into internals to learn anything useful. This
234package contains subroutines to encapsulate that excess intimacy. If
235future versions of PPI gain better accessors, this package will start
236using those.
237
238=head1 IMPORTABLE SUBS
239
240=over
241
90c0067c
CD
242=item C<parse_regexp( $token )>
243
244Parse the regexp token with L<Regexp::Parser>. If that module is not
245available or if there is a parse error, returns undef. If a parse success,
246returns a Regexp::Parser instance that can be used to walk the regexp object
247model.
248
249CAVEAT: This method pays special attention to the C<x> modifier to the regexp.
250If present, we wrap the regexp string in C<(?x:...)> to ensure a proper parse.
251This does change the object model though.
252
ec024463
CD
253Someday if PPI gets native Regexp support, this method may become deprecated.
254
255=item C<ppiify( $regexp )>
256
257Given a L<Regexp::Parser> instance (perhaps as returned from C<parse_regexp>)
258convert it to a tree of L<PPI::Node> instances. This is useful because PPI
259has a more familiar and powerful programming model than the Regexp::Parser
260object tree.
261
262Someday if PPI gets native Regexp support, this method may become a no-op.
263
8b5a642f
CD
264=item C<get_match_string( $token )>
265
266Returns the match portion of the regexp or undef if the specified
267token is not a regexp. Examples:
268
269 m/foo/; # yields 'foo'
270 s/foo/bar/; # yields 'foo'
271 / \A a \z /xms; # yields ' \\A a \\z '
272 qr{baz}; # yields 'baz'
273
274=item C<get_substitute_string( $token )>
275
276Returns the substitution portion of a search-and-replace regexp or
277undef if the specified token is not a valid regexp. Examples:
278
279 m/foo/; # yields undef
280 s/foo/bar/; # yields 'bar'
281
282=item C<get_modifiers( $token )>
283
284Returns a hash containing booleans for the modifiers of the regexp, or
285undef if the token is not a regexp.
286
287 /foo/xms; # yields (m => 1, s => 1, x => 1)
288 s/foo//; # yields ()
289 qr/foo/i; # yields (i => 1)
290
6a4d1045
CD
291=item C<get_delimiters( $token )>
292
293Returns one (or two for a substitution regexp) two-character strings
294indicating the delimiters of the regexp, or an empty list if the token is not
295a regular expression token. For example:
296
297 m/foo/; # yields ('//')
298 m#foo#; # yields ('##')
299 m<foo>; # yields ('<>')
300 s/foo/bar/; # yields ('//', '//')
301 s{foo}{bar}; # yields ('{}', '{}')
302 s{foo}/bar/; # yields ('{}', '//') valid, but yuck!
303 qr/foo/; # yields ('//')
304
8b5a642f
CD
305=back
306
307=head1 AUTHOR
308
309Chris Dolan <cdolan@cpan.org>
310
311=head1 COPYRIGHT
312
20dfddeb 313Copyright (c) 2007-2008 Chris Dolan. Many rights reserved.
8b5a642f
CD
314
315This program is free software; you can redistribute it and/or modify
316it under the same terms as Perl itself. The full text of this license
317can be found in the LICENSE file included with this module.
318
319=cut
320
321# Local Variables:
322# mode: cperl
323# cperl-indent-level: 4
324# fill-column: 78
325# indent-tabs-mode: nil
326# c-indentation-style: bsd
327# End:
328# ex: set ts=8 sts=4 sw=4 tw=78 ft=perl expandtab :