Skip to content

Commit c4a0b1e

Browse files
committed
ParseXS: refactor: move Sig::parse() into Node.pm
The previous two commits have moved the signature parsing code into its own sub and made it a method of Extutils::ParseXS::Node::Sig. This commit moves the body of the method into Node.pm. There are no changes to the method's code apart from no longer needing to fully qualify the method name in the declaration. This commit also moves the our ($C_group_rex, $C_arg) var definitions from ParseXS.pm to Node.pm, as they're only used to hold regexes used to split the parameters in the signature. They've also been changed from 'our' to 'my'.
1 parent d14e0cb commit c4a0b1e

File tree

2 files changed

+266
-265
lines changed

2 files changed

+266
-265
lines changed

dist/ExtUtils-ParseXS/lib/ExtUtils/ParseXS.pm

Lines changed: 0 additions & 265 deletions
Original file line numberDiff line numberDiff line change
@@ -107,40 +107,6 @@ our $AUTHOR_WARNINGS;
107107
$AUTHOR_WARNINGS = ($ENV{AUTHOR_WARNINGS} || 0)
108108
unless defined $AUTHOR_WARNINGS;
109109

110-
our ($C_group_rex, $C_arg);
111-
112-
# Group in C (no support for comments or literals)
113-
#
114-
# DAPM 2024: I'm not entirely clear what this is supposed to match.
115-
# It appears to match balanced and possibly nested [], {} etc, with
116-
# similar but possibly unbalanced punctuation within. But the balancing
117-
# brackets don't have to correspond: so [} is just as valid as [] or {},
118-
# as is [{{{{] or even [}}}}}
119-
120-
$C_group_rex = qr/ [({\[]
121-
(?: (?> [^()\[\]{}]+ ) | (??{ $C_group_rex }) )*
122-
[)}\]] /x;
123-
124-
# $C_arg: match a chunk in C without comma at toplevel (no comments),
125-
# i.e. a single arg within an XS signature, such as
126-
# foo = ','
127-
#
128-
# DAPM 2024. This appears to match zero, one or more of:
129-
# a random collection of non-bracket/quote/comma chars (e.g, a word or
130-
# number or 'int *foo' etc), or
131-
# a balanced(ish) nested brackets, or
132-
# a "string literal", or
133-
# a 'c' char literal
134-
# So (I guess), it captures the next item in a function signature
135-
136-
$C_arg = qr/ (?: (?> [^()\[\]{},"']+ )
137-
| (??{ $C_group_rex })
138-
| " (?: (?> [^\\"]+ )
139-
| \\.
140-
)* " # String literal
141-
| ' (?: (?> [^\\']+ ) | \\. )* ' # Char literal
142-
)* /xs;
143-
144110
# "impossible" keyword (multiple newline)
145111
my $END = "!End!\n\n";
146112
# Match an XS Keyword
@@ -1877,237 +1843,6 @@ EOF
18771843
return 1;
18781844
}
18791845

1880-
# ----------------------------------------------------------------
1881-
# Process the XSUB's signature: $sig->{sig_text}
1882-
#
1883-
# Split the signature on commas into parameters, while allowing for
1884-
# things like '(a = ",", b)'. Then for each parameter, parse its
1885-
# various fields and store in a ExtUtils::ParseXS::Node::Param object.
1886-
# Store those Param objects within the Sig object, plus any other state
1887-
# deduced from the signature, such as min/max permitted number of args.
1888-
#
1889-
# A typical signature might look like:
1890-
#
1891-
# OUT char *s, \
1892-
# int length(s), \
1893-
# OUTLIST int size = 10)
1894-
#
1895-
# ----------------------------------------------------------------
1896-
1897-
sub ExtUtils::ParseXS::Node::Sig::parse {
1898-
my ExtUtils::ParseXS::Node::Sig $self = shift;
1899-
my ExtUtils::ParseXS $pxs = shift;
1900-
1901-
# remove line continuation chars (\)
1902-
$self->{sig_text} =~ s/\\\s*/ /g;
1903-
my $sig_text = $self->{sig_text};
1904-
1905-
my @args;
1906-
my $optional_args_count = 0;# how many default params seen
1907-
my $args_count = 0; # how many args are expected
1908-
1909-
# First, split signature into separate parameters
1910-
1911-
if ($sig_text =~ /\S/) {
1912-
my $sig_c = "$sig_text ,";
1913-
use re 'eval'; # needed for 5.16.0 and earlier
1914-
my $can_use_regex = ($sig_c =~ /^( (??{ $C_arg }) , )* $ /x);
1915-
no re 'eval';
1916-
1917-
if ($can_use_regex) {
1918-
# If the parameters are capable of being split by using the fancy
1919-
# regex, do so. This splits the params on commas, but can handle
1920-
# things like foo(a = ",", b)
1921-
use re 'eval';
1922-
@args = ($sig_c =~ /\G ( (??{ $C_arg }) ) , /xg);
1923-
}
1924-
else {
1925-
# This is the fallback parameter-splitting path for when the $C_arg
1926-
# regex doesn't work. This code path should ideally never be
1927-
# reached, and indicates a design weakness in $C_arg.
1928-
@args = split(/\s*,\s*/, $sig_text);
1929-
Warn($pxs, "Warning: cannot parse argument list '$sig_text', fallback to split");
1930-
}
1931-
}
1932-
else {
1933-
@args = ();
1934-
}
1935-
1936-
# C++ methods get a fake object/class arg at the start.
1937-
# This affects arg numbering.
1938-
if (defined($pxs->{xsub_class})) {
1939-
my ($var, $type) =
1940-
($pxs->{xsub_seen_static} or $pxs->{xsub_func_name} eq 'new')
1941-
? ('CLASS', "char *")
1942-
: ('THIS', "$pxs->{xsub_class} *");
1943-
1944-
my ExtUtils::ParseXS::Node::Param $param
1945-
= ExtUtils::ParseXS::Node::Param->new( {
1946-
var => $var,
1947-
type => $type,
1948-
is_synthetic => 1,
1949-
arg_num => ++$args_count,
1950-
});
1951-
push @{$self->{params}}, $param;
1952-
$self->{names}{$var} = $param;
1953-
$param->check($pxs)
1954-
}
1955-
1956-
for (@args) {
1957-
# Process each parameter. A parameter is of the general form:
1958-
#
1959-
# OUT char* foo = expression
1960-
#
1961-
# where:
1962-
# IN/OUT/OUTLIST etc are only allowed under
1963-
# $pxs->{config_allow_inout}
1964-
#
1965-
# a C type is only allowed under
1966-
# $pxs->{config_allow_argtypes}
1967-
#
1968-
# foo can be a plain C variable name, or can be
1969-
# length(foo) but only under $pxs->{config_allow_argtypes}
1970-
#
1971-
# = default default value - only allowed under
1972-
# $pxs->{config_allow_argtypes}
1973-
1974-
s/^\s+//;
1975-
s/\s+$//;
1976-
1977-
# Process ellipsis (...)
1978-
1979-
$pxs->blurt("further XSUB parameter seen after ellipsis (...)")
1980-
if $self->{seen_ellipsis};
1981-
1982-
if ($_ eq '...') {
1983-
$self->{seen_ellipsis} = 1;
1984-
next;
1985-
}
1986-
1987-
# Decompose parameter into its components.
1988-
# Note that $name can be either 'foo' or 'length(foo)'
1989-
1990-
my ($out_type, $type, $name, $sp1, $sp2, $default) =
1991-
/^
1992-
(?:
1993-
(IN|IN_OUT|IN_OUTLIST|OUT|OUTLIST)
1994-
\b\s*
1995-
)?
1996-
(.*?) # optional type
1997-
\s*
1998-
\b
1999-
( \w+ # var
2000-
| length\( \s*\w+\s* \) # length(var)
2001-
)
2002-
(?:
2003-
(\s*) = (\s*) ( .*?) # default expr
2004-
)?
2005-
\s*
2006-
$
2007-
/x;
2008-
2009-
unless (defined $name) {
2010-
$pxs->blurt("Unparseable XSUB parameter: '$_'");
2011-
next;
2012-
}
2013-
2014-
my ExtUtils::ParseXS::Node::Param $param
2015-
= ExtUtils::ParseXS::Node::Param->new( {
2016-
var => $name,
2017-
});
2018-
2019-
if (exists $self->{names}{$name}) {
2020-
$pxs->blurt(
2021-
"Error: duplicate definition of argument '$name' ignored");
2022-
next;
2023-
}
2024-
2025-
push @{$self->{params}}, $param;
2026-
$self->{names}{$name} = $param;
2027-
2028-
# Process optional IN/OUT etc modifier
2029-
2030-
if (defined $out_type) {
2031-
if ($pxs->{config_allow_inout}) {
2032-
$out_type = $1 eq 'IN' ? '' : $1;
2033-
}
2034-
else {
2035-
$pxs->blurt("parameter IN/OUT modifier not allowed under -noinout");
2036-
}
2037-
}
2038-
else {
2039-
$out_type = '';
2040-
}
2041-
2042-
# Process optional type
2043-
2044-
undef $type unless length($type) && $type =~ /\S/;
2045-
2046-
if (defined($type) && !$pxs->{config_allow_argtypes}) {
2047-
$pxs->blurt("parameter type not allowed under -noargtypes");
2048-
undef $type;
2049-
}
2050-
2051-
# Process 'length(foo)' pseudo-parameter
2052-
2053-
my $is_length;
2054-
my $len_name;
2055-
2056-
if ($name =~ /^length\( \s* (\w+) \s* \)\z/x) {
2057-
if ($pxs->{config_allow_argtypes}) {
2058-
$len_name = $1;
2059-
$is_length = 1;
2060-
if (defined $default) {
2061-
$pxs->blurt("Default value not allowed on length() parameter '$len_name'");
2062-
undef $default;
2063-
}
2064-
}
2065-
else {
2066-
$pxs->blurt("length() pseudo-parameter not allowed under -noargtypes");
2067-
}
2068-
}
2069-
2070-
# Handle ANSI params: those which have a type or 'length(s)',
2071-
# and which thus don't need a matching INPUT line.
2072-
2073-
if (defined $type or $is_length) { # 'int foo' or 'length(foo)'
2074-
@$param{qw(type is_ansi)} = ($type, 1);
2075-
2076-
if ($is_length) {
2077-
$param->{no_init} = 1;
2078-
$param->{is_length} = 1;
2079-
$param->{len_name} = $len_name;
2080-
}
2081-
}
2082-
2083-
$param->{in_out} = $out_type if length $out_type;
2084-
$param->{no_init} = 1 if $out_type =~ /^OUT/;
2085-
2086-
# Process the default expression, including making the text
2087-
# to be used in "usage: ..." error messages.
2088-
my $report_def = '';
2089-
if (defined $default) {
2090-
$optional_args_count++;
2091-
# The default expression for reporting usage. For backcompat,
2092-
# sometimes preserve the spaces either side of the '='
2093-
$report_def = ((defined $type or $is_length) ? '' : $sp1)
2094-
. "=$sp2$default";
2095-
$param->{default_usage} = $report_def;
2096-
$param->{default} = $default;
2097-
}
2098-
2099-
if ($out_type eq "OUTLIST" or $is_length) {
2100-
$param->{arg_num} = undef;
2101-
}
2102-
else {
2103-
$param->{arg_num} = ++$args_count;
2104-
}
2105-
} # for (@args)
2106-
2107-
$self->{nargs} = $args_count;
2108-
$self->{min_args} = $args_count - $optional_args_count;
2109-
}
2110-
21111846

21121847
sub report_error_count {
21131848
if (@_) {

0 commit comments

Comments
 (0)