Skip to content

Commit dc42f22

Browse files
authored
Merge pull request SCons#4556 from mwichmann/perf/env-setitem
Update to current fastest env setitem
2 parents 80e5b0c + 88f7f9a commit dc42f22

File tree

10 files changed

+108
-110
lines changed

10 files changed

+108
-110
lines changed

CHANGES.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,9 @@ RELEASE VERSION/DATE TO BE FILLED IN LATER
100100
SCons.Environment to SCons.Util to avoid the chance of import loops. Variables
101101
and Environment both use the routine and Environment() uses a Variables()
102102
object so better to move to a safer location.
103+
- Performance tweak: the __setitem__ method of an Environment, used for
104+
setting construction variables, now uses the string method isidentifier
105+
to validate the name (updated from microbenchmark results).
103106
- AddOption and the internal add_local_option which AddOption calls now
104107
recognize a "settable" keyword argument to indicate a project-added
105108
option can also be modified using SetOption. Fixes #3983.

RELEASE.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,9 @@ IMPROVEMENTS
8181
- Make the testing framework a little more resilient: the temporary
8282
directory for tests now includes a component named "scons" which can
8383
be given to antivirus software to exclude.
84+
- Performance tweak: the __setitem__ method of an Environment, used for
85+
setting construction variables, now uses the string method isidentifier
86+
to validate the name (updated from microbenchmark results).
8487

8588
PACKAGING
8689
---------

SCons/Environment.py

Lines changed: 17 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,6 @@
7676
to_String_for_subst,
7777
uniquer_hashables,
7878
)
79-
from SCons.Util.envs import is_valid_construction_var
8079
from SCons.Util.sctyping import ExecutorType
8180

8281
class _Null:
@@ -581,26 +580,20 @@ def __getitem__(self, key):
581580
return self._dict[key]
582581

583582
def __setitem__(self, key, value):
584-
# This is heavily used. This implementation is the best we have
585-
# according to the timings in bench/env.__setitem__.py.
586-
#
587-
# The "key in self._special_set_keys" test here seems to perform
588-
# pretty well for the number of keys we have. A hard-coded
589-
# list worked a little better in Python 2.5, but that has the
590-
# disadvantage of maybe getting out of sync if we ever add more
591-
# variable names.
592-
# So right now it seems like a good trade-off, but feel free to
593-
# revisit this with bench/env.__setitem__.py as needed (and
594-
# as newer versions of Python come out).
595583
if key in self._special_set_keys:
596584
self._special_set[key](self, key, value)
597585
else:
598-
# If we already have the entry, then it's obviously a valid
599-
# key and we don't need to check. If we do check, using a
600-
# global, pre-compiled regular expression directly is more
601-
# efficient than calling another function or a method.
602-
if key not in self._dict and not is_valid_construction_var(key):
603-
raise UserError("Illegal construction variable `%s'" % key)
586+
# Performance: since this is heavily used, try to avoid checking
587+
# if the variable is valid unless necessary. bench/__setitem__.py
588+
# times a bunch of different approaches. Based the most recent
589+
# run, against Python 3.6-3.13(beta), the best we can do across
590+
# different combinations of actions is to use a membership test
591+
# to see if we already have the variable, if so it must already
592+
# have been checked, so skip; if we do check, "isidentifier()"
593+
# (new in Python 3 so wasn't in benchmark until recently)
594+
# on the key is the best.
595+
if key not in self._dict and not key.isidentifier():
596+
raise UserError(f"Illegal construction variable {key!r}")
604597
self._dict[key] = value
605598

606599
def get(self, key, default=None):
@@ -2579,8 +2572,12 @@ def __getitem__(self, key):
25792572
return self.__dict__['__subject'].__getitem__(key)
25802573

25812574
def __setitem__(self, key, value):
2582-
if not is_valid_construction_var(key):
2583-
raise UserError("Illegal construction variable `%s'" % key)
2575+
# This doesn't have the same performance equation as a "real"
2576+
# environment: in an override you're basically just writing
2577+
# new stuff; it's not a common case to be changing values already
2578+
# set in the override dict, so don't spend time checking for existance.
2579+
if not key.isidentifier():
2580+
raise UserError(f"Illegal construction variable {key!r}")
25842581
self.__dict__['overrides'][key] = value
25852582

25862583
def __delitem__(self, key):

SCons/Util/envs.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -330,10 +330,13 @@ def AddMethod(obj, function: Callable, name: Optional[str] = None) -> None:
330330
setattr(obj, name, method)
331331

332332

333+
# This routine is used to validate that a construction var name can be used
334+
# as a Python identifier, which we require. However, Python 3 introduced an
335+
# isidentifier() string method so there's really not any need for it now.
333336
_is_valid_var_re = re.compile(r'[_a-zA-Z]\w*$')
334337

335338
def is_valid_construction_var(varstr: str) -> bool:
336-
"""Return True if *varstr* is a legitimate construction variable."""
339+
"""Return True if *varstr* is a legitimate name of a construction variable."""
337340
return bool(_is_valid_var_re.match(varstr))
338341

339342
# Local Variables:

SCons/Variables/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -131,7 +131,7 @@ def _do_add(
131131
option.key = key
132132
# TODO: normalize to not include key in aliases. Currently breaks tests.
133133
option.aliases = [key,]
134-
if not SCons.Util.is_valid_construction_var(option.key):
134+
if not option.key.isidentifier():
135135
raise SCons.Errors.UserError(f"Illegal Variables key {option.key!r}")
136136
option.help = help
137137
option.default = default

bench/bench.py

Lines changed: 9 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -62,10 +62,10 @@
6262
-h, --help Display this help and exit
6363
-i ITER, --iterations ITER Run each code snippet ITER times
6464
--time Use the time.time function
65-
-r RUNS, --runs RUNS Average times for RUNS invocations of
65+
-r RUNS, --runs RUNS Average times for RUNS invocations of
6666
"""
6767

68-
# How many times each snippet of code will be (or should be) run by the
68+
# How many times each snippet of code will be (or should be) run by the
6969
# functions under test to gather the time (the "inner loop").
7070

7171
Iterations = 1000
@@ -191,19 +191,17 @@ def display(func, result_label, results):
191191

192192

193193
# pprint(results_dict)
194-
# breakpoint()
194+
195195
tests = [label for label, args, kw in Data]
196-
columns = ['Python Version', 'Implementation', 'Test'] + tests
196+
columns = ['Python Version', 'Implementation'] + tests
197197
with open(results_filename, 'a') as r:
198-
print("Python Version,%s" % ".".join(columns), file=r)
199-
# print("Python Version,%s" % ".".join(columns))
198+
print(",".join(columns), file=r)
200199

201-
for implementation in results_dict.keys():
200+
for implementation in results_dict:
201+
print(f'{py_ver_string},"{implementation}"', file=r, end='')
202202
for test in tests:
203-
print(f'{py_ver_string},"{implementation}","{test}",%8.3f' % results_dict[implementation][test], file=r)
204-
# print(f'{py_ver_string},"{implementation}","{test}",%8.3f' % results_dict[implementation][test])
205-
206-
203+
print(',%8.3f' % results_dict[implementation][test], file=r, end='')
204+
print(file=r)
207205

208206
# Local Variables:
209207
# tab-width:4

bench/env.__setitem__.py

Lines changed: 27 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -57,14 +57,14 @@ def times(num=1000000, init='', title='Results:', **statements):
5757
script_dir = os.path.split(filename)[0]
5858
if script_dir:
5959
script_dir = script_dir + '/'
60-
sys.path = [os.path.abspath(script_dir + '../src/engine')] + sys.path
60+
sys.path = [os.path.abspath(script_dir + '..')] + sys.path
6161

6262
import SCons.Errors
6363
import SCons.Environment
6464
import SCons.Util
6565

6666
is_valid_construction_var = SCons.Util.is_valid_construction_var
67-
global_valid_var = re.compile(r'[_a-zA-Z]\w*$')
67+
global_valid_var = SCons.Util.envs._is_valid_var_re
6868

6969
# The classes with different __setitem__() implementations that we're
7070
# going to horse-race.
@@ -105,7 +105,7 @@ class Environment:
105105
'SOURCES' : None,
106106
}
107107
_special_set_keys = list(_special_set.keys())
108-
_valid_var = re.compile(r'[_a-zA-Z]\w*$')
108+
_valid_var = global_valid_var
109109
def __init__(self, **kw):
110110
self._dict = kw
111111

@@ -247,8 +247,8 @@ def __setitem__(self, key, value):
247247
raise SCons.Errors.UserError("Illegal construction variable `%s'" % key)
248248
self._dict[key] = value
249249

250-
class env_Best_has_key(Environment):
251-
"""Best __setitem__(), with has_key"""
250+
class env_Best_has_key_global_regex(Environment):
251+
"""Best __setitem__(), with membership test and global regex"""
252252
def __setitem__(self, key, value):
253253
if key in self._special_set:
254254
self._special_set[key](self, key, value)
@@ -258,6 +258,18 @@ def __setitem__(self, key, value):
258258
raise SCons.Errors.UserError("Illegal construction variable `%s'" % key)
259259
self._dict[key] = value
260260

261+
class env_Best_has_key_function(Environment):
262+
"""Best __setitem__(), with membership_test and validator function"""
263+
def __setitem__(self, key, value):
264+
if key in self._special_set:
265+
self._special_set[key](self, key, value)
266+
else:
267+
if key not in self._dict \
268+
and not is_valid_construction_var(key):
269+
raise SCons.Errors.UserError("Illegal construction variable `%s'" % key)
270+
self._dict[key] = value
271+
272+
261273
class env_Best_list(Environment):
262274
"""Best __setitem__(), with a list"""
263275
def __setitem__(self, key, value):
@@ -284,6 +296,16 @@ def __setitem__(self, key, value):
284296
raise SCons.Errors.UserError("Illegal construction variable `%s'" % key)
285297
self._dict[key] = value
286298

299+
class env_Best_isidentifier(Environment):
300+
"""Best __setitem__(), with membership test and isidentifier"""
301+
def __setitem__(self, key, value):
302+
if key in self._special_set:
303+
self._special_set[key](self, key, value)
304+
else:
305+
if key not in self._dict and not key.isidentifier():
306+
raise SCons.Errors.UserError("Illegal construction variable `%s'" % key)
307+
self._dict[key] = value
308+
287309
# We'll use the names of all the env_* classes we find later to build
288310
# the dictionary of statements to be timed, and the import statement
289311
# that the timer will use to get at these classes.

bench/is_types.py

Lines changed: 28 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,11 @@
22
#
33
# Benchmarks for testing various possible implementations
44
# of the is_Dict(), is_List() and is_String() functions in
5-
# src/engine/SCons/Util.py.
5+
# SCons/Util.py.
66

77
import types
8-
from collections import UserDict, UserList, UserString
8+
from collections import UserDict, UserList, UserString, deque
9+
from collections.abc import Iterable, MappingView, MutableMapping, MutableSequence
910

1011
DictType = dict
1112
ListType = list
@@ -28,19 +29,20 @@ def original_is_String(e):
2829

2930
# New candidates that explicitly check for whether the object is an
3031
# InstanceType before calling isinstance() on the corresponding User*
31-
# type.
32-
# InstanceType was only for old-style classes, so absent in Python 3
33-
# this this is no different than the previous
32+
# type. Update: meaningless in Python 3, InstanceType was only for
33+
# old-style classes, so these are just removed.
34+
# XXX
3435

35-
def checkInstanceType_is_Dict(e):
36-
return isinstance(e, (dict, UserDict))
36+
# New candidates that try more generic names from collections:
3737

38-
def checkInstanceType_is_List(e):
39-
return isinstance(e, (list, UserList))
38+
def new_is_Dict(e):
39+
return isinstance(e, MutableMapping)
4040

41-
def checkInstanceType_is_String(e):
42-
return isinstance(e, (str, UserString))
41+
def new_is_List(e):
42+
return isinstance(e, MutableSequence)
4343

44+
def new_is_String(e):
45+
return isinstance(e, (str, UserString))
4446

4547
# Improved candidates that cache the type(e) result in a variable
4648
# before doing any checks.
@@ -51,7 +53,7 @@ def cache_type_e_is_Dict(e):
5153

5254
def cache_type_e_is_List(e):
5355
t = type(e)
54-
return t is list or isinstance(e, UserList)
56+
return t is list or isinstance(e, (UserList, deque))
5557

5658
def cache_type_e_is_String(e):
5759
t = type(e)
@@ -68,7 +70,7 @@ def global_cache_type_e_is_Dict(e):
6870

6971
def global_cache_type_e_is_List(e):
7072
t = type(e)
71-
return t is ListType or isinstance(e, UserList)
73+
return t is ListType or isinstance(e, (UserList, deque))
7274

7375
def global_cache_type_e_is_String(e):
7476
t = type(e)
@@ -77,30 +79,10 @@ def global_cache_type_e_is_String(e):
7779

7880
# Alternative that uses a myType() function to map the User* objects
7981
# to their corresponding underlying types.
80-
81-
instanceTypeMap = {
82-
UserDict : dict,
83-
UserList : list,
84-
UserString : str,
85-
}
86-
87-
def myType(obj):
88-
t = type(obj)
89-
if t is types.InstanceType:
90-
t = instanceTypeMap.get(obj.__class__, t)
91-
return t
92-
93-
def myType_is_Dict(e):
94-
return myType(e) is dict
95-
96-
def myType_is_List(e):
97-
return myType(e) is list
98-
99-
def myType_is_String(e):
100-
return myType(e) is str
101-
82+
# Again, since this used InstanceType, it's not useful for Python 3.
10283

10384

85+
# These are the actual test entry points
10486

10587
def Func01(obj):
10688
"""original_is_String"""
@@ -118,19 +100,19 @@ def Func03(obj):
118100
original_is_Dict(obj)
119101

120102
def Func04(obj):
121-
"""checkInstanceType_is_String"""
103+
"""new_is_String"""
122104
for i in IterationList:
123-
checkInstanceType_is_String(obj)
105+
new_is_String(obj)
124106

125107
def Func05(obj):
126-
"""checkInstanceType_is_List"""
108+
"""new_is_List"""
127109
for i in IterationList:
128-
checkInstanceType_is_List(obj)
110+
new_is_List(obj)
129111

130112
def Func06(obj):
131-
"""checkInstanceType_is_Dict"""
113+
"""new_is_Dict"""
132114
for i in IterationList:
133-
checkInstanceType_is_Dict(obj)
115+
new_is_Dict(obj)
134116

135117
def Func07(obj):
136118
"""cache_type_e_is_String"""
@@ -162,22 +144,6 @@ def Func12(obj):
162144
for i in IterationList:
163145
global_cache_type_e_is_Dict(obj)
164146

165-
#def Func13(obj):
166-
# """myType_is_String"""
167-
# for i in IterationList:
168-
# myType_is_String(obj)
169-
#
170-
#def Func14(obj):
171-
# """myType_is_List"""
172-
# for i in IterationList:
173-
# myType_is_List(obj)
174-
#
175-
#def Func15(obj):
176-
# """myType_is_Dict"""
177-
# for i in IterationList:
178-
# myType_is_Dict(obj)
179-
180-
181147

182148
# Data to pass to the functions on each run. Each entry is a
183149
# three-element tuple:
@@ -217,6 +183,11 @@ class A:
217183
(UserList([]),),
218184
{},
219185
),
186+
(
187+
"deque",
188+
(deque([]),),
189+
{},
190+
),
220191
(
221192
"UserDict",
222193
(UserDict({}),),

0 commit comments

Comments
 (0)