Skip to content
This repository was archived by the owner on Nov 23, 2024. It is now read-only.

Commit e1b4c57

Browse files
feat: purity analysis for chained functions and simple cyclic functions (#203)
Closes #77 ### Summary of Changes This feature adds the last part of the purity analysis: - We now build a topologically sorted graph and filter out simple cyclic function calls. - Then we infer the purity (or impurity) of all functions in that call graph. - If a function is impure, we also collect all reasons for its impurity. - We use caching to store already computed functions and look them up when needed. There are still a few things that need improvement: - Nested cyclic functions - Member access - Further analysis of unknown reasons for impurity --------- Co-authored-by: megalinter-bot <129584137+megalinter-bot@users.noreply.github.com>
1 parent 26adffe commit e1b4c57

File tree

14 files changed

+2876
-1061
lines changed

14 files changed

+2876
-1061
lines changed
Lines changed: 6 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -1,27 +1,15 @@
1-
"""Analyse the purity of a library's API."""
1+
"""Analyze the purity of a library's API."""
22

3+
from ._build_call_graph import (
4+
build_call_graph,
5+
)
36
from ._get_module_data import (
47
ModuleDataBuilder,
58
calc_node_id,
69
get_base_expression,
710
get_module_data,
811
)
9-
from ._infer_purity import ( # TODO: rework this
10-
DefinitelyImpure,
11-
DefinitelyPure,
12-
FunctionID,
13-
MaybeImpure,
14-
OpenMode,
15-
PurityHandler,
16-
PurityInformation,
17-
PurityResult,
18-
calc_function_id,
19-
determine_open_mode,
20-
determine_purity,
21-
extract_impurity_reasons,
22-
generate_purity_information,
23-
get_function_defs,
24-
get_purity_result_str,
12+
from ._infer_purity import (
2513
infer_purity,
2614
)
2715
from ._resolve_references import (
@@ -34,20 +22,6 @@
3422
"get_base_expression",
3523
"ModuleDataBuilder",
3624
"resolve_references",
37-
"FunctionID",
38-
"PurityInformation",
39-
"PurityResult",
40-
"DefinitelyPure",
41-
"MaybeImpure",
42-
"DefinitelyImpure",
43-
"PurityHandler",
44-
"OpenMode",
45-
"determine_open_mode",
46-
"determine_purity",
47-
"extract_impurity_reasons",
48-
"generate_purity_information",
49-
"get_function_defs",
50-
"get_purity_result_str",
5125
"infer_purity",
52-
"calc_function_id",
26+
"build_call_graph",
5327
]
Lines changed: 257 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,257 @@
1+
import builtins
2+
3+
import astroid
4+
5+
from library_analyzer.processing.api.purity_analysis.model import (
6+
CallGraphForest,
7+
CallGraphNode,
8+
FunctionScope,
9+
NodeID,
10+
Reasons,
11+
Symbol,
12+
)
13+
14+
# Names of all attributes of the `builtins` module (a list of strings).
# The functions below test call names against it to recognize calls to builtins.
BUILTINS = dir(builtins)
15+
16+
17+
def build_call_graph(
18+
functions: dict[str, list[FunctionScope]],
19+
function_references: dict[str, Reasons],
20+
) -> CallGraphForest:
21+
# """Build a call graph from a list of functions.
22+
#
23+
# Parameters
24+
# ----------
25+
# * functions: a dict of functions
26+
# * function_references: a dict of function references - contains the reasons for impurity
27+
# Returns
28+
# -------
29+
# * call_graph_forest: the call graph forest with cycles contracted
30+
# """ # TODO: fix whaterver is wrong with the docstring
31+
call_graph_forest = CallGraphForest()
32+
33+
for function_name, function_scopes in functions.items():
34+
for function_scope in function_scopes:
35+
# Add reasons for impurity to the corresponding function
36+
if function_references[function_name]:
37+
function_node = CallGraphNode(data=function_scope, reasons=function_references[function_name])
38+
else:
39+
function_node = CallGraphNode(data=function_scope, reasons=Reasons())
40+
41+
# Case where the function is not called before by any other function
42+
if function_name not in call_graph_forest.graphs:
43+
call_graph_forest.add_graph(
44+
function_name,
45+
function_node,
46+
) # We save the tree in the forest by the name of the root function
47+
48+
# Default case where a function calls no other functions in its body - therefore, the tree has just one node
49+
if not function_scope.calls:
50+
continue
51+
52+
# If the function calls other functions in its body, we need to build a tree
53+
else:
54+
for call in function_scope.calls:
55+
if call.symbol.name in functions:
56+
current_tree_node = call_graph_forest.get_graph(function_name)
57+
58+
# We need to check if the called function is already in the tree
59+
if call_graph_forest.get_graph(call.symbol.name):
60+
current_tree_node.add_child(call_graph_forest.get_graph(call.symbol.name))
61+
# If the called function is not in the forest, we need to compute it first and then connect it to the current tree
62+
else:
63+
for called_function_scope in functions[call.symbol.name]:
64+
if function_references[call.symbol.name]:
65+
call_graph_forest.add_graph(
66+
call.symbol.name,
67+
CallGraphNode(
68+
data=called_function_scope,
69+
reasons=function_references[call.symbol.name],
70+
),
71+
)
72+
else:
73+
call_graph_forest.add_graph(
74+
call.symbol.name,
75+
CallGraphNode(data=called_function_scope, reasons=Reasons()),
76+
)
77+
current_tree_node.add_child(call_graph_forest.get_graph(call.symbol.name))
78+
79+
# Handle builtins: builtins are not in the functions dict, and therefore we need to handle them separately
80+
# since we do not analyze builtins any further at this stage, we can simply add them as a child to the current tree node
81+
elif call.symbol.name in BUILTINS:
82+
current_tree_node = call_graph_forest.get_graph(function_name)
83+
current_tree_node.add_child(CallGraphNode(data=call, reasons=Reasons()))
84+
85+
# Deal with unknown calls:
86+
# - calls of external code => call node not in function_reference dict
87+
# - calls of parameters # TODO: parameter calls are not handled yet
88+
# These functions get an unknown flag
89+
else:
90+
current_tree_node = call_graph_forest.get_graph(function_name)
91+
if isinstance(current_tree_node.reasons, Reasons):
92+
if not isinstance(current_tree_node.reasons.unknown_calls, list):
93+
current_tree_node.reasons.unknown_calls = []
94+
current_tree_node.reasons.unknown_calls.append(call.symbol.node)
95+
96+
handle_cycles(call_graph_forest, function_references)
97+
98+
return call_graph_forest
99+
100+
101+
def handle_cycles(call_graph_forest: CallGraphForest, function_references: dict[str, Reasons]) -> CallGraphForest:
    """Find cycles in the call graph forest and contract them.

    Each graph of the forest is searched once with `test_for_cycles`. If a cycle is reported, it is
    contracted into a single node by `contract_cycle`. At most one cycle per graph is handled.

    Parameters
    ----------
    * call_graph_forest: the call graph forest
    * function_references: a dict of function references - contains the reasons for impurity

    Returns
    -------
    * call_graph_forest: the same forest instance that was passed in, after contracting the found cycles
    """
    # Work on a snapshot of the graphs since contract_cycle adds and deletes graphs of the forest.
    for root in list(call_graph_forest.graphs.values()):
        found_cycle = test_for_cycles(root, set(), [])
        if found_cycle:
            contract_cycle(call_graph_forest, found_cycle, function_references)
            # TODO: check if other cycles exists
    return call_graph_forest
127+
128+
129+
def test_for_cycles(
130+
graph: CallGraphNode,
131+
visited_nodes: set[CallGraphNode],
132+
path: list[CallGraphNode],
133+
) -> list[CallGraphNode]:
134+
"""Tests for cycles in the call graph.
135+
136+
This function recursively traverses the call graph and checks for cycles.
137+
It uses a DFS approach to traverse the graph.
138+
If a cycle is found, the cycle is returned.
139+
It is possible that multiple cycles exist, but only one is returned.
140+
141+
Parameters
142+
----------
143+
* graph: the current node in the call graph
144+
* visited_nodes: a set of all visited nodes
145+
* path: a list of all nodes in the current path
146+
"""
147+
# If a node has no children, it is a leaf node, and we can return an empty list
148+
if not graph.children:
149+
return []
150+
151+
if graph in path:
152+
return path[path.index(graph) :] # A cycle is found, return the path containing the cycle
153+
154+
# Mark the current node as visited
155+
visited_nodes.add(graph)
156+
path.append(graph)
157+
158+
cycle = []
159+
160+
# Check for cycles in children
161+
for child in graph.children:
162+
cycle = test_for_cycles(child, visited_nodes, path)
163+
if cycle:
164+
return cycle
165+
path.pop() # Remove the current node from the path when backtracking
166+
167+
return cycle
168+
169+
170+
def contract_cycle(
171+
forest: CallGraphForest,
172+
cycle: list[CallGraphNode],
173+
function_references: dict[str, Reasons],
174+
) -> None:
175+
"""Contracts a cycle in the call graph.
176+
177+
Given a cycle in the call graph, this function contracts the cycle into a single node.
178+
179+
Parameters
180+
----------
181+
* forest: the call graph forest
182+
* cycle: a list of nodes in the cycle
183+
* function_references: a dict of function references - contains the reasons for impurity
184+
"""
185+
# Create the new combined node
186+
cycle_names = [node.data.symbol.name for node in cycle]
187+
combined_node_name = "+".join(sorted(cycle_names))
188+
combined_node_data = FunctionScope(
189+
Symbol(
190+
None,
191+
NodeID(cycle[0].data.parent.get_module_scope(), combined_node_name, None, None),
192+
combined_node_name,
193+
),
194+
)
195+
combined_reasons = Reasons.join_reasons_list([node.reasons for node in cycle])
196+
combined_node = CallGraphNode(data=combined_node_data, reasons=combined_reasons, combined_node_names=cycle_names)
197+
198+
# Add children to the combined node if they are not in the cycle (other calls)
199+
if any([isinstance(node.data, FunctionScope) and hasattr(node.data, "calls") for node in cycle]): # noqa: C419
200+
other_calls = [
201+
call
202+
for node in cycle
203+
for call in node.data.calls
204+
if call.symbol.name not in cycle_names and call.symbol.name not in BUILTINS
205+
]
206+
builtin_calls = [call for node in cycle for call in node.data.calls if call.symbol.name in BUILTINS]
207+
combined_node_data.calls = other_calls + builtin_calls
208+
combined_node.children = {
209+
CallGraphNode(data=call, reasons=function_references[call.symbol.name]) for call in other_calls
210+
}
211+
combined_node.children.update({CallGraphNode(data=call, reasons=Reasons()) for call in builtin_calls})
212+
213+
# Remove all nodes in the cycle from the forest and add the combined node instead
214+
for node in cycle:
215+
if node.data.symbol.name in BUILTINS:
216+
continue # This should not happen since builtins never call self-defined functions
217+
if node.data.symbol.name in forest.graphs:
218+
forest.delete_graph(node.data.symbol.name)
219+
220+
# Only add the combined node once - (it is possible that the same cycle is found multiple times)
221+
if combined_node_name not in forest.graphs:
222+
forest.add_graph(combined_node_name, combined_node)
223+
224+
# Set all pointers to the nodes in the cycle to the combined node
225+
for graph in forest.graphs.values():
226+
update_pointers(graph, cycle_names, combined_node)
227+
228+
229+
def update_pointers(node: CallGraphNode, cycle_names: list[str], combined_node: CallGraphNode) -> None:
    """Replace all pointers to nodes in the cycle with the combined node.

    Traverses the graph below the given node and replaces every child whose name is in the cycle
    with the combined node. The calls and the reasons of the parent are updated accordingly.
    Children that are builtins are skipped; all other children are traversed further.

    Parameters
    ----------
    * node: the node where the traversal starts
    * cycle_names: a list of all names of nodes in the cycle
    * combined_node: the combined node that replaces all nodes in the cycle
    """
    # The traversal is iterative and remembers the nodes it has already handled (by identity).
    # This way it terminates even if another cycle, which is not contracted yet, is reachable.
    handled: set[int] = set()
    pending = [node]

    while pending:
        current = pending.pop()
        if id(current) in handled:
            continue
        handled.add(id(current))

        # Iterate over a snapshot since the children of the current node are modified in the loop
        for child in list(current.children):
            child_name = child.data.symbol.name
            if child_name in BUILTINS:
                continue
            if child_name not in cycle_names:
                pending.append(child)
                continue

            current.children.discard(child)
            current.children.add(combined_node)

            # Update data
            if isinstance(current.data, FunctionScope):
                current.data.remove_call_node_by_name(child_name)
                current.data.calls.append(combined_node.data)

            # Remove the call from the reasons (reasons need to be updated later)
            if isinstance(current.reasons, Reasons):
                for call in current.reasons.calls.copy():
                    # The called expression does not need to have a `name` (e.g. attribute calls
                    # like `obj.method()`), so it is read defensively; such calls never match.
                    if isinstance(call.node, astroid.Call) and getattr(call.node.func, "name", None) == child_name:
                        current.reasons.calls.remove(call)

0 commit comments

Comments
 (0)