From 08e4c99c675e9bf1aaaf1eb81a9674ee6cbed00b Mon Sep 17 00:00:00 2001 From: Erdem Sariyuce Date: Tue, 10 Jun 2025 18:17:02 -0400 Subject: [PATCH] Implementing %degreeDistribution magic command --- .gitignore | 102 ++++++ ChangeLog.md | 1 + pyproject.toml | 1 + requirements.txt | 1 + src/graph_notebook/magics/graph_magic.py | 415 ++++++++++++++++++++++- src/graph_notebook/neptune/client.py | 1 + 6 files changed, 520 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index eeee3ce5..38d2af96 100644 --- a/.gitignore +++ b/.gitignore @@ -31,3 +31,105 @@ src/graph_notebook/widgets/package-lock.json blazegraph.jnl rules.log *.env +notebook/destination/dir/About-the-Neptune-Notebook.ipynb +notebook/destination/dir/Overview.ipynb +notebook/destination/dir/Untitled.ipynb +notebook/destination/dir/Untitled1.ipynb +notebook/destination/dir/.ipynb_checkpoints/Untitled-checkpoint.ipynb +notebook/destination/dir/.ipynb_checkpoints/Untitled1-checkpoint.ipynb +notebook/destination/dir/01-Neptune-Database/01-Getting-Started/01-About-the-Neptune-Notebook.ipynb +notebook/destination/dir/01-Neptune-Database/01-Getting-Started/02-Using-Gremlin-to-Access-the-Graph.ipynb +notebook/destination/dir/01-Neptune-Database/01-Getting-Started/03-Using-RDF-and-SPARQL-to-Access-the-Graph.ipynb +notebook/destination/dir/01-Neptune-Database/01-Getting-Started/04-Social-Network-Recommendations-with-Gremlin.ipynb +notebook/destination/dir/01-Neptune-Database/01-Getting-Started/05-Dining-By-Friends-in-Amazon-Neptune.ipynb +notebook/destination/dir/01-Neptune-Database/02-Visualization/Air-Routes-Gremlin.ipynb +notebook/destination/dir/01-Neptune-Database/02-Visualization/Air-Routes-openCypher.ipynb +notebook/destination/dir/01-Neptune-Database/02-Visualization/Air-Routes-SPARQL.ipynb +notebook/destination/dir/01-Neptune-Database/02-Visualization/Blog Workbench Visualization.ipynb +notebook/destination/dir/01-Neptune-Database/02-Visualization/EPL-Gremlin.ipynb 
+notebook/destination/dir/01-Neptune-Database/02-Visualization/EPL-openCypher.ipynb +notebook/destination/dir/01-Neptune-Database/02-Visualization/EPL-SPARQL.ipynb +notebook/destination/dir/01-Neptune-Database/02-Visualization/Grouping-and-Appearance-Customization-Gremlin.ipynb +notebook/destination/dir/01-Neptune-Database/03-Sample-Applications/00-Sample-Applications-Overview.ipynb +notebook/destination/dir/01-Neptune-Database/03-Sample-Applications/README.md +notebook/destination/dir/01-Neptune-Database/03-Sample-Applications/01-Fraud-Graphs/01-Building-a-Fraud-Graph-Application.ipynb +notebook/destination/dir/01-Neptune-Database/03-Sample-Applications/02-Knowledge-Graphs/Building-a-Knowledge-Graph-Application-Gremlin.ipynb +notebook/destination/dir/01-Neptune-Database/03-Sample-Applications/02-Knowledge-Graphs/Building-a-Knowledge-Graph-Application-openCypher.ipynb +notebook/destination/dir/01-Neptune-Database/03-Sample-Applications/03-Identity-Graphs/01-Building-an-Identity-Graph-Application.ipynb +notebook/destination/dir/01-Neptune-Database/03-Sample-Applications/03-Identity-Graphs/02-Data-Modeling-for-Identity-Graphs.ipynb +notebook/destination/dir/01-Neptune-Database/03-Sample-Applications/03-Identity-Graphs/03-Jumpstart-Identity-Graphs-Using-Canonical-Model-and-ETL/03-Jumpstart-Identity-Graphs-Using-Canonical-Model-and-ETL.ipynb +notebook/destination/dir/01-Neptune-Database/03-Sample-Applications/03-Identity-Graphs/03-Jumpstart-Identity-Graphs-Using-Canonical-Model-and-ETL/glue_utils.py +notebook/destination/dir/01-Neptune-Database/03-Sample-Applications/03-Identity-Graphs/03-Jumpstart-Identity-Graphs-Using-Canonical-Model-and-ETL/script/neptune-glue-demographics.py +notebook/destination/dir/01-Neptune-Database/03-Sample-Applications/03-Identity-Graphs/03-Jumpstart-Identity-Graphs-Using-Canonical-Model-and-ETL/script/neptune-glue-telemetry.py 
+notebook/destination/dir/01-Neptune-Database/03-Sample-Applications/03-Identity-Graphs/03-Jumpstart-Identity-Graphs-Using-Canonical-Model-and-ETL/script/neptune-glue-transactions.py +notebook/destination/dir/01-Neptune-Database/03-Sample-Applications/04-Security-Graphs/01-Building-a-Security-Graph-Application-with-Gremlin.ipynb +notebook/destination/dir/01-Neptune-Database/03-Sample-Applications/04-Security-Graphs/01-Building-a-Security-Graph-Application-with-openCypher.ipynb +notebook/destination/dir/01-Neptune-Database/03-Sample-Applications/05-Healthcare-and-Life-Sciences-Graphs/01-Modeling-Molecular-Structures-as-Graph-Data-Gremlin.ipynb +notebook/destination/dir/01-Neptune-Database/03-Sample-Applications/06-Data-Science-Samples/01-Identifying-Fraud-Rings-Using-Social-Network-Analytics.ipynb +notebook/destination/dir/01-Neptune-Database/03-Sample-Applications/06-Data-Science-Samples/02-Identifying-1st-Person-Synthetic-Identity-Fraud-Using-Graph-Similarity.ipynb +notebook/destination/dir/01-Neptune-Database/03-Sample-Applications/06-Data-Science-Samples/03-Logistics-Analysis-using-a-Transportation-Network.ipynb +notebook/destination/dir/01-Neptune-Database/03-Sample-Applications/07-Games-Industry-Graphs/01-Building-a-Social-Network-for-Games-Gremlin.ipynb +notebook/destination/dir/02-Neptune-Analytics/01-Getting-Started/01-Getting-Started-With-Neptune-Analytics.ipynb +notebook/destination/dir/02-Neptune-Analytics/02-Graph-Algorithms/00-Amazon-Neptune-Analytics-Algorithm-Support.pdf +notebook/destination/dir/02-Neptune-Analytics/02-Graph-Algorithms/01-Getting-Started-With-Graph-Algorithms.ipynb +notebook/destination/dir/02-Neptune-Analytics/02-Graph-Algorithms/02-Path-Finding-Algorithms.ipynb +notebook/destination/dir/02-Neptune-Analytics/02-Graph-Algorithms/03-Centrality-Algorithms.ipynb +notebook/destination/dir/02-Neptune-Analytics/02-Graph-Algorithms/04-Community-Detection-Algorithms.ipynb 
+notebook/destination/dir/02-Neptune-Analytics/02-Graph-Algorithms/05-Similarity-Algorithms.ipynb +notebook/destination/dir/02-Neptune-Analytics/02-Graph-Algorithms/06-Vector-Similarity-Algorithms.ipynb +notebook/destination/dir/02-Neptune-Analytics/02-Graph-Algorithms/.ipynb_checkpoints/03-Centrality-Algorithms-checkpoint.ipynb +notebook/destination/dir/02-Neptune-Analytics/03-Sample-Use-Cases/Overview.ipynb +notebook/destination/dir/02-Neptune-Analytics/03-Sample-Use-Cases/01-FinTech/01-Fraud-Ring-Identifcation.ipynb +notebook/destination/dir/02-Neptune-Analytics/03-Sample-Use-Cases/02-Investment-Analysis/01-EDGAR-Competitor-Analysis-using-Knowledge-Graph-Graph-Algorithms-and-Vector-Search.ipynb +notebook/destination/dir/02-Neptune-Analytics/03-Sample-Use-Cases/03-Software-Bill-Of-Materials/00-Intro-to-Software-Bill-Of-Materials.ipynb +notebook/destination/dir/02-Neptune-Analytics/03-Sample-Use-Cases/03-Software-Bill-Of-Materials/01-SBOM-Dependency-Analysis.ipynb +notebook/destination/dir/02-Neptune-Analytics/03-Sample-Use-Cases/03-Software-Bill-Of-Materials/02-SBOM-Vulnerability-Analysis.ipynb +notebook/destination/dir/02-Neptune-Analytics/03-Sample-Use-Cases/03-Software-Bill-Of-Materials/nodestream_template.yaml +notebook/destination/dir/02-Neptune-Analytics/03-Sample-Use-Cases/03-Software-Bill-Of-Materials/example_sboms/01/aws-sdk-pandas_aws_de5d1610d6d4ea3be44a01ab3f09b64e291a4ab7.json +notebook/destination/dir/02-Neptune-Analytics/03-Sample-Use-Cases/03-Software-Bill-Of-Materials/example_sboms/01/boto3_boto_6bbdf83ee00b749587f0fe54778fbec5411147b5.json +notebook/destination/dir/02-Neptune-Analytics/03-Sample-Use-Cases/03-Software-Bill-Of-Materials/example_sboms/01/graph-explorer_aws_39eed2c8bae4afc1b38fa7975c720461a7c7c3a6.json +notebook/destination/dir/02-Neptune-Analytics/03-Sample-Use-Cases/03-Software-Bill-Of-Materials/example_sboms/01/graph-notebook_aws_bb96dd8d0d9ef9d0e9060f8c5e26a042a3db40c4.json 
+notebook/destination/dir/02-Neptune-Analytics/03-Sample-Use-Cases/03-Software-Bill-Of-Materials/example_sboms/02/aws-cli-2-0-6.json +notebook/destination/dir/02-Neptune-Analytics/03-Sample-Use-Cases/03-Software-Bill-Of-Materials/example_sboms/02/gremlin-console-3-7-1_cydx.json +notebook/destination/dir/02-Neptune-Analytics/03-Sample-Use-Cases/03-Software-Bill-Of-Materials/example_sboms/02/gremlin-server-3-7-1-cydx.json +notebook/destination/dir/02-Neptune-Analytics/03-Sample-Use-Cases/03-Software-Bill-Of-Materials/sbom_code/sbom_helper.py +notebook/destination/dir/02-Neptune-Analytics/04-OpenCypher-Over-RDF/01-Air-Routes.ipynb +notebook/destination/dir/02-Neptune-Analytics/04-OpenCypher-Over-RDF/02-Air-Routes-GeoNames.ipynb +notebook/destination/dir/02-Neptune-Analytics/04-OpenCypher-Over-RDF/Air-Routes-Ontology-Diagram.png +notebook/destination/dir/03-Neptune-ML/neptune_ml_utils.py +notebook/destination/dir/03-Neptune-ML/neptune-ml-pretrained-model-config.json +notebook/destination/dir/03-Neptune-ML/01-Gremlin/01-Getting-Started-with-Neptune-ML-Gremlin.ipynb +notebook/destination/dir/03-Neptune-ML/01-Gremlin/02-Introduction-to-Node-Classification-Gremlin.ipynb +notebook/destination/dir/03-Neptune-ML/01-Gremlin/03-Introduction-to-Node-Regression-Gremlin.ipynb +notebook/destination/dir/03-Neptune-ML/01-Gremlin/04-Introduction-to-Link-Prediction-Gremlin.ipynb +notebook/destination/dir/03-Neptune-ML/01-Gremlin/05-Introduction-to-Edge-Classification-Gremlin.ipynb +notebook/destination/dir/03-Neptune-ML/01-Gremlin/06-Introduction-to-Edge-Regression-Gremlin.ipynb +notebook/destination/dir/03-Neptune-ML/02-SPARQL/neptune_ml_sparql_utils.py +notebook/destination/dir/03-Neptune-ML/02-SPARQL/Neptune-ML-00-Getting-Started-with-Neptune-ML-SPARQL.ipynb +notebook/destination/dir/03-Neptune-ML/02-SPARQL/Neptune-ML-01-Introduction-to-Object-Classification-SPARQL.ipynb +notebook/destination/dir/03-Neptune-ML/02-SPARQL/Neptune-ML-02-Introduction-to-Object-Regression-SPARQL.ipynb 
+notebook/destination/dir/03-Neptune-ML/02-SPARQL/Neptune-ML-03-Introduction-to-Link-Prediction-SPARQL.ipynb +notebook/destination/dir/03-Neptune-ML/02-SPARQL/neptune-ml-pretrained-rdf-model-config.json +notebook/destination/dir/03-Neptune-ML/03-Sample-Applications/02-Job-Recommendation-Text-Encoding.ipynb +notebook/destination/dir/03-Neptune-ML/03-Sample-Applications/03-Real-Time-Fraud-Detection-Using-Inductive-Inference.ipynb +notebook/destination/dir/03-Neptune-ML/03-Sample-Applications/01-People-Analytics/People-Analytics-using-Neptune-ML.ipynb +notebook/destination/dir/03-Neptune-ML/03-Sample-Applications/01-People-Analytics/WA_Fn-UseC_-HR-Employee-Attrition.csv +notebook/destination/dir/03-Neptune-ML/03-Sample-Applications/04-Telco-Networks/1a-Use-case.ipynb +notebook/destination/dir/03-Neptune-ML/03-Sample-Applications/04-Telco-Networks/1b-Graph_init.ipynb +notebook/destination/dir/03-Neptune-ML/03-Sample-Applications/04-Telco-Networks/2a-GraphQueryGremlin.ipynb +notebook/destination/dir/03-Neptune-ML/03-Sample-Applications/04-Telco-Networks/2b-GraphQueryLLM.ipynb +notebook/destination/dir/03-Neptune-ML/03-Sample-Applications/04-Telco-Networks/3a-TransductiveMode-CellPrediction.ipynb +notebook/destination/dir/03-Neptune-ML/03-Sample-Applications/04-Telco-Networks/3b-InductiveModeCell-Prediction.ipynb +notebook/destination/dir/03-Neptune-ML/03-Sample-Applications/04-Telco-Networks/neptune_ml_utils.py +notebook/destination/dir/03-Neptune-ML/03-Sample-Applications/04-Telco-Networks/Transform2Neptune.py +notebook/destination/dir/04-Language-Tutorials/01-Gremlin/01-Basic-Read-Queries.ipynb +notebook/destination/dir/04-Language-Tutorials/01-Gremlin/02-Loops-Repeats.ipynb +notebook/destination/dir/04-Language-Tutorials/01-Gremlin/03-Ordering-Functions-Grouping.ipynb +notebook/destination/dir/04-Language-Tutorials/01-Gremlin/04-Creating-Updating-Deleting-Queries.ipynb +notebook/destination/dir/04-Language-Tutorials/01-Gremlin/Gremlin-Exercises-Answer-Sheet.ipynb 
+notebook/destination/dir/04-Language-Tutorials/02-openCypher/01-Basic-Read-Queries.ipynb +notebook/destination/dir/04-Language-Tutorials/02-openCypher/02-Variable-Length-Paths.ipynb +notebook/destination/dir/04-Language-Tutorials/02-openCypher/03-Ordering-Functions-Grouping.ipynb +notebook/destination/dir/04-Language-Tutorials/02-openCypher/04-Creating-Updating-Delete-Queries.ipynb +notebook/destination/dir/04-Language-Tutorials/02-openCypher/openCypher-Exercises-Answer-Key.ipynb +notebook/destination/dir/04-Language-Tutorials/03-SPARQL/01-SPARQL-Basics.ipynb diff --git a/ChangeLog.md b/ChangeLog.md index af0d7cbe..af079d5e 100644 --- a/ChangeLog.md +++ b/ChangeLog.md @@ -3,6 +3,7 @@ Starting with v1.31.6, this file will contain a record of major features and updates made in each release of graph-notebook. ## Upcoming +- Added %degreeDistribution magic command ([PR](https://github.com/aws/graph-notebook/pull/749)) TODO: add to the specific release below when it's released - Locked numba dependency to 0.60.0 to avoid numpy conflict ([Link to PR](https://github.com/aws/graph-notebook/pull/735)) - Fixed library target for nbclassic nbextension for graph_notebook_widget ([Link to PR](https://github.com/aws/graph-notebook/pull/739)) diff --git a/pyproject.toml b/pyproject.toml index e7a95ccf..391ba89c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -46,6 +46,7 @@ dependencies = [ 'networkx==2.4', 'numpy>=1.23.5,<1.24.0', 'pandas>=2.1.0,<=2.2.2', + 'matplotlib>=3.9.4', # Graph databases and query languages 'gremlinpython>=3.5.1,<=3.7.2', diff --git a/requirements.txt b/requirements.txt index a80fd44e..7e1f7328 100644 --- a/requirements.txt +++ b/requirements.txt @@ -18,6 +18,7 @@ itables>=2.0.0,<=2.1.0 networkx==2.4 numpy>=1.23.5,<1.24.0 pandas>=2.1.0,<=2.2.2 +matplotlib>=3.9.4 # Graph databases and query languages gremlinpython>=3.5.1,<=3.7.2 diff --git a/src/graph_notebook/magics/graph_magic.py b/src/graph_notebook/magics/graph_magic.py index acf1a0e3..fba4d15d 
100644
--- a/src/graph_notebook/magics/graph_magic.py
+++ b/src/graph_notebook/magics/graph_magic.py
@@ -16,6 +16,9 @@
 import ast
 import re
 
+import numpy as np
+import matplotlib.pyplot as plt
+
 from ipyfilechooser import FileChooser
 from enum import Enum
 from copy import copy
@@ -53,7 +56,8 @@
     STATISTICS_LANGUAGE_INPUTS, STATISTICS_LANGUAGE_INPUTS_SPARQL, STATISTICS_MODES, SUMMARY_MODES, \
     SPARQL_EXPLAIN_MODES, OPENCYPHER_EXPLAIN_MODES, GREMLIN_EXPLAIN_MODES, \
     OPENCYPHER_PLAN_CACHE_MODES, OPENCYPHER_DEFAULT_TIMEOUT, OPENCYPHER_STATUS_STATE_MODES, \
     normalize_service_name, NEPTUNE_DB_SERVICE_NAME, NEPTUNE_ANALYTICS_SERVICE_NAME, GRAPH_PG_INFO_METRICS, \
+    TRAVERSAL_DIRECTIONS, \
     GREMLIN_PROTOCOL_FORMATS, DEFAULT_HTTP_PROTOCOL, DEFAULT_WS_PROTOCOL, GRAPHSONV4_UNTYPED, \
     GREMLIN_SERIALIZERS_WS, get_gremlin_serializer_mime, normalize_protocol_name, generate_snapshot_name)
 from graph_notebook.network import SPARQLNetwork
@@ -3920,3 +3924,358 @@ def handle_opencypher_status(self, line, local_ns):
             store_to_ns(args.store_to, js, local_ns)
             if not args.silent:
                 print(json.dumps(js, indent=2))
+
+    @line_magic
+    @needs_local_scope
+    @display_exceptions
+    @neptune_graph_only
+    def degreeDistribution(self, line, local_ns: dict = None):
+        """
+        Show the degree distribution of the graph as an interactive histogram (%degreeDistribution).
+
+        The histogram shows the number of vertices (y-axis) that have a given degree (x-axis). Parameters can
+        be given on the command line and/or changed through the widgets before pressing Submit:
+
+        --traversalDirection  both (default), inbound or outbound: which edges count towards the degree.
+        --vertexLabels        space separated vertex labels to restrict to. Default: all vertex labels.
+        --edgeLabels          space separated edge labels to restrict to. Default: all edge labels.
+
+        Example usages:
+            %degreeDistribution
+            %degreeDistribution --traversalDirection inbound
+            %degreeDistribution --traversalDirection inbound --vertexLabels airport country
+        """
+        if not self.client.is_analytics_domain():
+            print("This command is only supported for Neptune Analytics domains.")
+            return
+
+        parser = argparse.ArgumentParser()
+        parser.add_argument('--traversalDirection', nargs='?', type=str.lower, default='both',
+                            choices=TRAVERSAL_DIRECTIONS,
+                            help=f'Type of the degree for which the distribution is shown. '
+                                 f'Valid inputs: {TRAVERSAL_DIRECTIONS}. Default: both.')
+        parser.add_argument('--vertexLabels', nargs='*', default=[],
+                            help="The vertex labels for which the induced graph is considered and the degree "
+                                 "distribution is shown. If not supplied, we will default to using all the "
+                                 "vertex labels.")
+        parser.add_argument('--edgeLabels', nargs='*', default=[],
+                            help="The edge labels for which the degree distribution is shown. If not supplied, "
+                                 "we will default to using all the edge labels.")
+        # TODO: add an --export-to parameter for saving the visualization.
+        args = parser.parse_args(line.split())
+
+        # The labels present in the graph come from the graph summary and populate the selection widgets.
+        try:
+            summary_res = self.client.statistics("propertygraph", True, "detailed", True)
+            summary_res.raise_for_status()
+            graph_summary = summary_res.json()['graphSummary']
+            available_vertex_labels = sorted(graph_summary['nodeLabels'])
+            available_edge_labels = sorted(graph_summary['edgeLabels'])
+        except Exception as e:
+            print(f"Error retrieving graph summary: {e}")
+            return
+
+        def known_labels(requested, available, kind):
+            # SelectMultiple rejects values that are not among its options: drop and report unknown labels.
+            unknown = [label for label in requested if label not in available]
+            if unknown:
+                print(f"Ignoring unknown {kind} labels: {', '.join(unknown)}")
+            return [label for label in requested if label in available]
+
+        # Values given on the command line are pre-selected in the widgets.
+        td_dropdown = widgets.Dropdown(
+            options=TRAVERSAL_DIRECTIONS,
+            description='Traversal direction:',
+            disabled=False,
+            style=SEED_WIDGET_STYLE,
+            # nargs='?' yields None when the flag is passed without a value
+            value=args.traversalDirection or 'both'
+        )
+        # An empty selection means that every label is taken into account.
+        vertex_labels_select = widgets.SelectMultiple(
+            options=available_vertex_labels,
+            description='Vertex labels:',
+            disabled=False,
+            style=SEED_WIDGET_STYLE,
+            value=known_labels(args.vertexLabels, available_vertex_labels, 'vertex')
+        )
+        edge_labels_select = widgets.SelectMultiple(
+            options=available_edge_labels,
+            description='Edge labels:',
+            disabled=False,
+            style=SEED_WIDGET_STYLE,
+            value=known_labels(args.edgeLabels, available_edge_labels, 'edge')
+        )
+        submit_button = widgets.Button(description="Submit")
+        output = widgets.Output()
+        display(td_dropdown, vertex_labels_select, edge_labels_select, submit_button, output)
+
+        def on_button_clicked(b):
+            td = td_dropdown.value
+            vlabels = list(vertex_labels_select.value)
+            elabels = list(edge_labels_select.value)
+
+            # Clear the output widget before displaying new content
+            output.clear_output(wait=True)
+            with output:
+                res = self.execute_degree_distribution_query(td, vlabels, elabels, local_ns)
+                try:
+                    algo_output = res['results'][0]['output']
+                    distribution = algo_output['distribution']
+                    stats = algo_output['statistics']
+                    max_deg, median_deg, mean_deg = stats['maxDeg'], stats['p50'], stats['mean']
+                except (KeyError, IndexError, TypeError):
+                    print("The degree distribution query did not return a result.")
+                    return
+                if not distribution:
+                    print("No vertices match the selected labels.")
+                    return
+
+                # Each distribution entry is a [degree, count] pair
+                pairs = np.array(distribution)
+                self.plot_interactive_degree_distribution(pairs[:, 0], pairs[:, 1], max_deg, median_deg,
+                                                          mean_deg, traversal_direction=td)
+
+        submit_button.on_click(on_button_clicked)
+
+    def execute_degree_distribution_query(self, td, vlabels, elabels, local_ns):
+        """
+        Run neptune.algo.degreeDistribution through the openCypher handler and return the stored response.
+
+        :param td: traversal direction, one of TRAVERSAL_DIRECTIONS
+        :param vlabels: vertex labels to restrict to, an empty list means no restriction
+        :param elabels: edge labels to restrict to, an empty list means no restriction
+        :param local_ns: notebook namespace handed to the openCypher handler, may be None
+        :return: the query response, or None when the handler did not store one
+        """
+        def cypher_list(labels):
+            # json.dumps escapes quotes and backslashes so that a label cannot break out of its string literal
+            return "[" + ", ".join(json.dumps(label, ensure_ascii=False) for label in labels) + "]"
+
+        query_parts = [f'traversalDirection: {json.dumps(td)}']
+        if vlabels:
+            query_parts.append(f'vertexLabels: {cypher_list(vlabels)}')
+        if elabels:
+            query_parts.append(f'edgeLabels: {cypher_list(elabels)}')
+        query = "CALL neptune.algo.degreeDistribution({" + ", ".join(query_parts) + "}) YIELD output RETURN output"
+
+        # Store under a private name so that a user variable (previously `js`) is not overwritten, and
+        # remove any earlier value so that a failed query cannot hand back a stale result.
+        namespace = local_ns if local_ns is not None else {}
+        store_key = '_degree_distribution_result'
+        namespace.pop(store_key, None)
+        self.handle_opencypher_query(f'--store-to {store_key} --silent', query, namespace)
+        return namespace.pop(store_key, None)
+
+    def plot_interactive_degree_distribution(self, unique_degrees, counts, max_deg, median_deg, mean_deg,
+                                             traversal_direction='both'):
+        """
+        Display an interactive histogram of a degree distribution together with summary statistics.
+
+        :param unique_degrees: the distinct degrees
+        :param counts: number of vertices per entry of unique_degrees
+        :param max_deg: maximum degree
+        :param median_deg: median degree
+        :param mean_deg: mean degree
+        :param traversal_direction: direction the degrees were computed for; with 'both' every edge is seen
+            from its two endpoints, so the degree sum is halved to estimate the number of edges
+        """
+        unique_degrees = np.asarray(unique_degrees)
+        counts = np.asarray(counts)
+        if counts.size == 0:
+            print("The degree distribution is empty.")
+            return
+
+        log_x_scales = ('Log-Log', 'Log(x)-Linear(y)')
+        log_y_scales = ('Log-Log', 'Linear(x)-Log(y)')
+
+        # Everything below depends on the data only, so it is computed once instead of on every redraw.
+        zero_idx = np.where(unique_degrees == 0)[0]
+        zero_degree_count = counts[zero_idx[0]] if len(zero_idx) > 0 else 0
+        isolateds_exist = zero_degree_count > 0
+        nonzero = unique_degrees > 0
+        filtered_degrees = unique_degrees[nonzero]
+        filtered_counts = counts[nonzero]
+        # Minimum non-zero degree, or 0 when every vertex is isolated
+        min_deg = np.min(filtered_degrees) if len(filtered_degrees) > 0 else 0
+        max_count = int(np.max(counts))
+        total_nodes = int(np.sum(counts))
+        total_degree = int(np.sum(unique_degrees * counts))
+        total_edges = total_degree // 2 if traversal_direction == 'both' else total_degree
+
+        def y_limit(bin_type, bin_width):
+            # Upper bound for the tallest bar: no bar can exceed the number of vertices, and a linear bin of
+            # width w covers at most w + 1 distinct degrees.
+            if bin_type == 'Raw':
+                tallest = max_count
+            elif bin_type == 'Linear':
+                tallest = min(max_count * (bin_width + 1), total_nodes)
+            else:  # 'Logarithmic'
+                tallest = total_nodes
+            return max(2, int(np.ceil(tallest * 1.1)))
+
+        def update_plot(scale_type, bin_type, bin_width, y_max, x_range, show_mindeg, show_maxdeg):
+            start_time = time.time()
+            plt.clf()
+            log_x = scale_type in log_x_scales
+
+            n_bins = 1
+            if len(filtered_degrees) > 0:
+                if bin_type == 'Raw':
+                    # One bar per distinct degree
+                    plt.bar(filtered_degrees, filtered_counts, label='Raw', color='#000000')
+                else:
+                    if max_deg <= min_deg:
+                        # A single distinct degree: give the only bin a visible width
+                        bins = np.array([min_deg - 0.5, min_deg + 0.5])
+                    elif bin_type == 'Linear':
+                        n_bins = max(1, int((max_deg - min_deg) / bin_width))
+                        bins = np.linspace(min_deg, max_deg, n_bins + 1)
+                    else:  # 'Logarithmic': bin_width is the multiplicative factor between bin edges
+                        min_deg_log = np.log10(min_deg)
+                        max_deg_log = np.log10(max_deg)
+                        # The 0.01 offset keeps the divisor non-zero for a factor of 1
+                        n_bins = max(1, int((max_deg_log - min_deg_log) / np.log10(bin_width + 0.01)))
+                        bins = np.logspace(min_deg_log, max_deg_log, n_bins + 1)
+                    # Weighting by the counts gives the same histogram as repeating every degree count
+                    # times, without materialising one array entry per vertex.
+                    plt.hist(filtered_degrees, bins=bins, weights=filtered_counts, density=False,
+                             histtype='bar', color='#000080')
+
+            if isolateds_exist:
+                # Degree 0 cannot be drawn on a logarithmic x-axis, so isolated vertices get a bar at x=0.1 there
+                plt.bar(0.1 if log_x else 0, zero_degree_count, color='red', label='Isolated',
+                        width=0.1 if log_x else 2)
+
+            if log_x:
+                plt.xscale('log')
+                # Keep the lower limit positive; 0.05 leaves room for the isolated-vertex bar
+                plt.xlim(0.05 if isolateds_exist else x_range[0] + 0.05, x_range[1])
+            else:
+                plt.xlim(x_range[0], x_range[1])
+            if scale_type in log_y_scales:
+                plt.yscale('log')
+            plt.gca().set_ylim(top=y_max)
+
+            if show_mindeg and min_deg > 0:
+                plt.axvline(x=min_deg, color='darkgreen', linestyle='--', linewidth=2,
+                            label=f'Min non-zero degree: {min_deg}')
+            if show_maxdeg:
+                plt.axvline(x=max_deg, color='darkred', linestyle='--', linewidth=2,
+                            label=f'Max degree: {max_deg}')
+
+            plt.grid(True, which="both", ls="-", alpha=0.2)
+            plt.xlabel('Degree')
+            plt.ylabel('Number of nodes')
+            if plt.gca().get_legend_handles_labels()[0]:
+                plt.legend()
+            plt.title('Degree Distribution')
+
+            runtime = time.time() - start_time
+            with stats_output:
+                stats_output.clear_output(wait=True)
+                print(f"Render time: {runtime:.3f} seconds")
+                print("--------------------")
+                print(f"Number of nodes: {total_nodes}")
+                print(f"Number of edges: {total_edges}")
+                print(f"Number of isolated nodes: {zero_degree_count}")
+                print(f"Average degree: {mean_deg:.2f}")
+                print(f"Median degree: {median_deg:.2f}")
+                print(f"Max degree: {max_deg}")
+                if min_deg > 0:
+                    print(f"Min non-zero degree: {min_deg}")
+                if bin_type != 'Raw':
+                    print(f"Number of bins: {n_bins}")
+
+        scale_widget = widgets.Dropdown(
+            options=['Linear-Linear', 'Log-Log', 'Log(x)-Linear(y)', 'Linear(x)-Log(y)'],
+            value='Linear-Linear',
+            description='Scale:'
+        )
+        bin_widget = widgets.Dropdown(
+            options=['Raw', 'Linear', 'Logarithmic'],
+            value='Linear',
+            description='Binning:'
+        )
+        # The slider needs max >= min, which (max_deg + 2) / 10 alone violates for max_deg < 8
+        linear_bin_width_max = max(1, (max_deg + 2) / 10)
+        bin_width_widget = widgets.FloatSlider(
+            value=1,
+            min=1,
+            max=linear_bin_width_max,
+            step=1,
+            description='Bin width:',
+            tooltip=('For linear binning: actual width\n'
+                     'For log binning: multiplicative factor')
+        )
+        # Upper limit for the y-axis, enables zooming
+        initial_y_max = y_limit(bin_widget.value, bin_width_widget.value)
+        y_max_widget = widgets.IntSlider(
+            value=initial_y_max,
+            min=1,
+            max=initial_y_max,
+            step=1,
+            description='y-max:',
+        )
+        # Range slider for the x-axis, enables zooming
+        x_range_widget = widgets.FloatRangeSlider(
+            min=0,
+            max=max_deg * 1.1 + 5,
+            value=[0, max_deg * 1.1 + 5],
+            step=1,
+            description='x-axis range:',
+            disabled=False,
+            continuous_update=True,
+            readout=True,
+            readout_format='.0f',
+        )
+        show_mindeg_widget = widgets.Checkbox(value=True, description='Show Min Degree Line', disabled=False)
+        show_maxdeg_widget = widgets.Checkbox(value=True, description='Show Max Degree Line', disabled=False)
+        # update_plot prints the statistics here, so it has to exist before the interactive plot is built
+        stats_output = widgets.Output()
+
+        def update_bin_width_widget(change):
+            # Adapt the range and precision of the bin width slider to the selected binning
+            if change['new'] == 'Logarithmic':
+                bin_width_widget.min = 1.00
+                bin_width_widget.max = 10.00
+                bin_width_widget.step = 0.01
+                bin_width_widget.value = 1.00
+                bin_width_widget.readout_format = '.2f'
+                bin_width_widget.disabled = False
+            elif change['new'] == 'Raw':
+                # Raw data is not binned
+                bin_width_widget.value = 1
+                bin_width_widget.disabled = True
+            else:
+                bin_width_widget.min = 1
+                bin_width_widget.max = linear_bin_width_max
+                bin_width_widget.step = 1
+                bin_width_widget.value = 1
+                bin_width_widget.readout_format = 'd'
+                bin_width_widget.disabled = False
+
+        def update_y_max_widget(change):
+            # Re-fit the y-axis slider whenever the binning changes the attainable bar height
+            limit = y_limit(bin_widget.value, bin_width_widget.value)
+            y_max_widget.max = limit
+            y_max_widget.value = limit
+
+        # The bin width slider is adapted first so that the y-axis limit is derived from its new value
+        bin_widget.observe(update_bin_width_widget, names='value')
+        bin_widget.observe(update_y_max_widget, names='value')
+        bin_width_widget.observe(update_y_max_widget, names='value')
+
+        interactive_plot = widgets.interactive(
+            update_plot,
+            scale_type=scale_widget,
+            bin_type=bin_widget,
+            bin_width=bin_width_widget,
+            y_max=y_max_widget,
+            x_range=x_range_widget,
+            show_mindeg=show_mindeg_widget,
+            show_maxdeg=show_maxdeg_widget
+        )
+
+        # Show the controls and the plot above the statistics
+        display(widgets.VBox([interactive_plot, stats_output]))
diff --git a/src/graph_notebook/neptune/client.py b/src/graph_notebook/neptune/client.py
index 2a683452..b5b94d83 100644
--- a/src/graph_notebook/neptune/client.py
+++ b/src/graph_notebook/neptune/client.py
@@ -174,6 +174,7 @@
 
 GRAPH_PG_INFO_METRICS = {'numVertices', 'numEdges', 'numVertexProperties', 'numEdgeProperties'}
 
+TRAVERSAL_DIRECTIONS = ['both', 'inbound', 'outbound']
 
 def is_allowed_neptune_host(hostname: str, host_allowlist: list):
     for host_snippet in host_allowlist: